1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * Copyright (c) 2013 Nicira, Inc.
4*4882a593Smuzhiyun */
5*4882a593Smuzhiyun
6*4882a593Smuzhiyun #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun #include <linux/capability.h>
9*4882a593Smuzhiyun #include <linux/module.h>
10*4882a593Smuzhiyun #include <linux/types.h>
11*4882a593Smuzhiyun #include <linux/kernel.h>
12*4882a593Smuzhiyun #include <linux/slab.h>
13*4882a593Smuzhiyun #include <linux/uaccess.h>
14*4882a593Smuzhiyun #include <linux/skbuff.h>
15*4882a593Smuzhiyun #include <linux/netdevice.h>
16*4882a593Smuzhiyun #include <linux/in.h>
17*4882a593Smuzhiyun #include <linux/tcp.h>
18*4882a593Smuzhiyun #include <linux/udp.h>
19*4882a593Smuzhiyun #include <linux/if_arp.h>
20*4882a593Smuzhiyun #include <linux/init.h>
21*4882a593Smuzhiyun #include <linux/in6.h>
22*4882a593Smuzhiyun #include <linux/inetdevice.h>
23*4882a593Smuzhiyun #include <linux/igmp.h>
24*4882a593Smuzhiyun #include <linux/netfilter_ipv4.h>
25*4882a593Smuzhiyun #include <linux/etherdevice.h>
26*4882a593Smuzhiyun #include <linux/if_ether.h>
27*4882a593Smuzhiyun #include <linux/if_vlan.h>
28*4882a593Smuzhiyun #include <linux/rculist.h>
29*4882a593Smuzhiyun #include <linux/err.h>
30*4882a593Smuzhiyun
31*4882a593Smuzhiyun #include <net/sock.h>
32*4882a593Smuzhiyun #include <net/ip.h>
33*4882a593Smuzhiyun #include <net/icmp.h>
34*4882a593Smuzhiyun #include <net/protocol.h>
35*4882a593Smuzhiyun #include <net/ip_tunnels.h>
36*4882a593Smuzhiyun #include <net/arp.h>
37*4882a593Smuzhiyun #include <net/checksum.h>
38*4882a593Smuzhiyun #include <net/dsfield.h>
39*4882a593Smuzhiyun #include <net/inet_ecn.h>
40*4882a593Smuzhiyun #include <net/xfrm.h>
41*4882a593Smuzhiyun #include <net/net_namespace.h>
42*4882a593Smuzhiyun #include <net/netns/generic.h>
43*4882a593Smuzhiyun #include <net/rtnetlink.h>
44*4882a593Smuzhiyun #include <net/udp.h>
45*4882a593Smuzhiyun #include <net/dst_metadata.h>
46*4882a593Smuzhiyun
47*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_IPV6)
48*4882a593Smuzhiyun #include <net/ipv6.h>
49*4882a593Smuzhiyun #include <net/ip6_fib.h>
50*4882a593Smuzhiyun #include <net/ip6_route.h>
51*4882a593Smuzhiyun #endif
52*4882a593Smuzhiyun
/* Map a (key, remote address) pair to a bucket index of the tunnel table. */
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	u32 mixed = (__force u32)key ^ (__force u32)remote;

	return hash_32(mixed, IP_TNL_HASH_BITS);
}
58*4882a593Smuzhiyun
ip_tunnel_key_match(const struct ip_tunnel_parm * p,__be16 flags,__be32 key)59*4882a593Smuzhiyun static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
60*4882a593Smuzhiyun __be16 flags, __be32 key)
61*4882a593Smuzhiyun {
62*4882a593Smuzhiyun if (p->i_flags & TUNNEL_KEY) {
63*4882a593Smuzhiyun if (flags & TUNNEL_KEY)
64*4882a593Smuzhiyun return key == p->i_key;
65*4882a593Smuzhiyun else
66*4882a593Smuzhiyun /* key expected, none present */
67*4882a593Smuzhiyun return false;
68*4882a593Smuzhiyun } else
69*4882a593Smuzhiyun return !(flags & TUNNEL_KEY);
70*4882a593Smuzhiyun }
71*4882a593Smuzhiyun
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
/*
 * Find the tunnel that should receive a packet.
 *
 * @itn:    per-netns tunnel table to search
 * @link:   preferred underlying link index
 * @flags:  packet flags (TUNNEL_KEY / TUNNEL_NO_KEY are consulted)
 * @remote: address the packet came from
 * @local:  address the packet was sent to
 * @key:    key carried by the packet
 *
 * Four passes are made, from most to least specific:
 *   1. tunnel source == @local and tunnel destination == @remote;
 *   2. tunnel destination == @remote and tunnel source is unset;
 *   3. tunnel source == @local with destination unset, or tunnel destination
 *      == @local when @local is multicast;
 *   4. tunnel has neither address; its i_key must equal @key unless
 *      TUNNEL_NO_KEY is set in @flags.
 * Passes 1-3 additionally require ip_tunnel_key_match(); every pass skips
 * devices that are not IFF_UP.
 *
 * In any pass a tunnel whose parms.link equals @link is returned at once.
 * Otherwise a candidate is remembered: pass 1 keeps the last tunnel it saw,
 * later passes only fill a still-empty candidate. If no candidate exists the
 * collect_md tunnel, then the fallback device, is returned when it is up.
 *
 * Returns the selected tunnel or NULL. The hash chains are walked with
 * hlist_for_each_entry_rcu().
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *best = NULL;
	struct hlist_head *bucket;
	struct net_device *fb_dev;
	struct ip_tunnel *t;

	bucket = &itn->tunnels[ip_tunnel_hash(key, remote)];

	/* Pass 1: both endpoints match. */
	hlist_for_each_entry_rcu(t, bucket, hash_node) {
		if (t->parms.iph.saddr != local)
			continue;
		if (t->parms.iph.daddr != remote)
			continue;
		if (!(t->dev->flags & IFF_UP))
			continue;
		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		/* unlike the later passes, the last match wins here */
		best = t;
	}

	/* Pass 2: remote matches, tunnel has no source address. */
	hlist_for_each_entry_rcu(t, bucket, hash_node) {
		if (t->parms.iph.daddr != remote)
			continue;
		if (t->parms.iph.saddr != 0)
			continue;
		if (!(t->dev->flags & IFF_UP))
			continue;
		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		if (!best)
			best = t;
	}

	/* The remaining passes use the bucket for "no remote address". */
	bucket = &itn->tunnels[ip_tunnel_hash(key, 0)];

	/* Pass 3: local-only tunnels and multicast destinations. */
	hlist_for_each_entry_rcu(t, bucket, hash_node) {
		bool local_only = t->parms.iph.saddr == local &&
				  t->parms.iph.daddr == 0;
		bool mcast_dst = t->parms.iph.daddr == local &&
				 ipv4_is_multicast(local);

		if (!local_only && !mcast_dst)
			continue;
		if (!(t->dev->flags & IFF_UP))
			continue;
		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		if (!best)
			best = t;
	}

	/* Pass 4: tunnels with neither address configured. */
	hlist_for_each_entry_rcu(t, bucket, hash_node) {
		if (t->parms.iph.saddr != 0 || t->parms.iph.daddr != 0)
			continue;
		if (!(t->dev->flags & IFF_UP))
			continue;
		if (!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key)
			continue;

		if (t->parms.link == link)
			return t;
		if (!best)
			best = t;
	}

	if (best)
		return best;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && (t->dev->flags & IFF_UP))
		return t;

	fb_dev = READ_ONCE(itn->fb_tunnel_dev);
	if (fb_dev && (fb_dev->flags & IFF_UP))
		return netdev_priv(fb_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
173*4882a593Smuzhiyun
ip_bucket(struct ip_tunnel_net * itn,struct ip_tunnel_parm * parms)174*4882a593Smuzhiyun static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
175*4882a593Smuzhiyun struct ip_tunnel_parm *parms)
176*4882a593Smuzhiyun {
177*4882a593Smuzhiyun unsigned int h;
178*4882a593Smuzhiyun __be32 remote;
179*4882a593Smuzhiyun __be32 i_key = parms->i_key;
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
182*4882a593Smuzhiyun remote = parms->iph.daddr;
183*4882a593Smuzhiyun else
184*4882a593Smuzhiyun remote = 0;
185*4882a593Smuzhiyun
186*4882a593Smuzhiyun if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
187*4882a593Smuzhiyun i_key = 0;
188*4882a593Smuzhiyun
189*4882a593Smuzhiyun h = ip_tunnel_hash(i_key, remote);
190*4882a593Smuzhiyun return &itn->tunnels[h];
191*4882a593Smuzhiyun }
192*4882a593Smuzhiyun
ip_tunnel_add(struct ip_tunnel_net * itn,struct ip_tunnel * t)193*4882a593Smuzhiyun static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
194*4882a593Smuzhiyun {
195*4882a593Smuzhiyun struct hlist_head *head = ip_bucket(itn, &t->parms);
196*4882a593Smuzhiyun
197*4882a593Smuzhiyun if (t->collect_md)
198*4882a593Smuzhiyun rcu_assign_pointer(itn->collect_md_tun, t);
199*4882a593Smuzhiyun hlist_add_head_rcu(&t->hash_node, head);
200*4882a593Smuzhiyun }
201*4882a593Smuzhiyun
ip_tunnel_del(struct ip_tunnel_net * itn,struct ip_tunnel * t)202*4882a593Smuzhiyun static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
203*4882a593Smuzhiyun {
204*4882a593Smuzhiyun if (t->collect_md)
205*4882a593Smuzhiyun rcu_assign_pointer(itn->collect_md_tun, NULL);
206*4882a593Smuzhiyun hlist_del_init_rcu(&t->hash_node);
207*4882a593Smuzhiyun }
208*4882a593Smuzhiyun
ip_tunnel_find(struct ip_tunnel_net * itn,struct ip_tunnel_parm * parms,int type)209*4882a593Smuzhiyun static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
210*4882a593Smuzhiyun struct ip_tunnel_parm *parms,
211*4882a593Smuzhiyun int type)
212*4882a593Smuzhiyun {
213*4882a593Smuzhiyun __be32 remote = parms->iph.daddr;
214*4882a593Smuzhiyun __be32 local = parms->iph.saddr;
215*4882a593Smuzhiyun __be32 key = parms->i_key;
216*4882a593Smuzhiyun __be16 flags = parms->i_flags;
217*4882a593Smuzhiyun int link = parms->link;
218*4882a593Smuzhiyun struct ip_tunnel *t = NULL;
219*4882a593Smuzhiyun struct hlist_head *head = ip_bucket(itn, parms);
220*4882a593Smuzhiyun
221*4882a593Smuzhiyun hlist_for_each_entry_rcu(t, head, hash_node) {
222*4882a593Smuzhiyun if (local == t->parms.iph.saddr &&
223*4882a593Smuzhiyun remote == t->parms.iph.daddr &&
224*4882a593Smuzhiyun link == t->parms.link &&
225*4882a593Smuzhiyun type == t->dev->type &&
226*4882a593Smuzhiyun ip_tunnel_key_match(&t->parms, flags, key))
227*4882a593Smuzhiyun break;
228*4882a593Smuzhiyun }
229*4882a593Smuzhiyun return t;
230*4882a593Smuzhiyun }
231*4882a593Smuzhiyun
__ip_tunnel_create(struct net * net,const struct rtnl_link_ops * ops,struct ip_tunnel_parm * parms)232*4882a593Smuzhiyun static struct net_device *__ip_tunnel_create(struct net *net,
233*4882a593Smuzhiyun const struct rtnl_link_ops *ops,
234*4882a593Smuzhiyun struct ip_tunnel_parm *parms)
235*4882a593Smuzhiyun {
236*4882a593Smuzhiyun int err;
237*4882a593Smuzhiyun struct ip_tunnel *tunnel;
238*4882a593Smuzhiyun struct net_device *dev;
239*4882a593Smuzhiyun char name[IFNAMSIZ];
240*4882a593Smuzhiyun
241*4882a593Smuzhiyun err = -E2BIG;
242*4882a593Smuzhiyun if (parms->name[0]) {
243*4882a593Smuzhiyun if (!dev_valid_name(parms->name))
244*4882a593Smuzhiyun goto failed;
245*4882a593Smuzhiyun strlcpy(name, parms->name, IFNAMSIZ);
246*4882a593Smuzhiyun } else {
247*4882a593Smuzhiyun if (strlen(ops->kind) > (IFNAMSIZ - 3))
248*4882a593Smuzhiyun goto failed;
249*4882a593Smuzhiyun strcpy(name, ops->kind);
250*4882a593Smuzhiyun strcat(name, "%d");
251*4882a593Smuzhiyun }
252*4882a593Smuzhiyun
253*4882a593Smuzhiyun ASSERT_RTNL();
254*4882a593Smuzhiyun dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
255*4882a593Smuzhiyun if (!dev) {
256*4882a593Smuzhiyun err = -ENOMEM;
257*4882a593Smuzhiyun goto failed;
258*4882a593Smuzhiyun }
259*4882a593Smuzhiyun dev_net_set(dev, net);
260*4882a593Smuzhiyun
261*4882a593Smuzhiyun dev->rtnl_link_ops = ops;
262*4882a593Smuzhiyun
263*4882a593Smuzhiyun tunnel = netdev_priv(dev);
264*4882a593Smuzhiyun tunnel->parms = *parms;
265*4882a593Smuzhiyun tunnel->net = net;
266*4882a593Smuzhiyun
267*4882a593Smuzhiyun err = register_netdevice(dev);
268*4882a593Smuzhiyun if (err)
269*4882a593Smuzhiyun goto failed_free;
270*4882a593Smuzhiyun
271*4882a593Smuzhiyun return dev;
272*4882a593Smuzhiyun
273*4882a593Smuzhiyun failed_free:
274*4882a593Smuzhiyun free_netdev(dev);
275*4882a593Smuzhiyun failed:
276*4882a593Smuzhiyun return ERR_PTR(err);
277*4882a593Smuzhiyun }
278*4882a593Smuzhiyun
ip_tunnel_bind_dev(struct net_device * dev)279*4882a593Smuzhiyun static int ip_tunnel_bind_dev(struct net_device *dev)
280*4882a593Smuzhiyun {
281*4882a593Smuzhiyun struct net_device *tdev = NULL;
282*4882a593Smuzhiyun struct ip_tunnel *tunnel = netdev_priv(dev);
283*4882a593Smuzhiyun const struct iphdr *iph;
284*4882a593Smuzhiyun int hlen = LL_MAX_HEADER;
285*4882a593Smuzhiyun int mtu = ETH_DATA_LEN;
286*4882a593Smuzhiyun int t_hlen = tunnel->hlen + sizeof(struct iphdr);
287*4882a593Smuzhiyun
288*4882a593Smuzhiyun iph = &tunnel->parms.iph;
289*4882a593Smuzhiyun
290*4882a593Smuzhiyun /* Guess output device to choose reasonable mtu and needed_headroom */
291*4882a593Smuzhiyun if (iph->daddr) {
292*4882a593Smuzhiyun struct flowi4 fl4;
293*4882a593Smuzhiyun struct rtable *rt;
294*4882a593Smuzhiyun
295*4882a593Smuzhiyun ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
296*4882a593Smuzhiyun iph->saddr, tunnel->parms.o_key,
297*4882a593Smuzhiyun RT_TOS(iph->tos), tunnel->parms.link,
298*4882a593Smuzhiyun tunnel->fwmark, 0);
299*4882a593Smuzhiyun rt = ip_route_output_key(tunnel->net, &fl4);
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun if (!IS_ERR(rt)) {
302*4882a593Smuzhiyun tdev = rt->dst.dev;
303*4882a593Smuzhiyun ip_rt_put(rt);
304*4882a593Smuzhiyun }
305*4882a593Smuzhiyun if (dev->type != ARPHRD_ETHER)
306*4882a593Smuzhiyun dev->flags |= IFF_POINTOPOINT;
307*4882a593Smuzhiyun
308*4882a593Smuzhiyun dst_cache_reset(&tunnel->dst_cache);
309*4882a593Smuzhiyun }
310*4882a593Smuzhiyun
311*4882a593Smuzhiyun if (!tdev && tunnel->parms.link)
312*4882a593Smuzhiyun tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
313*4882a593Smuzhiyun
314*4882a593Smuzhiyun if (tdev) {
315*4882a593Smuzhiyun hlen = tdev->hard_header_len + tdev->needed_headroom;
316*4882a593Smuzhiyun mtu = min(tdev->mtu, IP_MAX_MTU);
317*4882a593Smuzhiyun }
318*4882a593Smuzhiyun
319*4882a593Smuzhiyun dev->needed_headroom = t_hlen + hlen;
320*4882a593Smuzhiyun mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
321*4882a593Smuzhiyun
322*4882a593Smuzhiyun if (mtu < IPV4_MIN_MTU)
323*4882a593Smuzhiyun mtu = IPV4_MIN_MTU;
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun return mtu;
326*4882a593Smuzhiyun }
327*4882a593Smuzhiyun
ip_tunnel_create(struct net * net,struct ip_tunnel_net * itn,struct ip_tunnel_parm * parms)328*4882a593Smuzhiyun static struct ip_tunnel *ip_tunnel_create(struct net *net,
329*4882a593Smuzhiyun struct ip_tunnel_net *itn,
330*4882a593Smuzhiyun struct ip_tunnel_parm *parms)
331*4882a593Smuzhiyun {
332*4882a593Smuzhiyun struct ip_tunnel *nt;
333*4882a593Smuzhiyun struct net_device *dev;
334*4882a593Smuzhiyun int t_hlen;
335*4882a593Smuzhiyun int mtu;
336*4882a593Smuzhiyun int err;
337*4882a593Smuzhiyun
338*4882a593Smuzhiyun dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
339*4882a593Smuzhiyun if (IS_ERR(dev))
340*4882a593Smuzhiyun return ERR_CAST(dev);
341*4882a593Smuzhiyun
342*4882a593Smuzhiyun mtu = ip_tunnel_bind_dev(dev);
343*4882a593Smuzhiyun err = dev_set_mtu(dev, mtu);
344*4882a593Smuzhiyun if (err)
345*4882a593Smuzhiyun goto err_dev_set_mtu;
346*4882a593Smuzhiyun
347*4882a593Smuzhiyun nt = netdev_priv(dev);
348*4882a593Smuzhiyun t_hlen = nt->hlen + sizeof(struct iphdr);
349*4882a593Smuzhiyun dev->min_mtu = ETH_MIN_MTU;
350*4882a593Smuzhiyun dev->max_mtu = IP_MAX_MTU - t_hlen;
351*4882a593Smuzhiyun if (dev->type == ARPHRD_ETHER)
352*4882a593Smuzhiyun dev->max_mtu -= dev->hard_header_len;
353*4882a593Smuzhiyun
354*4882a593Smuzhiyun ip_tunnel_add(itn, nt);
355*4882a593Smuzhiyun return nt;
356*4882a593Smuzhiyun
357*4882a593Smuzhiyun err_dev_set_mtu:
358*4882a593Smuzhiyun unregister_netdevice(dev);
359*4882a593Smuzhiyun return ERR_PTR(err);
360*4882a593Smuzhiyun }
361*4882a593Smuzhiyun
/*
 * Receive path for a decapsulated packet on @tunnel.
 *
 * @tunnel:        tunnel the packet was matched to
 * @skb:           the packet; always consumed by this function
 * @tpi:           parsed tunnel header info (flags, seq)
 * @tun_dst:       optional metadata dst; attached to @skb on success,
 *                 released on drop
 * @log_ecn_error: log ECN decapsulation problems (rate limited)
 *
 * The packet is dropped, with the matching rx error counters bumped, when
 *   - the presence of TUNNEL_CSUM differs between packet and tunnel,
 *   - the tunnel requires TUNNEL_SEQ and the packet has no sequence number
 *     or one that is behind the expected i_seqno,
 *   - IP_ECN_decapsulate() returns a value greater than 1.
 * Otherwise the skb is handed to gro_cells_receive().
 *
 * Always returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct net_device *ndev = tunnel->dev;
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		ndev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* checksum must be present on both sides or on neither */
	if (!!(tpi->flags & TUNNEL_CSUM) !=
	    !!(tunnel->parms.i_flags & TUNNEL_CSUM)) {
		ndev->stats.rx_crc_errors++;
		ndev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags & TUNNEL_SEQ) {
		bool bad_seq = !(tpi->flags & TUNNEL_SEQ);

		/* signed difference so that wrap-around is handled */
		if (!bad_seq && tunnel->i_seqno)
			bad_seq = (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0;

		if (bad_seq) {
			ndev->stats.rx_fifo_errors++;
			ndev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* for Ethernet-type tunnels the network header follows the MAC header */
	skb_set_network_header(skb, (ndev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			ndev->stats.rx_frame_errors++;
			ndev->stats.rx_errors++;
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(ndev, skb->len);
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(ndev)));

	if (ndev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, ndev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = ndev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
430*4882a593Smuzhiyun
ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops * ops,unsigned int num)431*4882a593Smuzhiyun int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
432*4882a593Smuzhiyun unsigned int num)
433*4882a593Smuzhiyun {
434*4882a593Smuzhiyun if (num >= MAX_IPTUN_ENCAP_OPS)
435*4882a593Smuzhiyun return -ERANGE;
436*4882a593Smuzhiyun
437*4882a593Smuzhiyun return !cmpxchg((const struct ip_tunnel_encap_ops **)
438*4882a593Smuzhiyun &iptun_encaps[num],
439*4882a593Smuzhiyun NULL, ops) ? 0 : -1;
440*4882a593Smuzhiyun }
441*4882a593Smuzhiyun EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
442*4882a593Smuzhiyun
ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops * ops,unsigned int num)443*4882a593Smuzhiyun int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
444*4882a593Smuzhiyun unsigned int num)
445*4882a593Smuzhiyun {
446*4882a593Smuzhiyun int ret;
447*4882a593Smuzhiyun
448*4882a593Smuzhiyun if (num >= MAX_IPTUN_ENCAP_OPS)
449*4882a593Smuzhiyun return -ERANGE;
450*4882a593Smuzhiyun
451*4882a593Smuzhiyun ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
452*4882a593Smuzhiyun &iptun_encaps[num],
453*4882a593Smuzhiyun ops, NULL) == ops) ? 0 : -1;
454*4882a593Smuzhiyun
455*4882a593Smuzhiyun synchronize_net();
456*4882a593Smuzhiyun
457*4882a593Smuzhiyun return ret;
458*4882a593Smuzhiyun }
459*4882a593Smuzhiyun EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
460*4882a593Smuzhiyun
ip_tunnel_encap_setup(struct ip_tunnel * t,struct ip_tunnel_encap * ipencap)461*4882a593Smuzhiyun int ip_tunnel_encap_setup(struct ip_tunnel *t,
462*4882a593Smuzhiyun struct ip_tunnel_encap *ipencap)
463*4882a593Smuzhiyun {
464*4882a593Smuzhiyun int hlen;
465*4882a593Smuzhiyun
466*4882a593Smuzhiyun memset(&t->encap, 0, sizeof(t->encap));
467*4882a593Smuzhiyun
468*4882a593Smuzhiyun hlen = ip_encap_hlen(ipencap);
469*4882a593Smuzhiyun if (hlen < 0)
470*4882a593Smuzhiyun return hlen;
471*4882a593Smuzhiyun
472*4882a593Smuzhiyun t->encap.type = ipencap->type;
473*4882a593Smuzhiyun t->encap.sport = ipencap->sport;
474*4882a593Smuzhiyun t->encap.dport = ipencap->dport;
475*4882a593Smuzhiyun t->encap.flags = ipencap->flags;
476*4882a593Smuzhiyun
477*4882a593Smuzhiyun t->encap_hlen = hlen;
478*4882a593Smuzhiyun t->hlen = t->encap_hlen + t->tun_hlen;
479*4882a593Smuzhiyun
480*4882a593Smuzhiyun return 0;
481*4882a593Smuzhiyun }
482*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
483*4882a593Smuzhiyun
tnl_update_pmtu(struct net_device * dev,struct sk_buff * skb,struct rtable * rt,__be16 df,const struct iphdr * inner_iph,int tunnel_hlen,__be32 dst,bool md)484*4882a593Smuzhiyun static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
485*4882a593Smuzhiyun struct rtable *rt, __be16 df,
486*4882a593Smuzhiyun const struct iphdr *inner_iph,
487*4882a593Smuzhiyun int tunnel_hlen, __be32 dst, bool md)
488*4882a593Smuzhiyun {
489*4882a593Smuzhiyun struct ip_tunnel *tunnel = netdev_priv(dev);
490*4882a593Smuzhiyun int pkt_size;
491*4882a593Smuzhiyun int mtu;
492*4882a593Smuzhiyun
493*4882a593Smuzhiyun tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
494*4882a593Smuzhiyun pkt_size = skb->len - tunnel_hlen;
495*4882a593Smuzhiyun pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
496*4882a593Smuzhiyun
497*4882a593Smuzhiyun if (df) {
498*4882a593Smuzhiyun mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
499*4882a593Smuzhiyun mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
500*4882a593Smuzhiyun } else {
501*4882a593Smuzhiyun mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
502*4882a593Smuzhiyun }
503*4882a593Smuzhiyun
504*4882a593Smuzhiyun if (skb_valid_dst(skb))
505*4882a593Smuzhiyun skb_dst_update_pmtu_no_confirm(skb, mtu);
506*4882a593Smuzhiyun
507*4882a593Smuzhiyun if (skb->protocol == htons(ETH_P_IP)) {
508*4882a593Smuzhiyun if (!skb_is_gso(skb) &&
509*4882a593Smuzhiyun (inner_iph->frag_off & htons(IP_DF)) &&
510*4882a593Smuzhiyun mtu < pkt_size) {
511*4882a593Smuzhiyun icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
512*4882a593Smuzhiyun return -E2BIG;
513*4882a593Smuzhiyun }
514*4882a593Smuzhiyun }
515*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_IPV6)
516*4882a593Smuzhiyun else if (skb->protocol == htons(ETH_P_IPV6)) {
517*4882a593Smuzhiyun struct rt6_info *rt6;
518*4882a593Smuzhiyun __be32 daddr;
519*4882a593Smuzhiyun
520*4882a593Smuzhiyun rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
521*4882a593Smuzhiyun NULL;
522*4882a593Smuzhiyun daddr = md ? dst : tunnel->parms.iph.daddr;
523*4882a593Smuzhiyun
524*4882a593Smuzhiyun if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
525*4882a593Smuzhiyun mtu >= IPV6_MIN_MTU) {
526*4882a593Smuzhiyun if ((daddr && !ipv4_is_multicast(daddr)) ||
527*4882a593Smuzhiyun rt6->rt6i_dst.plen == 128) {
528*4882a593Smuzhiyun rt6->rt6i_flags |= RTF_MODIFIED;
529*4882a593Smuzhiyun dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
530*4882a593Smuzhiyun }
531*4882a593Smuzhiyun }
532*4882a593Smuzhiyun
533*4882a593Smuzhiyun if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
534*4882a593Smuzhiyun mtu < pkt_size) {
535*4882a593Smuzhiyun icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
536*4882a593Smuzhiyun return -E2BIG;
537*4882a593Smuzhiyun }
538*4882a593Smuzhiyun }
539*4882a593Smuzhiyun #endif
540*4882a593Smuzhiyun return 0;
541*4882a593Smuzhiyun }
542*4882a593Smuzhiyun
/*
 * Transmit @skb through tunnel device @dev using the per-packet tunnel
 * metadata attached to the skb.
 *
 * @skb:         packet to send; always consumed
 * @dev:         tunnel device
 * @proto:       IP protocol number of the outer header
 * @tunnel_hlen: tunnel header length, passed on to tnl_update_pmtu()
 *
 * The packet is counted as a tx error and freed when
 *   - it carries no TX tunnel info for AF_INET,
 *   - the tunnel has an encapsulation type other than TUNNEL_ENCAP_NONE,
 *   - no route to the destination exists (tx_carrier_errors),
 *   - the route leads back to @dev itself (collisions),
 *   - tnl_update_pmtu() rejects it.
 * It is counted as tx_dropped when the headroom cannot be made available.
 *
 * A key tos of 1 takes the TOS from the inner IPv4/IPv6 header; a key ttl of
 * 0 takes the TTL/hop limit from the inner header, or from the route for
 * other protocols.
 */
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	/* Upper bound for the headroom hint stored in dev->needed_headroom.
	 * Without a bound the hint only ever grows; with stacked tunnels each
	 * device's hint feeds into the other's LL_RESERVED_SPACE() and the
	 * value can increase without limit.
	 */
	static const unsigned int max_allowed_headroom = 512;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		/* inherit TOS from the inner packet */
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    0, skb->mark, skb_get_hash(skb));
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	/* routing the outer packet back into this device would loop */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		/* inherit TTL from the inner packet, else from the route */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	/* Make room for what this packet actually needs: the outer IP header
	 * plus the link-layer space of the output device.
	 */
	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (skb_cow_head(skb, headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}

	/* Raise the device-wide hint, but never beyond the cap. The field is
	 * updated here without a lock, hence READ_ONCE()/WRITE_ONCE().
	 */
	if (headroom > max_allowed_headroom)
		headroom = max_allowed_headroom;
	if (headroom > READ_ONCE(dev->needed_headroom))
		WRITE_ONCE(dev->needed_headroom, headroom);

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
635*4882a593Smuzhiyun
ip_tunnel_xmit(struct sk_buff * skb,struct net_device * dev,const struct iphdr * tnl_params,u8 protocol)636*4882a593Smuzhiyun void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
637*4882a593Smuzhiyun const struct iphdr *tnl_params, u8 protocol)
638*4882a593Smuzhiyun {
639*4882a593Smuzhiyun struct ip_tunnel *tunnel = netdev_priv(dev);
640*4882a593Smuzhiyun struct ip_tunnel_info *tun_info = NULL;
641*4882a593Smuzhiyun const struct iphdr *inner_iph;
642*4882a593Smuzhiyun unsigned int max_headroom; /* The extra header space needed */
643*4882a593Smuzhiyun struct rtable *rt = NULL; /* Route to the other host */
644*4882a593Smuzhiyun bool use_cache = false;
645*4882a593Smuzhiyun struct flowi4 fl4;
646*4882a593Smuzhiyun bool md = false;
647*4882a593Smuzhiyun bool connected;
648*4882a593Smuzhiyun u8 tos, ttl;
649*4882a593Smuzhiyun __be32 dst;
650*4882a593Smuzhiyun __be16 df;
651*4882a593Smuzhiyun
652*4882a593Smuzhiyun inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
653*4882a593Smuzhiyun connected = (tunnel->parms.iph.daddr != 0);
654*4882a593Smuzhiyun
655*4882a593Smuzhiyun memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
656*4882a593Smuzhiyun
657*4882a593Smuzhiyun dst = tnl_params->daddr;
658*4882a593Smuzhiyun if (dst == 0) {
659*4882a593Smuzhiyun /* NBMA tunnel */
660*4882a593Smuzhiyun
661*4882a593Smuzhiyun if (!skb_dst(skb)) {
662*4882a593Smuzhiyun dev->stats.tx_fifo_errors++;
663*4882a593Smuzhiyun goto tx_error;
664*4882a593Smuzhiyun }
665*4882a593Smuzhiyun
666*4882a593Smuzhiyun tun_info = skb_tunnel_info(skb);
667*4882a593Smuzhiyun if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
668*4882a593Smuzhiyun ip_tunnel_info_af(tun_info) == AF_INET &&
669*4882a593Smuzhiyun tun_info->key.u.ipv4.dst) {
670*4882a593Smuzhiyun dst = tun_info->key.u.ipv4.dst;
671*4882a593Smuzhiyun md = true;
672*4882a593Smuzhiyun connected = true;
673*4882a593Smuzhiyun }
674*4882a593Smuzhiyun else if (skb->protocol == htons(ETH_P_IP)) {
675*4882a593Smuzhiyun rt = skb_rtable(skb);
676*4882a593Smuzhiyun dst = rt_nexthop(rt, inner_iph->daddr);
677*4882a593Smuzhiyun }
678*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_IPV6)
679*4882a593Smuzhiyun else if (skb->protocol == htons(ETH_P_IPV6)) {
680*4882a593Smuzhiyun const struct in6_addr *addr6;
681*4882a593Smuzhiyun struct neighbour *neigh;
682*4882a593Smuzhiyun bool do_tx_error_icmp;
683*4882a593Smuzhiyun int addr_type;
684*4882a593Smuzhiyun
685*4882a593Smuzhiyun neigh = dst_neigh_lookup(skb_dst(skb),
686*4882a593Smuzhiyun &ipv6_hdr(skb)->daddr);
687*4882a593Smuzhiyun if (!neigh)
688*4882a593Smuzhiyun goto tx_error;
689*4882a593Smuzhiyun
690*4882a593Smuzhiyun addr6 = (const struct in6_addr *)&neigh->primary_key;
691*4882a593Smuzhiyun addr_type = ipv6_addr_type(addr6);
692*4882a593Smuzhiyun
693*4882a593Smuzhiyun if (addr_type == IPV6_ADDR_ANY) {
694*4882a593Smuzhiyun addr6 = &ipv6_hdr(skb)->daddr;
695*4882a593Smuzhiyun addr_type = ipv6_addr_type(addr6);
696*4882a593Smuzhiyun }
697*4882a593Smuzhiyun
698*4882a593Smuzhiyun if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
699*4882a593Smuzhiyun do_tx_error_icmp = true;
700*4882a593Smuzhiyun else {
701*4882a593Smuzhiyun do_tx_error_icmp = false;
702*4882a593Smuzhiyun dst = addr6->s6_addr32[3];
703*4882a593Smuzhiyun }
704*4882a593Smuzhiyun neigh_release(neigh);
705*4882a593Smuzhiyun if (do_tx_error_icmp)
706*4882a593Smuzhiyun goto tx_error_icmp;
707*4882a593Smuzhiyun }
708*4882a593Smuzhiyun #endif
709*4882a593Smuzhiyun else
710*4882a593Smuzhiyun goto tx_error;
711*4882a593Smuzhiyun
712*4882a593Smuzhiyun if (!md)
713*4882a593Smuzhiyun connected = false;
714*4882a593Smuzhiyun }
715*4882a593Smuzhiyun
716*4882a593Smuzhiyun tos = tnl_params->tos;
717*4882a593Smuzhiyun if (tos & 0x1) {
718*4882a593Smuzhiyun tos &= ~0x1;
719*4882a593Smuzhiyun if (skb->protocol == htons(ETH_P_IP)) {
720*4882a593Smuzhiyun tos = inner_iph->tos;
721*4882a593Smuzhiyun connected = false;
722*4882a593Smuzhiyun } else if (skb->protocol == htons(ETH_P_IPV6)) {
723*4882a593Smuzhiyun tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
724*4882a593Smuzhiyun connected = false;
725*4882a593Smuzhiyun }
726*4882a593Smuzhiyun }
727*4882a593Smuzhiyun
728*4882a593Smuzhiyun ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
729*4882a593Smuzhiyun tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
730*4882a593Smuzhiyun tunnel->fwmark, skb_get_hash(skb));
731*4882a593Smuzhiyun
732*4882a593Smuzhiyun if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
733*4882a593Smuzhiyun goto tx_error;
734*4882a593Smuzhiyun
735*4882a593Smuzhiyun if (connected && md) {
736*4882a593Smuzhiyun use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
737*4882a593Smuzhiyun if (use_cache)
738*4882a593Smuzhiyun rt = dst_cache_get_ip4(&tun_info->dst_cache,
739*4882a593Smuzhiyun &fl4.saddr);
740*4882a593Smuzhiyun } else {
741*4882a593Smuzhiyun rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
742*4882a593Smuzhiyun &fl4.saddr) : NULL;
743*4882a593Smuzhiyun }
744*4882a593Smuzhiyun
745*4882a593Smuzhiyun if (!rt) {
746*4882a593Smuzhiyun rt = ip_route_output_key(tunnel->net, &fl4);
747*4882a593Smuzhiyun
748*4882a593Smuzhiyun if (IS_ERR(rt)) {
749*4882a593Smuzhiyun dev->stats.tx_carrier_errors++;
750*4882a593Smuzhiyun goto tx_error;
751*4882a593Smuzhiyun }
752*4882a593Smuzhiyun if (use_cache)
753*4882a593Smuzhiyun dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
754*4882a593Smuzhiyun fl4.saddr);
755*4882a593Smuzhiyun else if (!md && connected)
756*4882a593Smuzhiyun dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
757*4882a593Smuzhiyun fl4.saddr);
758*4882a593Smuzhiyun }
759*4882a593Smuzhiyun
760*4882a593Smuzhiyun if (rt->dst.dev == dev) {
761*4882a593Smuzhiyun ip_rt_put(rt);
762*4882a593Smuzhiyun dev->stats.collisions++;
763*4882a593Smuzhiyun goto tx_error;
764*4882a593Smuzhiyun }
765*4882a593Smuzhiyun
766*4882a593Smuzhiyun df = tnl_params->frag_off;
767*4882a593Smuzhiyun if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
768*4882a593Smuzhiyun df |= (inner_iph->frag_off & htons(IP_DF));
769*4882a593Smuzhiyun
770*4882a593Smuzhiyun if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
771*4882a593Smuzhiyun ip_rt_put(rt);
772*4882a593Smuzhiyun goto tx_error;
773*4882a593Smuzhiyun }
774*4882a593Smuzhiyun
775*4882a593Smuzhiyun if (tunnel->err_count > 0) {
776*4882a593Smuzhiyun if (time_before(jiffies,
777*4882a593Smuzhiyun tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
778*4882a593Smuzhiyun tunnel->err_count--;
779*4882a593Smuzhiyun
780*4882a593Smuzhiyun dst_link_failure(skb);
781*4882a593Smuzhiyun } else
782*4882a593Smuzhiyun tunnel->err_count = 0;
783*4882a593Smuzhiyun }
784*4882a593Smuzhiyun
785*4882a593Smuzhiyun tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
786*4882a593Smuzhiyun ttl = tnl_params->ttl;
787*4882a593Smuzhiyun if (ttl == 0) {
788*4882a593Smuzhiyun if (skb->protocol == htons(ETH_P_IP))
789*4882a593Smuzhiyun ttl = inner_iph->ttl;
790*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_IPV6)
791*4882a593Smuzhiyun else if (skb->protocol == htons(ETH_P_IPV6))
792*4882a593Smuzhiyun ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
793*4882a593Smuzhiyun #endif
794*4882a593Smuzhiyun else
795*4882a593Smuzhiyun ttl = ip4_dst_hoplimit(&rt->dst);
796*4882a593Smuzhiyun }
797*4882a593Smuzhiyun
798*4882a593Smuzhiyun max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
799*4882a593Smuzhiyun + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
800*4882a593Smuzhiyun if (max_headroom > dev->needed_headroom)
801*4882a593Smuzhiyun dev->needed_headroom = max_headroom;
802*4882a593Smuzhiyun
803*4882a593Smuzhiyun if (skb_cow_head(skb, dev->needed_headroom)) {
804*4882a593Smuzhiyun ip_rt_put(rt);
805*4882a593Smuzhiyun dev->stats.tx_dropped++;
806*4882a593Smuzhiyun kfree_skb(skb);
807*4882a593Smuzhiyun return;
808*4882a593Smuzhiyun }
809*4882a593Smuzhiyun
810*4882a593Smuzhiyun iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
811*4882a593Smuzhiyun df, !net_eq(tunnel->net, dev_net(dev)));
812*4882a593Smuzhiyun return;
813*4882a593Smuzhiyun
814*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_IPV6)
815*4882a593Smuzhiyun tx_error_icmp:
816*4882a593Smuzhiyun dst_link_failure(skb);
817*4882a593Smuzhiyun #endif
818*4882a593Smuzhiyun tx_error:
819*4882a593Smuzhiyun dev->stats.tx_errors++;
820*4882a593Smuzhiyun kfree_skb(skb);
821*4882a593Smuzhiyun }
822*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
823*4882a593Smuzhiyun
/*
 * Apply the parameters in @p to the existing tunnel @t that belongs to @dev.
 *
 * Source/destination address and both keys are rewritten between a call to
 * ip_tunnel_del() and a call to ip_tunnel_add() (presumably because the
 * tunnel's lookup position depends on them -- confirm against those helpers).
 * For non-Ethernet devices the first four bytes of dev->dev_addr and
 * dev->broadcast mirror the new addresses.
 *
 * If the underlying link index or the fwmark differs from what the tunnel
 * currently has, both are updated and the device is re-bound through
 * ip_tunnel_bind_dev(); its return value becomes dev->mtu only when @set_mtu
 * is true.
 *
 * The tunnel's dst cache is reset and netdev_state_change() is called on
 * every invocation.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	struct iphdr *cur = &t->parms.iph;
	const struct iphdr *req = &p->iph;
	bool rebind;

	/* Fields changed between del and add: addresses and keys. */
	ip_tunnel_del(itn, t);
	cur->saddr = req->saddr;
	cur->daddr = req->daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &req->saddr, 4);
		memcpy(dev->broadcast, &req->daddr, 4);
	}
	ip_tunnel_add(itn, t);

	/* Remaining outer-header template fields. */
	cur->ttl = req->ttl;
	cur->tos = req->tos;
	cur->frag_off = req->frag_off;

	rebind = t->parms.link != p->link || t->fwmark != fwmark;
	if (rebind) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}

	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}
858*4882a593Smuzhiyun
ip_tunnel_ctl(struct net_device * dev,struct ip_tunnel_parm * p,int cmd)859*4882a593Smuzhiyun int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
860*4882a593Smuzhiyun {
861*4882a593Smuzhiyun int err = 0;
862*4882a593Smuzhiyun struct ip_tunnel *t = netdev_priv(dev);
863*4882a593Smuzhiyun struct net *net = t->net;
864*4882a593Smuzhiyun struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
865*4882a593Smuzhiyun
866*4882a593Smuzhiyun switch (cmd) {
867*4882a593Smuzhiyun case SIOCGETTUNNEL:
868*4882a593Smuzhiyun if (dev == itn->fb_tunnel_dev) {
869*4882a593Smuzhiyun t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
870*4882a593Smuzhiyun if (!t)
871*4882a593Smuzhiyun t = netdev_priv(dev);
872*4882a593Smuzhiyun }
873*4882a593Smuzhiyun memcpy(p, &t->parms, sizeof(*p));
874*4882a593Smuzhiyun break;
875*4882a593Smuzhiyun
876*4882a593Smuzhiyun case SIOCADDTUNNEL:
877*4882a593Smuzhiyun case SIOCCHGTUNNEL:
878*4882a593Smuzhiyun err = -EPERM;
879*4882a593Smuzhiyun if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
880*4882a593Smuzhiyun goto done;
881*4882a593Smuzhiyun if (p->iph.ttl)
882*4882a593Smuzhiyun p->iph.frag_off |= htons(IP_DF);
883*4882a593Smuzhiyun if (!(p->i_flags & VTI_ISVTI)) {
884*4882a593Smuzhiyun if (!(p->i_flags & TUNNEL_KEY))
885*4882a593Smuzhiyun p->i_key = 0;
886*4882a593Smuzhiyun if (!(p->o_flags & TUNNEL_KEY))
887*4882a593Smuzhiyun p->o_key = 0;
888*4882a593Smuzhiyun }
889*4882a593Smuzhiyun
890*4882a593Smuzhiyun t = ip_tunnel_find(itn, p, itn->type);
891*4882a593Smuzhiyun
892*4882a593Smuzhiyun if (cmd == SIOCADDTUNNEL) {
893*4882a593Smuzhiyun if (!t) {
894*4882a593Smuzhiyun t = ip_tunnel_create(net, itn, p);
895*4882a593Smuzhiyun err = PTR_ERR_OR_ZERO(t);
896*4882a593Smuzhiyun break;
897*4882a593Smuzhiyun }
898*4882a593Smuzhiyun
899*4882a593Smuzhiyun err = -EEXIST;
900*4882a593Smuzhiyun break;
901*4882a593Smuzhiyun }
902*4882a593Smuzhiyun if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
903*4882a593Smuzhiyun if (t) {
904*4882a593Smuzhiyun if (t->dev != dev) {
905*4882a593Smuzhiyun err = -EEXIST;
906*4882a593Smuzhiyun break;
907*4882a593Smuzhiyun }
908*4882a593Smuzhiyun } else {
909*4882a593Smuzhiyun unsigned int nflags = 0;
910*4882a593Smuzhiyun
911*4882a593Smuzhiyun if (ipv4_is_multicast(p->iph.daddr))
912*4882a593Smuzhiyun nflags = IFF_BROADCAST;
913*4882a593Smuzhiyun else if (p->iph.daddr)
914*4882a593Smuzhiyun nflags = IFF_POINTOPOINT;
915*4882a593Smuzhiyun
916*4882a593Smuzhiyun if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
917*4882a593Smuzhiyun err = -EINVAL;
918*4882a593Smuzhiyun break;
919*4882a593Smuzhiyun }
920*4882a593Smuzhiyun
921*4882a593Smuzhiyun t = netdev_priv(dev);
922*4882a593Smuzhiyun }
923*4882a593Smuzhiyun }
924*4882a593Smuzhiyun
925*4882a593Smuzhiyun if (t) {
926*4882a593Smuzhiyun err = 0;
927*4882a593Smuzhiyun ip_tunnel_update(itn, t, dev, p, true, 0);
928*4882a593Smuzhiyun } else {
929*4882a593Smuzhiyun err = -ENOENT;
930*4882a593Smuzhiyun }
931*4882a593Smuzhiyun break;
932*4882a593Smuzhiyun
933*4882a593Smuzhiyun case SIOCDELTUNNEL:
934*4882a593Smuzhiyun err = -EPERM;
935*4882a593Smuzhiyun if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
936*4882a593Smuzhiyun goto done;
937*4882a593Smuzhiyun
938*4882a593Smuzhiyun if (dev == itn->fb_tunnel_dev) {
939*4882a593Smuzhiyun err = -ENOENT;
940*4882a593Smuzhiyun t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
941*4882a593Smuzhiyun if (!t)
942*4882a593Smuzhiyun goto done;
943*4882a593Smuzhiyun err = -EPERM;
944*4882a593Smuzhiyun if (t == netdev_priv(itn->fb_tunnel_dev))
945*4882a593Smuzhiyun goto done;
946*4882a593Smuzhiyun dev = t->dev;
947*4882a593Smuzhiyun }
948*4882a593Smuzhiyun unregister_netdevice(dev);
949*4882a593Smuzhiyun err = 0;
950*4882a593Smuzhiyun break;
951*4882a593Smuzhiyun
952*4882a593Smuzhiyun default:
953*4882a593Smuzhiyun err = -EINVAL;
954*4882a593Smuzhiyun }
955*4882a593Smuzhiyun
956*4882a593Smuzhiyun done:
957*4882a593Smuzhiyun return err;
958*4882a593Smuzhiyun }
959*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
960*4882a593Smuzhiyun
ip_tunnel_ioctl(struct net_device * dev,struct ifreq * ifr,int cmd)961*4882a593Smuzhiyun int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
962*4882a593Smuzhiyun {
963*4882a593Smuzhiyun struct ip_tunnel_parm p;
964*4882a593Smuzhiyun int err;
965*4882a593Smuzhiyun
966*4882a593Smuzhiyun if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
967*4882a593Smuzhiyun return -EFAULT;
968*4882a593Smuzhiyun err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
969*4882a593Smuzhiyun if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
970*4882a593Smuzhiyun return -EFAULT;
971*4882a593Smuzhiyun return err;
972*4882a593Smuzhiyun }
973*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
974*4882a593Smuzhiyun
/*
 * Set dev->mtu to @new_mtu, bounded by what the tunnel can carry.
 *
 * The upper bound is IP_MAX_MTU minus the tunnel header length and one
 * struct iphdr, and for ARPHRD_ETHER devices additionally minus
 * dev->hard_header_len.
 *
 * Returns -EINVAL if @new_mtu is below ETH_MIN_MTU, or if it exceeds the
 * upper bound while @strict is true.  With @strict false an oversized value
 * is silently lowered to the upper bound.  Returns 0 once dev->mtu is set.
 */
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int overhead = tunnel->hlen + sizeof(struct iphdr);
	int ceiling = IP_MAX_MTU - overhead;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (dev->type == ARPHRD_ETHER)
		ceiling -= dev->hard_header_len;

	if (new_mtu <= ceiling) {
		dev->mtu = new_mtu;
		return 0;
	}

	if (strict)
		return -EINVAL;

	dev->mtu = ceiling;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
998*4882a593Smuzhiyun
ip_tunnel_change_mtu(struct net_device * dev,int new_mtu)999*4882a593Smuzhiyun int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1000*4882a593Smuzhiyun {
1001*4882a593Smuzhiyun return __ip_tunnel_change_mtu(dev, new_mtu, true);
1002*4882a593Smuzhiyun }
1003*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
1004*4882a593Smuzhiyun
ip_tunnel_dev_free(struct net_device * dev)1005*4882a593Smuzhiyun static void ip_tunnel_dev_free(struct net_device *dev)
1006*4882a593Smuzhiyun {
1007*4882a593Smuzhiyun struct ip_tunnel *tunnel = netdev_priv(dev);
1008*4882a593Smuzhiyun
1009*4882a593Smuzhiyun gro_cells_destroy(&tunnel->gro_cells);
1010*4882a593Smuzhiyun dst_cache_destroy(&tunnel->dst_cache);
1011*4882a593Smuzhiyun free_percpu(dev->tstats);
1012*4882a593Smuzhiyun }
1013*4882a593Smuzhiyun
ip_tunnel_dellink(struct net_device * dev,struct list_head * head)1014*4882a593Smuzhiyun void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
1015*4882a593Smuzhiyun {
1016*4882a593Smuzhiyun struct ip_tunnel *tunnel = netdev_priv(dev);
1017*4882a593Smuzhiyun struct ip_tunnel_net *itn;
1018*4882a593Smuzhiyun
1019*4882a593Smuzhiyun itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
1020*4882a593Smuzhiyun
1021*4882a593Smuzhiyun if (itn->fb_tunnel_dev != dev) {
1022*4882a593Smuzhiyun ip_tunnel_del(itn, netdev_priv(dev));
1023*4882a593Smuzhiyun unregister_netdevice_queue(dev, head);
1024*4882a593Smuzhiyun }
1025*4882a593Smuzhiyun }
1026*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
1027*4882a593Smuzhiyun
ip_tunnel_get_link_net(const struct net_device * dev)1028*4882a593Smuzhiyun struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1029*4882a593Smuzhiyun {
1030*4882a593Smuzhiyun struct ip_tunnel *tunnel = netdev_priv(dev);
1031*4882a593Smuzhiyun
1032*4882a593Smuzhiyun return tunnel->net;
1033*4882a593Smuzhiyun }
1034*4882a593Smuzhiyun EXPORT_SYMBOL(ip_tunnel_get_link_net);
1035*4882a593Smuzhiyun
ip_tunnel_get_iflink(const struct net_device * dev)1036*4882a593Smuzhiyun int ip_tunnel_get_iflink(const struct net_device *dev)
1037*4882a593Smuzhiyun {
1038*4882a593Smuzhiyun struct ip_tunnel *tunnel = netdev_priv(dev);
1039*4882a593Smuzhiyun
1040*4882a593Smuzhiyun return tunnel->parms.link;
1041*4882a593Smuzhiyun }
1042*4882a593Smuzhiyun EXPORT_SYMBOL(ip_tunnel_get_iflink);
1043*4882a593Smuzhiyun
ip_tunnel_init_net(struct net * net,unsigned int ip_tnl_net_id,struct rtnl_link_ops * ops,char * devname)1044*4882a593Smuzhiyun int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1045*4882a593Smuzhiyun struct rtnl_link_ops *ops, char *devname)
1046*4882a593Smuzhiyun {
1047*4882a593Smuzhiyun struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1048*4882a593Smuzhiyun struct ip_tunnel_parm parms;
1049*4882a593Smuzhiyun unsigned int i;
1050*4882a593Smuzhiyun
1051*4882a593Smuzhiyun itn->rtnl_link_ops = ops;
1052*4882a593Smuzhiyun for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1053*4882a593Smuzhiyun INIT_HLIST_HEAD(&itn->tunnels[i]);
1054*4882a593Smuzhiyun
1055*4882a593Smuzhiyun if (!ops || !net_has_fallback_tunnels(net)) {
1056*4882a593Smuzhiyun struct ip_tunnel_net *it_init_net;
1057*4882a593Smuzhiyun
1058*4882a593Smuzhiyun it_init_net = net_generic(&init_net, ip_tnl_net_id);
1059*4882a593Smuzhiyun itn->type = it_init_net->type;
1060*4882a593Smuzhiyun itn->fb_tunnel_dev = NULL;
1061*4882a593Smuzhiyun return 0;
1062*4882a593Smuzhiyun }
1063*4882a593Smuzhiyun
1064*4882a593Smuzhiyun memset(&parms, 0, sizeof(parms));
1065*4882a593Smuzhiyun if (devname)
1066*4882a593Smuzhiyun strlcpy(parms.name, devname, IFNAMSIZ);
1067*4882a593Smuzhiyun
1068*4882a593Smuzhiyun rtnl_lock();
1069*4882a593Smuzhiyun itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1070*4882a593Smuzhiyun /* FB netdevice is special: we have one, and only one per netns.
1071*4882a593Smuzhiyun * Allowing to move it to another netns is clearly unsafe.
1072*4882a593Smuzhiyun */
1073*4882a593Smuzhiyun if (!IS_ERR(itn->fb_tunnel_dev)) {
1074*4882a593Smuzhiyun itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1075*4882a593Smuzhiyun itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1076*4882a593Smuzhiyun ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1077*4882a593Smuzhiyun itn->type = itn->fb_tunnel_dev->type;
1078*4882a593Smuzhiyun }
1079*4882a593Smuzhiyun rtnl_unlock();
1080*4882a593Smuzhiyun
1081*4882a593Smuzhiyun return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1082*4882a593Smuzhiyun }
1083*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1084*4882a593Smuzhiyun
ip_tunnel_destroy(struct net * net,struct ip_tunnel_net * itn,struct list_head * head,struct rtnl_link_ops * ops)1085*4882a593Smuzhiyun static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
1086*4882a593Smuzhiyun struct list_head *head,
1087*4882a593Smuzhiyun struct rtnl_link_ops *ops)
1088*4882a593Smuzhiyun {
1089*4882a593Smuzhiyun struct net_device *dev, *aux;
1090*4882a593Smuzhiyun int h;
1091*4882a593Smuzhiyun
1092*4882a593Smuzhiyun for_each_netdev_safe(net, dev, aux)
1093*4882a593Smuzhiyun if (dev->rtnl_link_ops == ops)
1094*4882a593Smuzhiyun unregister_netdevice_queue(dev, head);
1095*4882a593Smuzhiyun
1096*4882a593Smuzhiyun for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1097*4882a593Smuzhiyun struct ip_tunnel *t;
1098*4882a593Smuzhiyun struct hlist_node *n;
1099*4882a593Smuzhiyun struct hlist_head *thead = &itn->tunnels[h];
1100*4882a593Smuzhiyun
1101*4882a593Smuzhiyun hlist_for_each_entry_safe(t, n, thead, hash_node)
1102*4882a593Smuzhiyun /* If dev is in the same netns, it has already
1103*4882a593Smuzhiyun * been added to the list by the previous loop.
1104*4882a593Smuzhiyun */
1105*4882a593Smuzhiyun if (!net_eq(dev_net(t->dev), net))
1106*4882a593Smuzhiyun unregister_netdevice_queue(t->dev, head);
1107*4882a593Smuzhiyun }
1108*4882a593Smuzhiyun }
1109*4882a593Smuzhiyun
ip_tunnel_delete_nets(struct list_head * net_list,unsigned int id,struct rtnl_link_ops * ops)1110*4882a593Smuzhiyun void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1111*4882a593Smuzhiyun struct rtnl_link_ops *ops)
1112*4882a593Smuzhiyun {
1113*4882a593Smuzhiyun struct ip_tunnel_net *itn;
1114*4882a593Smuzhiyun struct net *net;
1115*4882a593Smuzhiyun LIST_HEAD(list);
1116*4882a593Smuzhiyun
1117*4882a593Smuzhiyun rtnl_lock();
1118*4882a593Smuzhiyun list_for_each_entry(net, net_list, exit_list) {
1119*4882a593Smuzhiyun itn = net_generic(net, id);
1120*4882a593Smuzhiyun ip_tunnel_destroy(net, itn, &list, ops);
1121*4882a593Smuzhiyun }
1122*4882a593Smuzhiyun unregister_netdevice_many(&list);
1123*4882a593Smuzhiyun rtnl_unlock();
1124*4882a593Smuzhiyun }
1125*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1126*4882a593Smuzhiyun
/*
 * rtnetlink "new link" helper: register @dev as a tunnel configured by @p and
 * @fwmark.
 *
 * Fails with -EEXIST when a collect_md tunnel is requested and one is already
 * recorded in the netns state, or when a non-collect_md tunnel matching @p
 * and the device type already exists.
 *
 * After registration an ARPHRD_ETHER device without IFLA_ADDRESS gets a
 * random MAC.  The MTU comes from ip_tunnel_bind_dev() unless IFLA_MTU was
 * supplied, in which case dev->mtu is clamped to
 * [ETH_MIN_MTU, IP_MAX_MTU - tunnel overhead].  If setting the MTU fails the
 * device is unregistered again.
 *
 * Returns 0 on success or a negative errno.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn = net_generic(net, nt->ip_tnl_net_id);
	bool busy;
	int mtu;
	int err;

	if (nt->collect_md)
		busy = rtnl_dereference(itn->collect_md_tun) != NULL;
	else
		busy = ip_tunnel_find(itn, p, dev->type) != NULL;
	if (busy)
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;

	err = register_netdevice(dev);
	if (err)
		return err;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		if (dev->type == ARPHRD_ETHER)
			max -= dev->hard_header_len;

		/* Honour the requested MTU, but keep it within bounds. */
		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
	}

	err = dev_set_mtu(dev, mtu);
	if (err) {
		unregister_netdevice(dev);
		return err;
	}

	ip_tunnel_add(itn, nt);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1180*4882a593Smuzhiyun
/*
 * rtnetlink "change link" helper: apply @p and @fwmark to the tunnel on @dev.
 *
 * Returns -EINVAL for the fallback device, -EEXIST if @p matches a tunnel on
 * another device, and -EINVAL if (for non-Ethernet devices with no matching
 * tunnel) the new destination would need a different
 * POINTOPOINT/BROADCAST flag than @dev currently has.  Otherwise the tunnel
 * is updated through ip_tunnel_update(), which sets the MTU only when
 * IFLA_MTU is absent from @tb, and 0 is returned.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *self = netdev_priv(dev);
	struct net *net = self->net;
	struct ip_tunnel_net *itn = net_generic(net, self->ip_tnl_net_id);
	struct ip_tunnel *target;

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	target = ip_tunnel_find(itn, p, dev->type);
	if (target && target->dev != dev)
		return -EEXIST;

	if (!target) {
		/* Nothing matches the new parameters: update our own tunnel. */
		target = self;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, target, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1218*4882a593Smuzhiyun
ip_tunnel_init(struct net_device * dev)1219*4882a593Smuzhiyun int ip_tunnel_init(struct net_device *dev)
1220*4882a593Smuzhiyun {
1221*4882a593Smuzhiyun struct ip_tunnel *tunnel = netdev_priv(dev);
1222*4882a593Smuzhiyun struct iphdr *iph = &tunnel->parms.iph;
1223*4882a593Smuzhiyun int err;
1224*4882a593Smuzhiyun
1225*4882a593Smuzhiyun dev->needs_free_netdev = true;
1226*4882a593Smuzhiyun dev->priv_destructor = ip_tunnel_dev_free;
1227*4882a593Smuzhiyun dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1228*4882a593Smuzhiyun if (!dev->tstats)
1229*4882a593Smuzhiyun return -ENOMEM;
1230*4882a593Smuzhiyun
1231*4882a593Smuzhiyun err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1232*4882a593Smuzhiyun if (err) {
1233*4882a593Smuzhiyun free_percpu(dev->tstats);
1234*4882a593Smuzhiyun return err;
1235*4882a593Smuzhiyun }
1236*4882a593Smuzhiyun
1237*4882a593Smuzhiyun err = gro_cells_init(&tunnel->gro_cells, dev);
1238*4882a593Smuzhiyun if (err) {
1239*4882a593Smuzhiyun dst_cache_destroy(&tunnel->dst_cache);
1240*4882a593Smuzhiyun free_percpu(dev->tstats);
1241*4882a593Smuzhiyun return err;
1242*4882a593Smuzhiyun }
1243*4882a593Smuzhiyun
1244*4882a593Smuzhiyun tunnel->dev = dev;
1245*4882a593Smuzhiyun tunnel->net = dev_net(dev);
1246*4882a593Smuzhiyun strcpy(tunnel->parms.name, dev->name);
1247*4882a593Smuzhiyun iph->version = 4;
1248*4882a593Smuzhiyun iph->ihl = 5;
1249*4882a593Smuzhiyun
1250*4882a593Smuzhiyun if (tunnel->collect_md)
1251*4882a593Smuzhiyun netif_keep_dst(dev);
1252*4882a593Smuzhiyun return 0;
1253*4882a593Smuzhiyun }
1254*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip_tunnel_init);
1255*4882a593Smuzhiyun
ip_tunnel_uninit(struct net_device * dev)1256*4882a593Smuzhiyun void ip_tunnel_uninit(struct net_device *dev)
1257*4882a593Smuzhiyun {
1258*4882a593Smuzhiyun struct ip_tunnel *tunnel = netdev_priv(dev);
1259*4882a593Smuzhiyun struct net *net = tunnel->net;
1260*4882a593Smuzhiyun struct ip_tunnel_net *itn;
1261*4882a593Smuzhiyun
1262*4882a593Smuzhiyun itn = net_generic(net, tunnel->ip_tnl_net_id);
1263*4882a593Smuzhiyun ip_tunnel_del(itn, netdev_priv(dev));
1264*4882a593Smuzhiyun if (itn->fb_tunnel_dev == dev)
1265*4882a593Smuzhiyun WRITE_ONCE(itn->fb_tunnel_dev, NULL);
1266*4882a593Smuzhiyun
1267*4882a593Smuzhiyun dst_cache_reset(&tunnel->dst_cache);
1268*4882a593Smuzhiyun }
1269*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1270*4882a593Smuzhiyun
1271*4882a593Smuzhiyun /* Do least required initialization, rest of init is done in tunnel_init call */
ip_tunnel_setup(struct net_device * dev,unsigned int net_id)1272*4882a593Smuzhiyun void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1273*4882a593Smuzhiyun {
1274*4882a593Smuzhiyun struct ip_tunnel *tunnel = netdev_priv(dev);
1275*4882a593Smuzhiyun tunnel->ip_tnl_net_id = net_id;
1276*4882a593Smuzhiyun }
1277*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1278*4882a593Smuzhiyun
1279*4882a593Smuzhiyun MODULE_LICENSE("GPL");
1280