// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * mpls tunnels		An implementation of MPLS tunnels using the
 *			lightweight tunnel infrastructure
 *
 * Authors:		Roopa Prabhu, <roopa@cumulusnetworks.com>
 */
8*4882a593Smuzhiyun #include <linux/types.h>
9*4882a593Smuzhiyun #include <linux/skbuff.h>
10*4882a593Smuzhiyun #include <linux/net.h>
11*4882a593Smuzhiyun #include <linux/module.h>
12*4882a593Smuzhiyun #include <linux/mpls.h>
13*4882a593Smuzhiyun #include <linux/vmalloc.h>
14*4882a593Smuzhiyun #include <net/ip.h>
15*4882a593Smuzhiyun #include <net/dst.h>
16*4882a593Smuzhiyun #include <net/lwtunnel.h>
17*4882a593Smuzhiyun #include <net/netevent.h>
18*4882a593Smuzhiyun #include <net/netns/generic.h>
19*4882a593Smuzhiyun #include <net/ip6_fib.h>
20*4882a593Smuzhiyun #include <net/route.h>
21*4882a593Smuzhiyun #include <net/mpls_iptunnel.h>
22*4882a593Smuzhiyun #include <linux/mpls_iptunnel.h>
23*4882a593Smuzhiyun #include "internal.h"
24*4882a593Smuzhiyun
25*4882a593Smuzhiyun static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = {
26*4882a593Smuzhiyun [MPLS_IPTUNNEL_DST] = { .len = sizeof(u32) },
27*4882a593Smuzhiyun [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 },
28*4882a593Smuzhiyun };
29*4882a593Smuzhiyun
mpls_encap_size(struct mpls_iptunnel_encap * en)30*4882a593Smuzhiyun static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
31*4882a593Smuzhiyun {
32*4882a593Smuzhiyun /* The size of the layer 2.5 labels to be added for this route */
33*4882a593Smuzhiyun return en->labels * sizeof(struct mpls_shim_hdr);
34*4882a593Smuzhiyun }
35*4882a593Smuzhiyun
mpls_xmit(struct sk_buff * skb)36*4882a593Smuzhiyun static int mpls_xmit(struct sk_buff *skb)
37*4882a593Smuzhiyun {
38*4882a593Smuzhiyun struct mpls_iptunnel_encap *tun_encap_info;
39*4882a593Smuzhiyun struct mpls_shim_hdr *hdr;
40*4882a593Smuzhiyun struct net_device *out_dev;
41*4882a593Smuzhiyun unsigned int hh_len;
42*4882a593Smuzhiyun unsigned int new_header_size;
43*4882a593Smuzhiyun unsigned int mtu;
44*4882a593Smuzhiyun struct dst_entry *dst = skb_dst(skb);
45*4882a593Smuzhiyun struct rtable *rt = NULL;
46*4882a593Smuzhiyun struct rt6_info *rt6 = NULL;
47*4882a593Smuzhiyun struct mpls_dev *out_mdev;
48*4882a593Smuzhiyun struct net *net;
49*4882a593Smuzhiyun int err = 0;
50*4882a593Smuzhiyun bool bos;
51*4882a593Smuzhiyun int i;
52*4882a593Smuzhiyun unsigned int ttl;
53*4882a593Smuzhiyun
54*4882a593Smuzhiyun /* Find the output device */
55*4882a593Smuzhiyun out_dev = dst->dev;
56*4882a593Smuzhiyun net = dev_net(out_dev);
57*4882a593Smuzhiyun
58*4882a593Smuzhiyun skb_orphan(skb);
59*4882a593Smuzhiyun
60*4882a593Smuzhiyun if (!mpls_output_possible(out_dev) ||
61*4882a593Smuzhiyun !dst->lwtstate || skb_warn_if_lro(skb))
62*4882a593Smuzhiyun goto drop;
63*4882a593Smuzhiyun
64*4882a593Smuzhiyun skb_forward_csum(skb);
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
67*4882a593Smuzhiyun
68*4882a593Smuzhiyun /* Obtain the ttl using the following set of rules.
69*4882a593Smuzhiyun *
70*4882a593Smuzhiyun * LWT ttl propagation setting:
71*4882a593Smuzhiyun * - disabled => use default TTL value from LWT
72*4882a593Smuzhiyun * - enabled => use TTL value from IPv4/IPv6 header
73*4882a593Smuzhiyun * - default =>
74*4882a593Smuzhiyun * Global ttl propagation setting:
75*4882a593Smuzhiyun * - disabled => use default TTL value from global setting
76*4882a593Smuzhiyun * - enabled => use TTL value from IPv4/IPv6 header
77*4882a593Smuzhiyun */
78*4882a593Smuzhiyun if (dst->ops->family == AF_INET) {
79*4882a593Smuzhiyun if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
80*4882a593Smuzhiyun ttl = tun_encap_info->default_ttl;
81*4882a593Smuzhiyun else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
82*4882a593Smuzhiyun !net->mpls.ip_ttl_propagate)
83*4882a593Smuzhiyun ttl = net->mpls.default_ttl;
84*4882a593Smuzhiyun else
85*4882a593Smuzhiyun ttl = ip_hdr(skb)->ttl;
86*4882a593Smuzhiyun rt = (struct rtable *)dst;
87*4882a593Smuzhiyun } else if (dst->ops->family == AF_INET6) {
88*4882a593Smuzhiyun if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED)
89*4882a593Smuzhiyun ttl = tun_encap_info->default_ttl;
90*4882a593Smuzhiyun else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
91*4882a593Smuzhiyun !net->mpls.ip_ttl_propagate)
92*4882a593Smuzhiyun ttl = net->mpls.default_ttl;
93*4882a593Smuzhiyun else
94*4882a593Smuzhiyun ttl = ipv6_hdr(skb)->hop_limit;
95*4882a593Smuzhiyun rt6 = (struct rt6_info *)dst;
96*4882a593Smuzhiyun } else {
97*4882a593Smuzhiyun goto drop;
98*4882a593Smuzhiyun }
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun /* Verify the destination can hold the packet */
101*4882a593Smuzhiyun new_header_size = mpls_encap_size(tun_encap_info);
102*4882a593Smuzhiyun mtu = mpls_dev_mtu(out_dev);
103*4882a593Smuzhiyun if (mpls_pkt_too_big(skb, mtu - new_header_size))
104*4882a593Smuzhiyun goto drop;
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun hh_len = LL_RESERVED_SPACE(out_dev);
107*4882a593Smuzhiyun if (!out_dev->header_ops)
108*4882a593Smuzhiyun hh_len = 0;
109*4882a593Smuzhiyun
110*4882a593Smuzhiyun /* Ensure there is enough space for the headers in the skb */
111*4882a593Smuzhiyun if (skb_cow(skb, hh_len + new_header_size))
112*4882a593Smuzhiyun goto drop;
113*4882a593Smuzhiyun
114*4882a593Smuzhiyun skb_set_inner_protocol(skb, skb->protocol);
115*4882a593Smuzhiyun skb_reset_inner_network_header(skb);
116*4882a593Smuzhiyun
117*4882a593Smuzhiyun skb_push(skb, new_header_size);
118*4882a593Smuzhiyun
119*4882a593Smuzhiyun skb_reset_network_header(skb);
120*4882a593Smuzhiyun
121*4882a593Smuzhiyun skb->dev = out_dev;
122*4882a593Smuzhiyun skb->protocol = htons(ETH_P_MPLS_UC);
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun /* Push the new labels */
125*4882a593Smuzhiyun hdr = mpls_hdr(skb);
126*4882a593Smuzhiyun bos = true;
127*4882a593Smuzhiyun for (i = tun_encap_info->labels - 1; i >= 0; i--) {
128*4882a593Smuzhiyun hdr[i] = mpls_entry_encode(tun_encap_info->label[i],
129*4882a593Smuzhiyun ttl, 0, bos);
130*4882a593Smuzhiyun bos = false;
131*4882a593Smuzhiyun }
132*4882a593Smuzhiyun
133*4882a593Smuzhiyun mpls_stats_inc_outucastpkts(out_dev, skb);
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun if (rt) {
136*4882a593Smuzhiyun if (rt->rt_gw_family == AF_INET6)
137*4882a593Smuzhiyun err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt->rt_gw6,
138*4882a593Smuzhiyun skb);
139*4882a593Smuzhiyun else
140*4882a593Smuzhiyun err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gw4,
141*4882a593Smuzhiyun skb);
142*4882a593Smuzhiyun } else if (rt6) {
143*4882a593Smuzhiyun if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) {
144*4882a593Smuzhiyun /* 6PE (RFC 4798) */
145*4882a593Smuzhiyun err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt6->rt6i_gateway.s6_addr32[3],
146*4882a593Smuzhiyun skb);
147*4882a593Smuzhiyun } else
148*4882a593Smuzhiyun err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway,
149*4882a593Smuzhiyun skb);
150*4882a593Smuzhiyun }
151*4882a593Smuzhiyun if (err)
152*4882a593Smuzhiyun net_dbg_ratelimited("%s: packet transmission failed: %d\n",
153*4882a593Smuzhiyun __func__, err);
154*4882a593Smuzhiyun
155*4882a593Smuzhiyun return LWTUNNEL_XMIT_DONE;
156*4882a593Smuzhiyun
157*4882a593Smuzhiyun drop:
158*4882a593Smuzhiyun out_mdev = out_dev ? mpls_dev_get(out_dev) : NULL;
159*4882a593Smuzhiyun if (out_mdev)
160*4882a593Smuzhiyun MPLS_INC_STATS(out_mdev, tx_errors);
161*4882a593Smuzhiyun kfree_skb(skb);
162*4882a593Smuzhiyun return -EINVAL;
163*4882a593Smuzhiyun }
164*4882a593Smuzhiyun
mpls_build_state(struct net * net,struct nlattr * nla,unsigned int family,const void * cfg,struct lwtunnel_state ** ts,struct netlink_ext_ack * extack)165*4882a593Smuzhiyun static int mpls_build_state(struct net *net, struct nlattr *nla,
166*4882a593Smuzhiyun unsigned int family, const void *cfg,
167*4882a593Smuzhiyun struct lwtunnel_state **ts,
168*4882a593Smuzhiyun struct netlink_ext_ack *extack)
169*4882a593Smuzhiyun {
170*4882a593Smuzhiyun struct mpls_iptunnel_encap *tun_encap_info;
171*4882a593Smuzhiyun struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1];
172*4882a593Smuzhiyun struct lwtunnel_state *newts;
173*4882a593Smuzhiyun u8 n_labels;
174*4882a593Smuzhiyun int ret;
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun ret = nla_parse_nested_deprecated(tb, MPLS_IPTUNNEL_MAX, nla,
177*4882a593Smuzhiyun mpls_iptunnel_policy, extack);
178*4882a593Smuzhiyun if (ret < 0)
179*4882a593Smuzhiyun return ret;
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun if (!tb[MPLS_IPTUNNEL_DST]) {
182*4882a593Smuzhiyun NL_SET_ERR_MSG(extack, "MPLS_IPTUNNEL_DST attribute is missing");
183*4882a593Smuzhiyun return -EINVAL;
184*4882a593Smuzhiyun }
185*4882a593Smuzhiyun
186*4882a593Smuzhiyun /* determine number of labels */
187*4882a593Smuzhiyun if (nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
188*4882a593Smuzhiyun &n_labels, NULL, extack))
189*4882a593Smuzhiyun return -EINVAL;
190*4882a593Smuzhiyun
191*4882a593Smuzhiyun newts = lwtunnel_state_alloc(struct_size(tun_encap_info, label,
192*4882a593Smuzhiyun n_labels));
193*4882a593Smuzhiyun if (!newts)
194*4882a593Smuzhiyun return -ENOMEM;
195*4882a593Smuzhiyun
196*4882a593Smuzhiyun tun_encap_info = mpls_lwtunnel_encap(newts);
197*4882a593Smuzhiyun ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], n_labels,
198*4882a593Smuzhiyun &tun_encap_info->labels, tun_encap_info->label,
199*4882a593Smuzhiyun extack);
200*4882a593Smuzhiyun if (ret)
201*4882a593Smuzhiyun goto errout;
202*4882a593Smuzhiyun
203*4882a593Smuzhiyun tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT;
204*4882a593Smuzhiyun
205*4882a593Smuzhiyun if (tb[MPLS_IPTUNNEL_TTL]) {
206*4882a593Smuzhiyun tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]);
207*4882a593Smuzhiyun /* TTL 0 implies propagate from IP header */
208*4882a593Smuzhiyun tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ?
209*4882a593Smuzhiyun MPLS_TTL_PROP_DISABLED :
210*4882a593Smuzhiyun MPLS_TTL_PROP_ENABLED;
211*4882a593Smuzhiyun }
212*4882a593Smuzhiyun
213*4882a593Smuzhiyun newts->type = LWTUNNEL_ENCAP_MPLS;
214*4882a593Smuzhiyun newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
215*4882a593Smuzhiyun newts->headroom = mpls_encap_size(tun_encap_info);
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun *ts = newts;
218*4882a593Smuzhiyun
219*4882a593Smuzhiyun return 0;
220*4882a593Smuzhiyun
221*4882a593Smuzhiyun errout:
222*4882a593Smuzhiyun kfree(newts);
223*4882a593Smuzhiyun *ts = NULL;
224*4882a593Smuzhiyun
225*4882a593Smuzhiyun return ret;
226*4882a593Smuzhiyun }
227*4882a593Smuzhiyun
mpls_fill_encap_info(struct sk_buff * skb,struct lwtunnel_state * lwtstate)228*4882a593Smuzhiyun static int mpls_fill_encap_info(struct sk_buff *skb,
229*4882a593Smuzhiyun struct lwtunnel_state *lwtstate)
230*4882a593Smuzhiyun {
231*4882a593Smuzhiyun struct mpls_iptunnel_encap *tun_encap_info;
232*4882a593Smuzhiyun
233*4882a593Smuzhiyun tun_encap_info = mpls_lwtunnel_encap(lwtstate);
234*4882a593Smuzhiyun
235*4882a593Smuzhiyun if (nla_put_labels(skb, MPLS_IPTUNNEL_DST, tun_encap_info->labels,
236*4882a593Smuzhiyun tun_encap_info->label))
237*4882a593Smuzhiyun goto nla_put_failure;
238*4882a593Smuzhiyun
239*4882a593Smuzhiyun if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT &&
240*4882a593Smuzhiyun nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl))
241*4882a593Smuzhiyun goto nla_put_failure;
242*4882a593Smuzhiyun
243*4882a593Smuzhiyun return 0;
244*4882a593Smuzhiyun
245*4882a593Smuzhiyun nla_put_failure:
246*4882a593Smuzhiyun return -EMSGSIZE;
247*4882a593Smuzhiyun }
248*4882a593Smuzhiyun
mpls_encap_nlsize(struct lwtunnel_state * lwtstate)249*4882a593Smuzhiyun static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
250*4882a593Smuzhiyun {
251*4882a593Smuzhiyun struct mpls_iptunnel_encap *tun_encap_info;
252*4882a593Smuzhiyun int nlsize;
253*4882a593Smuzhiyun
254*4882a593Smuzhiyun tun_encap_info = mpls_lwtunnel_encap(lwtstate);
255*4882a593Smuzhiyun
256*4882a593Smuzhiyun nlsize = nla_total_size(tun_encap_info->labels * 4);
257*4882a593Smuzhiyun
258*4882a593Smuzhiyun if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT)
259*4882a593Smuzhiyun nlsize += nla_total_size(1);
260*4882a593Smuzhiyun
261*4882a593Smuzhiyun return nlsize;
262*4882a593Smuzhiyun }
263*4882a593Smuzhiyun
mpls_encap_cmp(struct lwtunnel_state * a,struct lwtunnel_state * b)264*4882a593Smuzhiyun static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
265*4882a593Smuzhiyun {
266*4882a593Smuzhiyun struct mpls_iptunnel_encap *a_hdr = mpls_lwtunnel_encap(a);
267*4882a593Smuzhiyun struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b);
268*4882a593Smuzhiyun int l;
269*4882a593Smuzhiyun
270*4882a593Smuzhiyun if (a_hdr->labels != b_hdr->labels ||
271*4882a593Smuzhiyun a_hdr->ttl_propagate != b_hdr->ttl_propagate ||
272*4882a593Smuzhiyun a_hdr->default_ttl != b_hdr->default_ttl)
273*4882a593Smuzhiyun return 1;
274*4882a593Smuzhiyun
275*4882a593Smuzhiyun for (l = 0; l < a_hdr->labels; l++)
276*4882a593Smuzhiyun if (a_hdr->label[l] != b_hdr->label[l])
277*4882a593Smuzhiyun return 1;
278*4882a593Smuzhiyun return 0;
279*4882a593Smuzhiyun }
280*4882a593Smuzhiyun
281*4882a593Smuzhiyun static const struct lwtunnel_encap_ops mpls_iptun_ops = {
282*4882a593Smuzhiyun .build_state = mpls_build_state,
283*4882a593Smuzhiyun .xmit = mpls_xmit,
284*4882a593Smuzhiyun .fill_encap = mpls_fill_encap_info,
285*4882a593Smuzhiyun .get_encap_size = mpls_encap_nlsize,
286*4882a593Smuzhiyun .cmp_encap = mpls_encap_cmp,
287*4882a593Smuzhiyun .owner = THIS_MODULE,
288*4882a593Smuzhiyun };
289*4882a593Smuzhiyun
mpls_iptunnel_init(void)290*4882a593Smuzhiyun static int __init mpls_iptunnel_init(void)
291*4882a593Smuzhiyun {
292*4882a593Smuzhiyun return lwtunnel_encap_add_ops(&mpls_iptun_ops, LWTUNNEL_ENCAP_MPLS);
293*4882a593Smuzhiyun }
294*4882a593Smuzhiyun module_init(mpls_iptunnel_init);
295*4882a593Smuzhiyun
mpls_iptunnel_exit(void)296*4882a593Smuzhiyun static void __exit mpls_iptunnel_exit(void)
297*4882a593Smuzhiyun {
298*4882a593Smuzhiyun lwtunnel_encap_del_ops(&mpls_iptun_ops, LWTUNNEL_ENCAP_MPLS);
299*4882a593Smuzhiyun }
300*4882a593Smuzhiyun module_exit(mpls_iptunnel_exit);
301*4882a593Smuzhiyun
302*4882a593Smuzhiyun MODULE_ALIAS_RTNL_LWT(MPLS);
303*4882a593Smuzhiyun MODULE_SOFTDEP("post: mpls_gso");
304*4882a593Smuzhiyun MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels");
305*4882a593Smuzhiyun MODULE_LICENSE("GPL v2");
306