1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * Linux INET6 implementation
4*4882a593Smuzhiyun * FIB front-end.
5*4882a593Smuzhiyun *
6*4882a593Smuzhiyun * Authors:
7*4882a593Smuzhiyun * Pedro Roque <roque@di.fc.ul.pt>
8*4882a593Smuzhiyun */
9*4882a593Smuzhiyun
10*4882a593Smuzhiyun /* Changes:
11*4882a593Smuzhiyun *
12*4882a593Smuzhiyun * YOSHIFUJI Hideaki @USAGI
13*4882a593Smuzhiyun * reworked default router selection.
14*4882a593Smuzhiyun * - respect outgoing interface
15*4882a593Smuzhiyun * - select from (probably) reachable routers (i.e.
16*4882a593Smuzhiyun * routers in REACHABLE, STALE, DELAY or PROBE states).
17*4882a593Smuzhiyun * - always select the same router if it is (probably)
18*4882a593Smuzhiyun * reachable. otherwise, round-robin the list.
19*4882a593Smuzhiyun * Ville Nuorvala
20*4882a593Smuzhiyun * Fixed routing subtrees.
21*4882a593Smuzhiyun */
22*4882a593Smuzhiyun
23*4882a593Smuzhiyun #define pr_fmt(fmt) "IPv6: " fmt
24*4882a593Smuzhiyun
25*4882a593Smuzhiyun #include <linux/capability.h>
26*4882a593Smuzhiyun #include <linux/errno.h>
27*4882a593Smuzhiyun #include <linux/export.h>
28*4882a593Smuzhiyun #include <linux/types.h>
29*4882a593Smuzhiyun #include <linux/times.h>
30*4882a593Smuzhiyun #include <linux/socket.h>
31*4882a593Smuzhiyun #include <linux/sockios.h>
32*4882a593Smuzhiyun #include <linux/net.h>
33*4882a593Smuzhiyun #include <linux/route.h>
34*4882a593Smuzhiyun #include <linux/netdevice.h>
35*4882a593Smuzhiyun #include <linux/in6.h>
36*4882a593Smuzhiyun #include <linux/mroute6.h>
37*4882a593Smuzhiyun #include <linux/init.h>
38*4882a593Smuzhiyun #include <linux/if_arp.h>
39*4882a593Smuzhiyun #include <linux/proc_fs.h>
40*4882a593Smuzhiyun #include <linux/seq_file.h>
41*4882a593Smuzhiyun #include <linux/nsproxy.h>
42*4882a593Smuzhiyun #include <linux/slab.h>
43*4882a593Smuzhiyun #include <linux/jhash.h>
44*4882a593Smuzhiyun #include <linux/siphash.h>
45*4882a593Smuzhiyun #include <net/net_namespace.h>
46*4882a593Smuzhiyun #include <net/snmp.h>
47*4882a593Smuzhiyun #include <net/ipv6.h>
48*4882a593Smuzhiyun #include <net/ip6_fib.h>
49*4882a593Smuzhiyun #include <net/ip6_route.h>
50*4882a593Smuzhiyun #include <net/ndisc.h>
51*4882a593Smuzhiyun #include <net/addrconf.h>
52*4882a593Smuzhiyun #include <net/tcp.h>
53*4882a593Smuzhiyun #include <linux/rtnetlink.h>
54*4882a593Smuzhiyun #include <net/dst.h>
55*4882a593Smuzhiyun #include <net/dst_metadata.h>
56*4882a593Smuzhiyun #include <net/xfrm.h>
57*4882a593Smuzhiyun #include <net/netevent.h>
58*4882a593Smuzhiyun #include <net/netlink.h>
59*4882a593Smuzhiyun #include <net/rtnh.h>
60*4882a593Smuzhiyun #include <net/lwtunnel.h>
61*4882a593Smuzhiyun #include <net/ip_tunnels.h>
62*4882a593Smuzhiyun #include <net/l3mdev.h>
63*4882a593Smuzhiyun #include <net/ip.h>
64*4882a593Smuzhiyun #include <linux/uaccess.h>
65*4882a593Smuzhiyun #include <linux/btf_ids.h>
66*4882a593Smuzhiyun
67*4882a593Smuzhiyun #ifdef CONFIG_SYSCTL
68*4882a593Smuzhiyun #include <linux/sysctl.h>
69*4882a593Smuzhiyun #endif
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun static int ip6_rt_type_to_error(u8 fib6_type);
72*4882a593Smuzhiyun
73*4882a593Smuzhiyun #define CREATE_TRACE_POINTS
74*4882a593Smuzhiyun #include <trace/events/fib6.h>
75*4882a593Smuzhiyun EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
76*4882a593Smuzhiyun #undef CREATE_TRACE_POINTS
77*4882a593Smuzhiyun
/*
 * Result of evaluating a nexthop for route selection.  Every failure value
 * is negative, so callers can simply test for "< 0".
 *
 * RT6_NUD_FAIL_HARD:  initial value in rt6_check_neigh(); also returned by
 *                     rt6_score_route() on a strict interface mismatch.
 * RT6_NUD_FAIL_PROBE: neighbour entry exists but is in NUD_FAILED
 *                     (CONFIG_IPV6_ROUTER_PREF only).
 * RT6_NUD_FAIL_DO_RR: no neighbour entry and CONFIG_IPV6_ROUTER_PREF is off.
 * RT6_NUD_SUCCEED:    nexthop is acceptable.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86*4882a593Smuzhiyun static unsigned int ip6_default_advmss(const struct dst_entry *dst);
87*4882a593Smuzhiyun static unsigned int ip6_mtu(const struct dst_entry *dst);
88*4882a593Smuzhiyun static struct dst_entry *ip6_negative_advice(struct dst_entry *);
89*4882a593Smuzhiyun static void ip6_dst_destroy(struct dst_entry *);
90*4882a593Smuzhiyun static void ip6_dst_ifdown(struct dst_entry *,
91*4882a593Smuzhiyun struct net_device *dev, int how);
92*4882a593Smuzhiyun static int ip6_dst_gc(struct dst_ops *ops);
93*4882a593Smuzhiyun
94*4882a593Smuzhiyun static int ip6_pkt_discard(struct sk_buff *skb);
95*4882a593Smuzhiyun static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
96*4882a593Smuzhiyun static int ip6_pkt_prohibit(struct sk_buff *skb);
97*4882a593Smuzhiyun static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
98*4882a593Smuzhiyun static void ip6_link_failure(struct sk_buff *skb);
99*4882a593Smuzhiyun static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
100*4882a593Smuzhiyun struct sk_buff *skb, u32 mtu,
101*4882a593Smuzhiyun bool confirm_neigh);
102*4882a593Smuzhiyun static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
103*4882a593Smuzhiyun struct sk_buff *skb);
104*4882a593Smuzhiyun static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
105*4882a593Smuzhiyun int strict);
106*4882a593Smuzhiyun static size_t rt6_nlmsg_size(struct fib6_info *f6i);
107*4882a593Smuzhiyun static int rt6_fill_node(struct net *net, struct sk_buff *skb,
108*4882a593Smuzhiyun struct fib6_info *rt, struct dst_entry *dst,
109*4882a593Smuzhiyun struct in6_addr *dest, struct in6_addr *src,
110*4882a593Smuzhiyun int iif, int type, u32 portid, u32 seq,
111*4882a593Smuzhiyun unsigned int flags);
112*4882a593Smuzhiyun static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
113*4882a593Smuzhiyun const struct in6_addr *daddr,
114*4882a593Smuzhiyun const struct in6_addr *saddr);
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun #ifdef CONFIG_IPV6_ROUTE_INFO
117*4882a593Smuzhiyun static struct fib6_info *rt6_add_route_info(struct net *net,
118*4882a593Smuzhiyun const struct in6_addr *prefix, int prefixlen,
119*4882a593Smuzhiyun const struct in6_addr *gwaddr,
120*4882a593Smuzhiyun struct net_device *dev,
121*4882a593Smuzhiyun unsigned int pref);
122*4882a593Smuzhiyun static struct fib6_info *rt6_get_route_info(struct net *net,
123*4882a593Smuzhiyun const struct in6_addr *prefix, int prefixlen,
124*4882a593Smuzhiyun const struct in6_addr *gwaddr,
125*4882a593Smuzhiyun struct net_device *dev);
126*4882a593Smuzhiyun #endif
127*4882a593Smuzhiyun
128*4882a593Smuzhiyun struct uncached_list {
129*4882a593Smuzhiyun spinlock_t lock;
130*4882a593Smuzhiyun struct list_head head;
131*4882a593Smuzhiyun };
132*4882a593Smuzhiyun
133*4882a593Smuzhiyun static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134*4882a593Smuzhiyun
rt6_uncached_list_add(struct rt6_info * rt)135*4882a593Smuzhiyun void rt6_uncached_list_add(struct rt6_info *rt)
136*4882a593Smuzhiyun {
137*4882a593Smuzhiyun struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138*4882a593Smuzhiyun
139*4882a593Smuzhiyun rt->rt6i_uncached_list = ul;
140*4882a593Smuzhiyun
141*4882a593Smuzhiyun spin_lock_bh(&ul->lock);
142*4882a593Smuzhiyun list_add_tail(&rt->rt6i_uncached, &ul->head);
143*4882a593Smuzhiyun spin_unlock_bh(&ul->lock);
144*4882a593Smuzhiyun }
145*4882a593Smuzhiyun
rt6_uncached_list_del(struct rt6_info * rt)146*4882a593Smuzhiyun void rt6_uncached_list_del(struct rt6_info *rt)
147*4882a593Smuzhiyun {
148*4882a593Smuzhiyun if (!list_empty(&rt->rt6i_uncached)) {
149*4882a593Smuzhiyun struct uncached_list *ul = rt->rt6i_uncached_list;
150*4882a593Smuzhiyun struct net *net = dev_net(rt->dst.dev);
151*4882a593Smuzhiyun
152*4882a593Smuzhiyun spin_lock_bh(&ul->lock);
153*4882a593Smuzhiyun list_del(&rt->rt6i_uncached);
154*4882a593Smuzhiyun atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
155*4882a593Smuzhiyun spin_unlock_bh(&ul->lock);
156*4882a593Smuzhiyun }
157*4882a593Smuzhiyun }
158*4882a593Smuzhiyun
rt6_uncached_list_flush_dev(struct net * net,struct net_device * dev)159*4882a593Smuzhiyun static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160*4882a593Smuzhiyun {
161*4882a593Smuzhiyun struct net_device *loopback_dev = net->loopback_dev;
162*4882a593Smuzhiyun int cpu;
163*4882a593Smuzhiyun
164*4882a593Smuzhiyun if (dev == loopback_dev)
165*4882a593Smuzhiyun return;
166*4882a593Smuzhiyun
167*4882a593Smuzhiyun for_each_possible_cpu(cpu) {
168*4882a593Smuzhiyun struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169*4882a593Smuzhiyun struct rt6_info *rt;
170*4882a593Smuzhiyun
171*4882a593Smuzhiyun spin_lock_bh(&ul->lock);
172*4882a593Smuzhiyun list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173*4882a593Smuzhiyun struct inet6_dev *rt_idev = rt->rt6i_idev;
174*4882a593Smuzhiyun struct net_device *rt_dev = rt->dst.dev;
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun if (rt_idev->dev == dev) {
177*4882a593Smuzhiyun rt->rt6i_idev = in6_dev_get(loopback_dev);
178*4882a593Smuzhiyun in6_dev_put(rt_idev);
179*4882a593Smuzhiyun }
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun if (rt_dev == dev) {
182*4882a593Smuzhiyun rt->dst.dev = blackhole_netdev;
183*4882a593Smuzhiyun dev_hold(rt->dst.dev);
184*4882a593Smuzhiyun dev_put(rt_dev);
185*4882a593Smuzhiyun }
186*4882a593Smuzhiyun }
187*4882a593Smuzhiyun spin_unlock_bh(&ul->lock);
188*4882a593Smuzhiyun }
189*4882a593Smuzhiyun }
190*4882a593Smuzhiyun
choose_neigh_daddr(const struct in6_addr * p,struct sk_buff * skb,const void * daddr)191*4882a593Smuzhiyun static inline const void *choose_neigh_daddr(const struct in6_addr *p,
192*4882a593Smuzhiyun struct sk_buff *skb,
193*4882a593Smuzhiyun const void *daddr)
194*4882a593Smuzhiyun {
195*4882a593Smuzhiyun if (!ipv6_addr_any(p))
196*4882a593Smuzhiyun return (const void *) p;
197*4882a593Smuzhiyun else if (skb)
198*4882a593Smuzhiyun return &ipv6_hdr(skb)->daddr;
199*4882a593Smuzhiyun return daddr;
200*4882a593Smuzhiyun }
201*4882a593Smuzhiyun
ip6_neigh_lookup(const struct in6_addr * gw,struct net_device * dev,struct sk_buff * skb,const void * daddr)202*4882a593Smuzhiyun struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203*4882a593Smuzhiyun struct net_device *dev,
204*4882a593Smuzhiyun struct sk_buff *skb,
205*4882a593Smuzhiyun const void *daddr)
206*4882a593Smuzhiyun {
207*4882a593Smuzhiyun struct neighbour *n;
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun daddr = choose_neigh_daddr(gw, skb, daddr);
210*4882a593Smuzhiyun n = __ipv6_neigh_lookup(dev, daddr);
211*4882a593Smuzhiyun if (n)
212*4882a593Smuzhiyun return n;
213*4882a593Smuzhiyun
214*4882a593Smuzhiyun n = neigh_create(&nd_tbl, daddr, dev);
215*4882a593Smuzhiyun return IS_ERR(n) ? NULL : n;
216*4882a593Smuzhiyun }
217*4882a593Smuzhiyun
ip6_dst_neigh_lookup(const struct dst_entry * dst,struct sk_buff * skb,const void * daddr)218*4882a593Smuzhiyun static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
219*4882a593Smuzhiyun struct sk_buff *skb,
220*4882a593Smuzhiyun const void *daddr)
221*4882a593Smuzhiyun {
222*4882a593Smuzhiyun const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
223*4882a593Smuzhiyun
224*4882a593Smuzhiyun return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
225*4882a593Smuzhiyun dst->dev, skb, daddr);
226*4882a593Smuzhiyun }
227*4882a593Smuzhiyun
ip6_confirm_neigh(const struct dst_entry * dst,const void * daddr)228*4882a593Smuzhiyun static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
229*4882a593Smuzhiyun {
230*4882a593Smuzhiyun struct net_device *dev = dst->dev;
231*4882a593Smuzhiyun struct rt6_info *rt = (struct rt6_info *)dst;
232*4882a593Smuzhiyun
233*4882a593Smuzhiyun daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
234*4882a593Smuzhiyun if (!daddr)
235*4882a593Smuzhiyun return;
236*4882a593Smuzhiyun if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
237*4882a593Smuzhiyun return;
238*4882a593Smuzhiyun if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
239*4882a593Smuzhiyun return;
240*4882a593Smuzhiyun __ipv6_confirm_neigh(dev, daddr);
241*4882a593Smuzhiyun }
242*4882a593Smuzhiyun
243*4882a593Smuzhiyun static struct dst_ops ip6_dst_ops_template = {
244*4882a593Smuzhiyun .family = AF_INET6,
245*4882a593Smuzhiyun .gc = ip6_dst_gc,
246*4882a593Smuzhiyun .gc_thresh = 1024,
247*4882a593Smuzhiyun .check = ip6_dst_check,
248*4882a593Smuzhiyun .default_advmss = ip6_default_advmss,
249*4882a593Smuzhiyun .mtu = ip6_mtu,
250*4882a593Smuzhiyun .cow_metrics = dst_cow_metrics_generic,
251*4882a593Smuzhiyun .destroy = ip6_dst_destroy,
252*4882a593Smuzhiyun .ifdown = ip6_dst_ifdown,
253*4882a593Smuzhiyun .negative_advice = ip6_negative_advice,
254*4882a593Smuzhiyun .link_failure = ip6_link_failure,
255*4882a593Smuzhiyun .update_pmtu = ip6_rt_update_pmtu,
256*4882a593Smuzhiyun .redirect = rt6_do_redirect,
257*4882a593Smuzhiyun .local_out = __ip6_local_out,
258*4882a593Smuzhiyun .neigh_lookup = ip6_dst_neigh_lookup,
259*4882a593Smuzhiyun .confirm_neigh = ip6_confirm_neigh,
260*4882a593Smuzhiyun };
261*4882a593Smuzhiyun
262*4882a593Smuzhiyun static struct dst_ops ip6_dst_blackhole_ops = {
263*4882a593Smuzhiyun .family = AF_INET6,
264*4882a593Smuzhiyun .default_advmss = ip6_default_advmss,
265*4882a593Smuzhiyun .neigh_lookup = ip6_dst_neigh_lookup,
266*4882a593Smuzhiyun .check = ip6_dst_check,
267*4882a593Smuzhiyun .destroy = ip6_dst_destroy,
268*4882a593Smuzhiyun .cow_metrics = dst_cow_metrics_generic,
269*4882a593Smuzhiyun .update_pmtu = dst_blackhole_update_pmtu,
270*4882a593Smuzhiyun .redirect = dst_blackhole_redirect,
271*4882a593Smuzhiyun .mtu = dst_blackhole_mtu,
272*4882a593Smuzhiyun };
273*4882a593Smuzhiyun
274*4882a593Smuzhiyun static const u32 ip6_template_metrics[RTAX_MAX] = {
275*4882a593Smuzhiyun [RTAX_HOPLIMIT - 1] = 0,
276*4882a593Smuzhiyun };
277*4882a593Smuzhiyun
278*4882a593Smuzhiyun static const struct fib6_info fib6_null_entry_template = {
279*4882a593Smuzhiyun .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
280*4882a593Smuzhiyun .fib6_protocol = RTPROT_KERNEL,
281*4882a593Smuzhiyun .fib6_metric = ~(u32)0,
282*4882a593Smuzhiyun .fib6_ref = REFCOUNT_INIT(1),
283*4882a593Smuzhiyun .fib6_type = RTN_UNREACHABLE,
284*4882a593Smuzhiyun .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
285*4882a593Smuzhiyun };
286*4882a593Smuzhiyun
287*4882a593Smuzhiyun static const struct rt6_info ip6_null_entry_template = {
288*4882a593Smuzhiyun .dst = {
289*4882a593Smuzhiyun .__refcnt = ATOMIC_INIT(1),
290*4882a593Smuzhiyun .__use = 1,
291*4882a593Smuzhiyun .obsolete = DST_OBSOLETE_FORCE_CHK,
292*4882a593Smuzhiyun .error = -ENETUNREACH,
293*4882a593Smuzhiyun .input = ip6_pkt_discard,
294*4882a593Smuzhiyun .output = ip6_pkt_discard_out,
295*4882a593Smuzhiyun },
296*4882a593Smuzhiyun .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
297*4882a593Smuzhiyun };
298*4882a593Smuzhiyun
299*4882a593Smuzhiyun #ifdef CONFIG_IPV6_MULTIPLE_TABLES
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun static const struct rt6_info ip6_prohibit_entry_template = {
302*4882a593Smuzhiyun .dst = {
303*4882a593Smuzhiyun .__refcnt = ATOMIC_INIT(1),
304*4882a593Smuzhiyun .__use = 1,
305*4882a593Smuzhiyun .obsolete = DST_OBSOLETE_FORCE_CHK,
306*4882a593Smuzhiyun .error = -EACCES,
307*4882a593Smuzhiyun .input = ip6_pkt_prohibit,
308*4882a593Smuzhiyun .output = ip6_pkt_prohibit_out,
309*4882a593Smuzhiyun },
310*4882a593Smuzhiyun .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
311*4882a593Smuzhiyun };
312*4882a593Smuzhiyun
313*4882a593Smuzhiyun static const struct rt6_info ip6_blk_hole_entry_template = {
314*4882a593Smuzhiyun .dst = {
315*4882a593Smuzhiyun .__refcnt = ATOMIC_INIT(1),
316*4882a593Smuzhiyun .__use = 1,
317*4882a593Smuzhiyun .obsolete = DST_OBSOLETE_FORCE_CHK,
318*4882a593Smuzhiyun .error = -EINVAL,
319*4882a593Smuzhiyun .input = dst_discard,
320*4882a593Smuzhiyun .output = dst_discard_out,
321*4882a593Smuzhiyun },
322*4882a593Smuzhiyun .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
323*4882a593Smuzhiyun };
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun #endif
326*4882a593Smuzhiyun
rt6_info_init(struct rt6_info * rt)327*4882a593Smuzhiyun static void rt6_info_init(struct rt6_info *rt)
328*4882a593Smuzhiyun {
329*4882a593Smuzhiyun struct dst_entry *dst = &rt->dst;
330*4882a593Smuzhiyun
331*4882a593Smuzhiyun memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
332*4882a593Smuzhiyun INIT_LIST_HEAD(&rt->rt6i_uncached);
333*4882a593Smuzhiyun }
334*4882a593Smuzhiyun
335*4882a593Smuzhiyun /* allocate dst with ip6_dst_ops */
ip6_dst_alloc(struct net * net,struct net_device * dev,int flags)336*4882a593Smuzhiyun struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
337*4882a593Smuzhiyun int flags)
338*4882a593Smuzhiyun {
339*4882a593Smuzhiyun struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
340*4882a593Smuzhiyun 1, DST_OBSOLETE_FORCE_CHK, flags);
341*4882a593Smuzhiyun
342*4882a593Smuzhiyun if (rt) {
343*4882a593Smuzhiyun rt6_info_init(rt);
344*4882a593Smuzhiyun atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
345*4882a593Smuzhiyun }
346*4882a593Smuzhiyun
347*4882a593Smuzhiyun return rt;
348*4882a593Smuzhiyun }
349*4882a593Smuzhiyun EXPORT_SYMBOL(ip6_dst_alloc);
350*4882a593Smuzhiyun
ip6_dst_destroy(struct dst_entry * dst)351*4882a593Smuzhiyun static void ip6_dst_destroy(struct dst_entry *dst)
352*4882a593Smuzhiyun {
353*4882a593Smuzhiyun struct rt6_info *rt = (struct rt6_info *)dst;
354*4882a593Smuzhiyun struct fib6_info *from;
355*4882a593Smuzhiyun struct inet6_dev *idev;
356*4882a593Smuzhiyun
357*4882a593Smuzhiyun ip_dst_metrics_put(dst);
358*4882a593Smuzhiyun rt6_uncached_list_del(rt);
359*4882a593Smuzhiyun
360*4882a593Smuzhiyun idev = rt->rt6i_idev;
361*4882a593Smuzhiyun if (idev) {
362*4882a593Smuzhiyun rt->rt6i_idev = NULL;
363*4882a593Smuzhiyun in6_dev_put(idev);
364*4882a593Smuzhiyun }
365*4882a593Smuzhiyun
366*4882a593Smuzhiyun from = xchg((__force struct fib6_info **)&rt->from, NULL);
367*4882a593Smuzhiyun fib6_info_release(from);
368*4882a593Smuzhiyun }
369*4882a593Smuzhiyun
ip6_dst_ifdown(struct dst_entry * dst,struct net_device * dev,int how)370*4882a593Smuzhiyun static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
371*4882a593Smuzhiyun int how)
372*4882a593Smuzhiyun {
373*4882a593Smuzhiyun struct rt6_info *rt = (struct rt6_info *)dst;
374*4882a593Smuzhiyun struct inet6_dev *idev = rt->rt6i_idev;
375*4882a593Smuzhiyun struct net_device *loopback_dev =
376*4882a593Smuzhiyun dev_net(dev)->loopback_dev;
377*4882a593Smuzhiyun
378*4882a593Smuzhiyun if (idev && idev->dev != loopback_dev) {
379*4882a593Smuzhiyun struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
380*4882a593Smuzhiyun if (loopback_idev) {
381*4882a593Smuzhiyun rt->rt6i_idev = loopback_idev;
382*4882a593Smuzhiyun in6_dev_put(idev);
383*4882a593Smuzhiyun }
384*4882a593Smuzhiyun }
385*4882a593Smuzhiyun }
386*4882a593Smuzhiyun
__rt6_check_expired(const struct rt6_info * rt)387*4882a593Smuzhiyun static bool __rt6_check_expired(const struct rt6_info *rt)
388*4882a593Smuzhiyun {
389*4882a593Smuzhiyun if (rt->rt6i_flags & RTF_EXPIRES)
390*4882a593Smuzhiyun return time_after(jiffies, rt->dst.expires);
391*4882a593Smuzhiyun else
392*4882a593Smuzhiyun return false;
393*4882a593Smuzhiyun }
394*4882a593Smuzhiyun
rt6_check_expired(const struct rt6_info * rt)395*4882a593Smuzhiyun static bool rt6_check_expired(const struct rt6_info *rt)
396*4882a593Smuzhiyun {
397*4882a593Smuzhiyun struct fib6_info *from;
398*4882a593Smuzhiyun
399*4882a593Smuzhiyun from = rcu_dereference(rt->from);
400*4882a593Smuzhiyun
401*4882a593Smuzhiyun if (rt->rt6i_flags & RTF_EXPIRES) {
402*4882a593Smuzhiyun if (time_after(jiffies, rt->dst.expires))
403*4882a593Smuzhiyun return true;
404*4882a593Smuzhiyun } else if (from) {
405*4882a593Smuzhiyun return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
406*4882a593Smuzhiyun fib6_check_expired(from);
407*4882a593Smuzhiyun }
408*4882a593Smuzhiyun return false;
409*4882a593Smuzhiyun }
410*4882a593Smuzhiyun
/*
 * Choose one path of a multipath route and store it in @res.
 *
 * For routes using a nexthop object the choice is delegated to
 * nexthop_path_fib6_result().  For sibling-based multipath the flow hash
 * (fl6->mp_hash, computed here if still zero) is compared against each
 * path's fib_nh_upper_bound; the first path whose bound covers the hash is
 * taken, provided rt6_score_route() does not reject it.  In every other
 * case the route already in @res is kept.
 */
void fib6_select_path(const struct net *net, struct fib6_result *res,
		      struct flowi6 *fl6, int oif, bool have_oif_match,
		      const struct sk_buff *skb, int strict)
{
	struct fib6_info *sibling, *next_sibling;
	struct fib6_info *match = res->f6i;

	if (match->nh) {
		/* Nexthop object already resolved for the requested oif. */
		if (have_oif_match && res->nh)
			return;
	} else if (!match->fib6_nsiblings || have_oif_match) {
		goto out;
	}

	/* We might have already computed the hash for ICMPv6 errors. In such
	 * case it will always be non-zero. Otherwise now is the time to do it.
	 */
	if (!fl6->mp_hash &&
	    (!match->nh || nexthop_is_multipath(match->nh)))
		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);

	if (unlikely(match->nh)) {
		nexthop_path_fib6_result(res, fl6->mp_hash);
		return;
	}

	if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
		goto out;

	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
				 fib6_siblings) {
		const struct fib6_nh *nh = sibling->fib6_nh;

		if (fl6->mp_hash > atomic_read(&nh->fib_nh_upper_bound))
			continue;

		/* Only the first covering sibling is considered. */
		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) >= 0)
			match = sibling;
		break;
	}

out:
	res->f6i = match;
	res->nh = match->fib6_nh;
}
457*4882a593Smuzhiyun
458*4882a593Smuzhiyun /*
459*4882a593Smuzhiyun * Route lookup. rcu_read_lock() should be held.
460*4882a593Smuzhiyun */
461*4882a593Smuzhiyun
__rt6_device_match(struct net * net,const struct fib6_nh * nh,const struct in6_addr * saddr,int oif,int flags)462*4882a593Smuzhiyun static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
463*4882a593Smuzhiyun const struct in6_addr *saddr, int oif, int flags)
464*4882a593Smuzhiyun {
465*4882a593Smuzhiyun const struct net_device *dev;
466*4882a593Smuzhiyun
467*4882a593Smuzhiyun if (nh->fib_nh_flags & RTNH_F_DEAD)
468*4882a593Smuzhiyun return false;
469*4882a593Smuzhiyun
470*4882a593Smuzhiyun dev = nh->fib_nh_dev;
471*4882a593Smuzhiyun if (oif) {
472*4882a593Smuzhiyun if (dev->ifindex == oif)
473*4882a593Smuzhiyun return true;
474*4882a593Smuzhiyun } else {
475*4882a593Smuzhiyun if (ipv6_chk_addr(net, saddr, dev,
476*4882a593Smuzhiyun flags & RT6_LOOKUP_F_IFACE))
477*4882a593Smuzhiyun return true;
478*4882a593Smuzhiyun }
479*4882a593Smuzhiyun
480*4882a593Smuzhiyun return false;
481*4882a593Smuzhiyun }
482*4882a593Smuzhiyun
/*
 * Argument bundle handed to __rt6_nh_dev_match() through
 * nexthop_for_each_fib6_nh().
 */
struct fib6_nh_dm_arg {
	/* lookup constraints (inputs) */
	struct net		*net;
	const struct in6_addr	*saddr;
	int			oif;
	int			flags;
	/* most recently visited fib6_nh (output) */
	struct fib6_nh		*nh;
};
490*4882a593Smuzhiyun
__rt6_nh_dev_match(struct fib6_nh * nh,void * _arg)491*4882a593Smuzhiyun static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
492*4882a593Smuzhiyun {
493*4882a593Smuzhiyun struct fib6_nh_dm_arg *arg = _arg;
494*4882a593Smuzhiyun
495*4882a593Smuzhiyun arg->nh = nh;
496*4882a593Smuzhiyun return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
497*4882a593Smuzhiyun arg->flags);
498*4882a593Smuzhiyun }
499*4882a593Smuzhiyun
500*4882a593Smuzhiyun /* returns fib6_nh from nexthop or NULL */
rt6_nh_dev_match(struct net * net,struct nexthop * nh,struct fib6_result * res,const struct in6_addr * saddr,int oif,int flags)501*4882a593Smuzhiyun static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
502*4882a593Smuzhiyun struct fib6_result *res,
503*4882a593Smuzhiyun const struct in6_addr *saddr,
504*4882a593Smuzhiyun int oif, int flags)
505*4882a593Smuzhiyun {
506*4882a593Smuzhiyun struct fib6_nh_dm_arg arg = {
507*4882a593Smuzhiyun .net = net,
508*4882a593Smuzhiyun .saddr = saddr,
509*4882a593Smuzhiyun .oif = oif,
510*4882a593Smuzhiyun .flags = flags,
511*4882a593Smuzhiyun };
512*4882a593Smuzhiyun
513*4882a593Smuzhiyun if (nexthop_is_blackhole(nh))
514*4882a593Smuzhiyun return NULL;
515*4882a593Smuzhiyun
516*4882a593Smuzhiyun if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
517*4882a593Smuzhiyun return arg.nh;
518*4882a593Smuzhiyun
519*4882a593Smuzhiyun return NULL;
520*4882a593Smuzhiyun }
521*4882a593Smuzhiyun
rt6_device_match(struct net * net,struct fib6_result * res,const struct in6_addr * saddr,int oif,int flags)522*4882a593Smuzhiyun static void rt6_device_match(struct net *net, struct fib6_result *res,
523*4882a593Smuzhiyun const struct in6_addr *saddr, int oif, int flags)
524*4882a593Smuzhiyun {
525*4882a593Smuzhiyun struct fib6_info *f6i = res->f6i;
526*4882a593Smuzhiyun struct fib6_info *spf6i;
527*4882a593Smuzhiyun struct fib6_nh *nh;
528*4882a593Smuzhiyun
529*4882a593Smuzhiyun if (!oif && ipv6_addr_any(saddr)) {
530*4882a593Smuzhiyun if (unlikely(f6i->nh)) {
531*4882a593Smuzhiyun nh = nexthop_fib6_nh(f6i->nh);
532*4882a593Smuzhiyun if (nexthop_is_blackhole(f6i->nh))
533*4882a593Smuzhiyun goto out_blackhole;
534*4882a593Smuzhiyun } else {
535*4882a593Smuzhiyun nh = f6i->fib6_nh;
536*4882a593Smuzhiyun }
537*4882a593Smuzhiyun if (!(nh->fib_nh_flags & RTNH_F_DEAD))
538*4882a593Smuzhiyun goto out;
539*4882a593Smuzhiyun }
540*4882a593Smuzhiyun
541*4882a593Smuzhiyun for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
542*4882a593Smuzhiyun bool matched = false;
543*4882a593Smuzhiyun
544*4882a593Smuzhiyun if (unlikely(spf6i->nh)) {
545*4882a593Smuzhiyun nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
546*4882a593Smuzhiyun oif, flags);
547*4882a593Smuzhiyun if (nh)
548*4882a593Smuzhiyun matched = true;
549*4882a593Smuzhiyun } else {
550*4882a593Smuzhiyun nh = spf6i->fib6_nh;
551*4882a593Smuzhiyun if (__rt6_device_match(net, nh, saddr, oif, flags))
552*4882a593Smuzhiyun matched = true;
553*4882a593Smuzhiyun }
554*4882a593Smuzhiyun if (matched) {
555*4882a593Smuzhiyun res->f6i = spf6i;
556*4882a593Smuzhiyun goto out;
557*4882a593Smuzhiyun }
558*4882a593Smuzhiyun }
559*4882a593Smuzhiyun
560*4882a593Smuzhiyun if (oif && flags & RT6_LOOKUP_F_IFACE) {
561*4882a593Smuzhiyun res->f6i = net->ipv6.fib6_null_entry;
562*4882a593Smuzhiyun nh = res->f6i->fib6_nh;
563*4882a593Smuzhiyun goto out;
564*4882a593Smuzhiyun }
565*4882a593Smuzhiyun
566*4882a593Smuzhiyun if (unlikely(f6i->nh)) {
567*4882a593Smuzhiyun nh = nexthop_fib6_nh(f6i->nh);
568*4882a593Smuzhiyun if (nexthop_is_blackhole(f6i->nh))
569*4882a593Smuzhiyun goto out_blackhole;
570*4882a593Smuzhiyun } else {
571*4882a593Smuzhiyun nh = f6i->fib6_nh;
572*4882a593Smuzhiyun }
573*4882a593Smuzhiyun
574*4882a593Smuzhiyun if (nh->fib_nh_flags & RTNH_F_DEAD) {
575*4882a593Smuzhiyun res->f6i = net->ipv6.fib6_null_entry;
576*4882a593Smuzhiyun nh = res->f6i->fib6_nh;
577*4882a593Smuzhiyun }
578*4882a593Smuzhiyun out:
579*4882a593Smuzhiyun res->nh = nh;
580*4882a593Smuzhiyun res->fib6_type = res->f6i->fib6_type;
581*4882a593Smuzhiyun res->fib6_flags = res->f6i->fib6_flags;
582*4882a593Smuzhiyun return;
583*4882a593Smuzhiyun
584*4882a593Smuzhiyun out_blackhole:
585*4882a593Smuzhiyun res->fib6_flags |= RTF_REJECT;
586*4882a593Smuzhiyun res->fib6_type = RTN_BLACKHOLE;
587*4882a593Smuzhiyun res->nh = nh;
588*4882a593Smuzhiyun }
589*4882a593Smuzhiyun
590*4882a593Smuzhiyun #ifdef CONFIG_IPV6_ROUTER_PREF
591*4882a593Smuzhiyun struct __rt6_probe_work {
592*4882a593Smuzhiyun struct work_struct work;
593*4882a593Smuzhiyun struct in6_addr target;
594*4882a593Smuzhiyun struct net_device *dev;
595*4882a593Smuzhiyun };
596*4882a593Smuzhiyun
rt6_probe_deferred(struct work_struct * w)597*4882a593Smuzhiyun static void rt6_probe_deferred(struct work_struct *w)
598*4882a593Smuzhiyun {
599*4882a593Smuzhiyun struct in6_addr mcaddr;
600*4882a593Smuzhiyun struct __rt6_probe_work *work =
601*4882a593Smuzhiyun container_of(w, struct __rt6_probe_work, work);
602*4882a593Smuzhiyun
603*4882a593Smuzhiyun addrconf_addr_solict_mult(&work->target, &mcaddr);
604*4882a593Smuzhiyun ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
605*4882a593Smuzhiyun dev_put(work->dev);
606*4882a593Smuzhiyun kfree(work);
607*4882a593Smuzhiyun }
608*4882a593Smuzhiyun
rt6_probe(struct fib6_nh * fib6_nh)609*4882a593Smuzhiyun static void rt6_probe(struct fib6_nh *fib6_nh)
610*4882a593Smuzhiyun {
611*4882a593Smuzhiyun struct __rt6_probe_work *work = NULL;
612*4882a593Smuzhiyun const struct in6_addr *nh_gw;
613*4882a593Smuzhiyun unsigned long last_probe;
614*4882a593Smuzhiyun struct neighbour *neigh;
615*4882a593Smuzhiyun struct net_device *dev;
616*4882a593Smuzhiyun struct inet6_dev *idev;
617*4882a593Smuzhiyun
618*4882a593Smuzhiyun /*
619*4882a593Smuzhiyun * Okay, this does not seem to be appropriate
620*4882a593Smuzhiyun * for now, however, we need to check if it
621*4882a593Smuzhiyun * is really so; aka Router Reachability Probing.
622*4882a593Smuzhiyun *
623*4882a593Smuzhiyun * Router Reachability Probe MUST be rate-limited
624*4882a593Smuzhiyun * to no more than one per minute.
625*4882a593Smuzhiyun */
626*4882a593Smuzhiyun if (!fib6_nh->fib_nh_gw_family)
627*4882a593Smuzhiyun return;
628*4882a593Smuzhiyun
629*4882a593Smuzhiyun nh_gw = &fib6_nh->fib_nh_gw6;
630*4882a593Smuzhiyun dev = fib6_nh->fib_nh_dev;
631*4882a593Smuzhiyun rcu_read_lock_bh();
632*4882a593Smuzhiyun last_probe = READ_ONCE(fib6_nh->last_probe);
633*4882a593Smuzhiyun idev = __in6_dev_get(dev);
634*4882a593Smuzhiyun neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
635*4882a593Smuzhiyun if (neigh) {
636*4882a593Smuzhiyun if (neigh->nud_state & NUD_VALID)
637*4882a593Smuzhiyun goto out;
638*4882a593Smuzhiyun
639*4882a593Smuzhiyun write_lock(&neigh->lock);
640*4882a593Smuzhiyun if (!(neigh->nud_state & NUD_VALID) &&
641*4882a593Smuzhiyun time_after(jiffies,
642*4882a593Smuzhiyun neigh->updated + idev->cnf.rtr_probe_interval)) {
643*4882a593Smuzhiyun work = kmalloc(sizeof(*work), GFP_ATOMIC);
644*4882a593Smuzhiyun if (work)
645*4882a593Smuzhiyun __neigh_set_probe_once(neigh);
646*4882a593Smuzhiyun }
647*4882a593Smuzhiyun write_unlock(&neigh->lock);
648*4882a593Smuzhiyun } else if (time_after(jiffies, last_probe +
649*4882a593Smuzhiyun idev->cnf.rtr_probe_interval)) {
650*4882a593Smuzhiyun work = kmalloc(sizeof(*work), GFP_ATOMIC);
651*4882a593Smuzhiyun }
652*4882a593Smuzhiyun
653*4882a593Smuzhiyun if (!work || cmpxchg(&fib6_nh->last_probe,
654*4882a593Smuzhiyun last_probe, jiffies) != last_probe) {
655*4882a593Smuzhiyun kfree(work);
656*4882a593Smuzhiyun } else {
657*4882a593Smuzhiyun INIT_WORK(&work->work, rt6_probe_deferred);
658*4882a593Smuzhiyun work->target = *nh_gw;
659*4882a593Smuzhiyun dev_hold(dev);
660*4882a593Smuzhiyun work->dev = dev;
661*4882a593Smuzhiyun schedule_work(&work->work);
662*4882a593Smuzhiyun }
663*4882a593Smuzhiyun
664*4882a593Smuzhiyun out:
665*4882a593Smuzhiyun rcu_read_unlock_bh();
666*4882a593Smuzhiyun }
667*4882a593Smuzhiyun #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_nh *fib6_nh) { }
671*4882a593Smuzhiyun #endif
672*4882a593Smuzhiyun
673*4882a593Smuzhiyun /*
674*4882a593Smuzhiyun * Default Router Selection (RFC 2461 6.3.6)
675*4882a593Smuzhiyun */
rt6_check_neigh(const struct fib6_nh * fib6_nh)676*4882a593Smuzhiyun static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
677*4882a593Smuzhiyun {
678*4882a593Smuzhiyun enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
679*4882a593Smuzhiyun struct neighbour *neigh;
680*4882a593Smuzhiyun
681*4882a593Smuzhiyun rcu_read_lock_bh();
682*4882a593Smuzhiyun neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
683*4882a593Smuzhiyun &fib6_nh->fib_nh_gw6);
684*4882a593Smuzhiyun if (neigh) {
685*4882a593Smuzhiyun read_lock(&neigh->lock);
686*4882a593Smuzhiyun if (neigh->nud_state & NUD_VALID)
687*4882a593Smuzhiyun ret = RT6_NUD_SUCCEED;
688*4882a593Smuzhiyun #ifdef CONFIG_IPV6_ROUTER_PREF
689*4882a593Smuzhiyun else if (!(neigh->nud_state & NUD_FAILED))
690*4882a593Smuzhiyun ret = RT6_NUD_SUCCEED;
691*4882a593Smuzhiyun else
692*4882a593Smuzhiyun ret = RT6_NUD_FAIL_PROBE;
693*4882a593Smuzhiyun #endif
694*4882a593Smuzhiyun read_unlock(&neigh->lock);
695*4882a593Smuzhiyun } else {
696*4882a593Smuzhiyun ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
697*4882a593Smuzhiyun RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
698*4882a593Smuzhiyun }
699*4882a593Smuzhiyun rcu_read_unlock_bh();
700*4882a593Smuzhiyun
701*4882a593Smuzhiyun return ret;
702*4882a593Smuzhiyun }
703*4882a593Smuzhiyun
/*
 * Score a nexthop for route selection.
 *
 * The base score is 2 when no output interface is requested (@oif == 0) or
 * the nexthop device matches @oif, 0 otherwise.  A device mismatch combined
 * with RT6_LOOKUP_F_IFACE in @strict yields RT6_NUD_FAIL_HARD.  With
 * CONFIG_IPV6_ROUTER_PREF the decoded route preference from @fib6_flags is
 * OR'ed in, shifted left by two bits.
 *
 * When RT6_LOOKUP_F_REACHABLE is requested and the nexthop has a gateway
 * (and the route is not RTF_NONEXTHOP), a negative rt6_check_neigh() result
 * is returned instead of the score.
 */
static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
			   int strict)
{
	bool dev_ok = !oif || nh->fib_nh_dev->ifindex == oif;
	int score = dev_ok ? 2 : 0;
	int nud;

	if (!dev_ok && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	score |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
#endif
	/* reachability only matters when asked for and there is a gateway */
	if (!(strict & RT6_LOOKUP_F_REACHABLE))
		return score;
	if ((fib6_flags & RTF_NONEXTHOP) || !nh->fib_nh_gw_family)
		return score;

	nud = rt6_check_neigh(nh);
	if (nud < 0)
		return nud;

	return score;
}
725*4882a593Smuzhiyun
/*
 * Test whether @nh beats the best score seen so far.
 *
 * Nexthops flagged RTNH_F_DEAD are skipped, as are RTNH_F_LINKDOWN ones when
 * ip6_ignore_linkdown() is true for the device and @strict does not carry
 * RT6_LOOKUP_F_IGNORE_LINKSTATE.  The nexthop is scored with
 * rt6_score_route(); RT6_NUD_FAIL_HARD rejects it, RT6_NUD_FAIL_DO_RR is
 * turned into score 0 and remembered as a round-robin request.
 *
 * Returns true, updating *@mpri and *@do_rr, only when the score is strictly
 * greater than *@mpri.
 */
static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
		       int oif, int strict, int *mpri, bool *do_rr)
{
	bool want_rr = false;
	int score;

	if (nh->fib_nh_flags & RTNH_F_DEAD)
		return false;

	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
	    (nh->fib_nh_flags & RTNH_F_LINKDOWN) &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		return false;

	score = rt6_score_route(nh, fib6_flags, oif, strict);
	if (score == RT6_NUD_FAIL_HARD)
		return false;
	if (score == RT6_NUD_FAIL_DO_RR) {
		want_rr = true;
		score = 0; /* lowest valid score */
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(nh);

	/* note that score can still be RT6_NUD_FAIL_PROBE at this point */
	if (score <= *mpri)
		return false;

	*do_rr = want_rr;
	*mpri = score;
	return true;
}
761*4882a593Smuzhiyun
762*4882a593Smuzhiyun struct fib6_nh_frl_arg {
763*4882a593Smuzhiyun u32 flags;
764*4882a593Smuzhiyun int oif;
765*4882a593Smuzhiyun int strict;
766*4882a593Smuzhiyun int *mpri;
767*4882a593Smuzhiyun bool *do_rr;
768*4882a593Smuzhiyun struct fib6_nh *nh;
769*4882a593Smuzhiyun };
770*4882a593Smuzhiyun
rt6_nh_find_match(struct fib6_nh * nh,void * _arg)771*4882a593Smuzhiyun static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
772*4882a593Smuzhiyun {
773*4882a593Smuzhiyun struct fib6_nh_frl_arg *arg = _arg;
774*4882a593Smuzhiyun
775*4882a593Smuzhiyun arg->nh = nh;
776*4882a593Smuzhiyun return find_match(nh, arg->flags, arg->oif, arg->strict,
777*4882a593Smuzhiyun arg->mpri, arg->do_rr);
778*4882a593Smuzhiyun }
779*4882a593Smuzhiyun
/*
 * Walk the fib6_next chain from @f6i_start up to (not including) @nomatch
 * and store the best matching route/nexthop in @res.
 *
 * When @cont is non-NULL the walk stops at the first entry whose metric
 * differs from @metric and that entry is stored in *@cont.  Expired entries
 * are skipped.  A route using a blackhole nexthop object ends the walk at
 * once with a RTF_REJECT/RTN_BLACKHOLE result.  *@mpri and *@do_rr are
 * updated by find_match() whenever a better candidate is seen.
 */
static void __find_rr_leaf(struct fib6_info *f6i_start,
			   struct fib6_info *nomatch, u32 metric,
			   struct fib6_result *res, struct fib6_info **cont,
			   int oif, int strict, bool *do_rr, int *mpri)
{
	struct fib6_info *f6i = f6i_start;

	for (; f6i && f6i != nomatch; f6i = rcu_dereference(f6i->fib6_next)) {
		struct fib6_nh *nh = NULL;
		bool matched = false;

		if (cont && f6i->fib6_metric != metric) {
			*cont = f6i;
			return;
		}

		if (fib6_check_expired(f6i))
			continue;

		if (likely(!f6i->nh)) {
			/* route with its own fib6_nh */
			nh = f6i->fib6_nh;
			matched = find_match(nh, f6i->fib6_flags, oif, strict,
					     mpri, do_rr);
		} else if (nexthop_is_blackhole(f6i->nh)) {
			res->f6i = f6i;
			res->nh = nexthop_fib6_nh(f6i->nh);
			res->fib6_flags = RTF_REJECT;
			res->fib6_type = RTN_BLACKHOLE;
			return;
		} else {
			/* nexthop object: let the iterator try each fib6_nh */
			struct fib6_nh_frl_arg arg = {
				.flags  = f6i->fib6_flags,
				.oif    = oif,
				.strict = strict,
				.mpri   = mpri,
				.do_rr  = do_rr
			};

			if (nexthop_for_each_fib6_nh(f6i->nh,
						     rt6_nh_find_match,
						     &arg)) {
				matched = true;
				nh = arg.nh;
			}
		}

		if (!matched)
			continue;

		res->f6i = f6i;
		res->nh = nh;
		res->fib6_flags = f6i->fib6_flags;
		res->fib6_type = f6i->fib6_type;
	}
}
836*4882a593Smuzhiyun
/*
 * Pick the best route among @leaf's siblings, starting at @rr_head.
 *
 * Entries sharing @rr_head's metric are scanned first: from @rr_head onwards,
 * then from @leaf up to @rr_head.  Only if that leaves @res->f6i unset, and an
 * entry with a different metric was recorded, the scan continues from that
 * entry without a metric cutoff.
 */
static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
			 struct fib6_info *rr_head, int oif, int strict,
			 bool *do_rr, struct fib6_result *res)
{
	const u32 metric = rr_head->fib6_metric;
	struct fib6_info *other_metric = NULL;
	int best = -1;

	/* rr_head .. end of its metric group */
	__find_rr_leaf(rr_head, NULL, metric, res, &other_metric,
		       oif, strict, do_rr, &best);

	/* leaf .. rr_head (exclusive) */
	__find_rr_leaf(leaf, rr_head, metric, res, &other_metric,
		       oif, strict, do_rr, &best);

	if (!res->f6i && other_metric)
		__find_rr_leaf(other_metric, NULL, metric, res, NULL,
			       oif, strict, do_rr, &best);
}
857*4882a593Smuzhiyun
rt6_select(struct net * net,struct fib6_node * fn,int oif,struct fib6_result * res,int strict)858*4882a593Smuzhiyun static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
859*4882a593Smuzhiyun struct fib6_result *res, int strict)
860*4882a593Smuzhiyun {
861*4882a593Smuzhiyun struct fib6_info *leaf = rcu_dereference(fn->leaf);
862*4882a593Smuzhiyun struct fib6_info *rt0;
863*4882a593Smuzhiyun bool do_rr = false;
864*4882a593Smuzhiyun int key_plen;
865*4882a593Smuzhiyun
866*4882a593Smuzhiyun /* make sure this function or its helpers sets f6i */
867*4882a593Smuzhiyun res->f6i = NULL;
868*4882a593Smuzhiyun
869*4882a593Smuzhiyun if (!leaf || leaf == net->ipv6.fib6_null_entry)
870*4882a593Smuzhiyun goto out;
871*4882a593Smuzhiyun
872*4882a593Smuzhiyun rt0 = rcu_dereference(fn->rr_ptr);
873*4882a593Smuzhiyun if (!rt0)
874*4882a593Smuzhiyun rt0 = leaf;
875*4882a593Smuzhiyun
876*4882a593Smuzhiyun /* Double check to make sure fn is not an intermediate node
877*4882a593Smuzhiyun * and fn->leaf does not points to its child's leaf
878*4882a593Smuzhiyun * (This might happen if all routes under fn are deleted from
879*4882a593Smuzhiyun * the tree and fib6_repair_tree() is called on the node.)
880*4882a593Smuzhiyun */
881*4882a593Smuzhiyun key_plen = rt0->fib6_dst.plen;
882*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
883*4882a593Smuzhiyun if (rt0->fib6_src.plen)
884*4882a593Smuzhiyun key_plen = rt0->fib6_src.plen;
885*4882a593Smuzhiyun #endif
886*4882a593Smuzhiyun if (fn->fn_bit != key_plen)
887*4882a593Smuzhiyun goto out;
888*4882a593Smuzhiyun
889*4882a593Smuzhiyun find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
890*4882a593Smuzhiyun if (do_rr) {
891*4882a593Smuzhiyun struct fib6_info *next = rcu_dereference(rt0->fib6_next);
892*4882a593Smuzhiyun
893*4882a593Smuzhiyun /* no entries matched; do round-robin */
894*4882a593Smuzhiyun if (!next || next->fib6_metric != rt0->fib6_metric)
895*4882a593Smuzhiyun next = leaf;
896*4882a593Smuzhiyun
897*4882a593Smuzhiyun if (next != rt0) {
898*4882a593Smuzhiyun spin_lock_bh(&leaf->fib6_table->tb6_lock);
899*4882a593Smuzhiyun /* make sure next is not being deleted from the tree */
900*4882a593Smuzhiyun if (next->fib6_node)
901*4882a593Smuzhiyun rcu_assign_pointer(fn->rr_ptr, next);
902*4882a593Smuzhiyun spin_unlock_bh(&leaf->fib6_table->tb6_lock);
903*4882a593Smuzhiyun }
904*4882a593Smuzhiyun }
905*4882a593Smuzhiyun
906*4882a593Smuzhiyun out:
907*4882a593Smuzhiyun if (!res->f6i) {
908*4882a593Smuzhiyun res->f6i = net->ipv6.fib6_null_entry;
909*4882a593Smuzhiyun res->nh = res->f6i->fib6_nh;
910*4882a593Smuzhiyun res->fib6_flags = res->f6i->fib6_flags;
911*4882a593Smuzhiyun res->fib6_type = res->f6i->fib6_type;
912*4882a593Smuzhiyun }
913*4882a593Smuzhiyun }
914*4882a593Smuzhiyun
rt6_is_gw_or_nonexthop(const struct fib6_result * res)915*4882a593Smuzhiyun static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
916*4882a593Smuzhiyun {
917*4882a593Smuzhiyun return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
918*4882a593Smuzhiyun res->nh->fib_nh_gw_family;
919*4882a593Smuzhiyun }
920*4882a593Smuzhiyun
921*4882a593Smuzhiyun #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option was received on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Returns -EINVAL when the option is too short, its length/prefix_len
 * combination is inconsistent, or the preference is
 * ICMPV6_ROUTER_PREF_INVALID; 0 otherwise.  A zero lifetime deletes an
 * existing matching route; a non-zero lifetime adds the route if missing or
 * refreshes its preference flags and expiry.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *)opt;
	struct net *net = dev_net(dev);
	struct in6_addr prefix_buf, *prefix;
	unsigned long lifetime;
	struct fib6_info *rt;
	unsigned int pref;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length != 3) {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	} else {
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	/* a zero-length prefix refers to the default route via gwaddr */
	if (rinfo->prefix_len)
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);
	else
		rt = rt6_get_dflt_router(net, gwaddr, dev);

	if (rt && !lifetime) {
		ip6_del_rt(net, rt, false);
		rt = NULL;
	}

	if (rt)
		rt->fib6_flags = RTF_ROUTEINFO |
				 (rt->fib6_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	else if (lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev, pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		fib6_set_expires(rt, jiffies + HZ * lifetime);
	else
		fib6_clean_expires(rt);

	fib6_info_release(rt);
	return 0;
}
995*4882a593Smuzhiyun #endif
996*4882a593Smuzhiyun
997*4882a593Smuzhiyun /*
998*4882a593Smuzhiyun * Misc support functions
999*4882a593Smuzhiyun */
1000*4882a593Smuzhiyun
1001*4882a593Smuzhiyun /* called with rcu_lock held */
ip6_rt_get_dev_rcu(const struct fib6_result * res)1002*4882a593Smuzhiyun static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
1003*4882a593Smuzhiyun {
1004*4882a593Smuzhiyun struct net_device *dev = res->nh->fib_nh_dev;
1005*4882a593Smuzhiyun
1006*4882a593Smuzhiyun if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
1007*4882a593Smuzhiyun /* for copies of local routes, dst->dev needs to be the
1008*4882a593Smuzhiyun * device if it is a master device, the master device if
1009*4882a593Smuzhiyun * device is enslaved, and the loopback as the default
1010*4882a593Smuzhiyun */
1011*4882a593Smuzhiyun if (netif_is_l3_slave(dev) &&
1012*4882a593Smuzhiyun !rt6_need_strict(&res->f6i->fib6_dst.addr))
1013*4882a593Smuzhiyun dev = l3mdev_master_dev_rcu(dev);
1014*4882a593Smuzhiyun else if (!netif_is_l3_master(dev))
1015*4882a593Smuzhiyun dev = dev_net(dev)->loopback_dev;
1016*4882a593Smuzhiyun /* last case is netif_is_l3_master(dev) is true in which
1017*4882a593Smuzhiyun * case we want dev returned to be dev
1018*4882a593Smuzhiyun */
1019*4882a593Smuzhiyun }
1020*4882a593Smuzhiyun
1021*4882a593Smuzhiyun return dev;
1022*4882a593Smuzhiyun }
1023*4882a593Smuzhiyun
1024*4882a593Smuzhiyun static const int fib6_prop[RTN_MAX + 1] = {
1025*4882a593Smuzhiyun [RTN_UNSPEC] = 0,
1026*4882a593Smuzhiyun [RTN_UNICAST] = 0,
1027*4882a593Smuzhiyun [RTN_LOCAL] = 0,
1028*4882a593Smuzhiyun [RTN_BROADCAST] = 0,
1029*4882a593Smuzhiyun [RTN_ANYCAST] = 0,
1030*4882a593Smuzhiyun [RTN_MULTICAST] = 0,
1031*4882a593Smuzhiyun [RTN_BLACKHOLE] = -EINVAL,
1032*4882a593Smuzhiyun [RTN_UNREACHABLE] = -EHOSTUNREACH,
1033*4882a593Smuzhiyun [RTN_PROHIBIT] = -EACCES,
1034*4882a593Smuzhiyun [RTN_THROW] = -EAGAIN,
1035*4882a593Smuzhiyun [RTN_NAT] = -EINVAL,
1036*4882a593Smuzhiyun [RTN_XRESOLVE] = -EINVAL,
1037*4882a593Smuzhiyun };
1038*4882a593Smuzhiyun
/* Look up the fib6_prop[] error code for route type @fib6_type. */
static int ip6_rt_type_to_error(u8 fib6_type)
{
	const int err = fib6_prop[fib6_type];

	return err;
}
1043*4882a593Smuzhiyun
fib6_info_dst_flags(struct fib6_info * rt)1044*4882a593Smuzhiyun static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
1045*4882a593Smuzhiyun {
1046*4882a593Smuzhiyun unsigned short flags = 0;
1047*4882a593Smuzhiyun
1048*4882a593Smuzhiyun if (rt->dst_nocount)
1049*4882a593Smuzhiyun flags |= DST_NOCOUNT;
1050*4882a593Smuzhiyun if (rt->dst_nopolicy)
1051*4882a593Smuzhiyun flags |= DST_NOPOLICY;
1052*4882a593Smuzhiyun
1053*4882a593Smuzhiyun return flags;
1054*4882a593Smuzhiyun }
1055*4882a593Smuzhiyun
/*
 * Set up @rt as a reject dst for route type @fib6_type: dst.error comes from
 * ip6_rt_type_to_error() and the input/output handlers are chosen by type
 * (blackhole, prohibit, or the generic discard handlers for everything else).
 */
static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
{
	struct dst_entry *dst = &rt->dst;

	dst->error = ip6_rt_type_to_error(fib6_type);

	if (fib6_type == RTN_BLACKHOLE) {
		dst->output = dst_discard_out;
		dst->input = dst_discard;
	} else if (fib6_type == RTN_PROHIBIT) {
		dst->output = ip6_pkt_prohibit_out;
		dst->input = ip6_pkt_prohibit;
	} else {
		/* RTN_THROW, RTN_UNREACHABLE and any other type */
		dst->output = ip6_pkt_discard_out;
		dst->input = ip6_pkt_discard;
	}
}
1077*4882a593Smuzhiyun
ip6_rt_init_dst(struct rt6_info * rt,const struct fib6_result * res)1078*4882a593Smuzhiyun static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
1079*4882a593Smuzhiyun {
1080*4882a593Smuzhiyun struct fib6_info *f6i = res->f6i;
1081*4882a593Smuzhiyun
1082*4882a593Smuzhiyun if (res->fib6_flags & RTF_REJECT) {
1083*4882a593Smuzhiyun ip6_rt_init_dst_reject(rt, res->fib6_type);
1084*4882a593Smuzhiyun return;
1085*4882a593Smuzhiyun }
1086*4882a593Smuzhiyun
1087*4882a593Smuzhiyun rt->dst.error = 0;
1088*4882a593Smuzhiyun rt->dst.output = ip6_output;
1089*4882a593Smuzhiyun
1090*4882a593Smuzhiyun if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
1091*4882a593Smuzhiyun rt->dst.input = ip6_input;
1092*4882a593Smuzhiyun } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
1093*4882a593Smuzhiyun rt->dst.input = ip6_mc_input;
1094*4882a593Smuzhiyun } else {
1095*4882a593Smuzhiyun rt->dst.input = ip6_forward;
1096*4882a593Smuzhiyun }
1097*4882a593Smuzhiyun
1098*4882a593Smuzhiyun if (res->nh->fib_nh_lws) {
1099*4882a593Smuzhiyun rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
1100*4882a593Smuzhiyun lwtunnel_set_redirect(&rt->dst);
1101*4882a593Smuzhiyun }
1102*4882a593Smuzhiyun
1103*4882a593Smuzhiyun rt->dst.lastuse = jiffies;
1104*4882a593Smuzhiyun }
1105*4882a593Smuzhiyun
1106*4882a593Smuzhiyun /* Caller must already hold reference to @from */
rt6_set_from(struct rt6_info * rt,struct fib6_info * from)1107*4882a593Smuzhiyun static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
1108*4882a593Smuzhiyun {
1109*4882a593Smuzhiyun rt->rt6i_flags &= ~RTF_EXPIRES;
1110*4882a593Smuzhiyun rcu_assign_pointer(rt->from, from);
1111*4882a593Smuzhiyun ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
1112*4882a593Smuzhiyun }
1113*4882a593Smuzhiyun
1114*4882a593Smuzhiyun /* Caller must already hold reference to f6i in result */
ip6_rt_copy_init(struct rt6_info * rt,const struct fib6_result * res)1115*4882a593Smuzhiyun static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
1116*4882a593Smuzhiyun {
1117*4882a593Smuzhiyun const struct fib6_nh *nh = res->nh;
1118*4882a593Smuzhiyun const struct net_device *dev = nh->fib_nh_dev;
1119*4882a593Smuzhiyun struct fib6_info *f6i = res->f6i;
1120*4882a593Smuzhiyun
1121*4882a593Smuzhiyun ip6_rt_init_dst(rt, res);
1122*4882a593Smuzhiyun
1123*4882a593Smuzhiyun rt->rt6i_dst = f6i->fib6_dst;
1124*4882a593Smuzhiyun rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
1125*4882a593Smuzhiyun rt->rt6i_flags = res->fib6_flags;
1126*4882a593Smuzhiyun if (nh->fib_nh_gw_family) {
1127*4882a593Smuzhiyun rt->rt6i_gateway = nh->fib_nh_gw6;
1128*4882a593Smuzhiyun rt->rt6i_flags |= RTF_GATEWAY;
1129*4882a593Smuzhiyun }
1130*4882a593Smuzhiyun rt6_set_from(rt, f6i);
1131*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1132*4882a593Smuzhiyun rt->rt6i_src = f6i->fib6_src;
1133*4882a593Smuzhiyun #endif
1134*4882a593Smuzhiyun }
1135*4882a593Smuzhiyun
fib6_backtrack(struct fib6_node * fn,struct in6_addr * saddr)1136*4882a593Smuzhiyun static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1137*4882a593Smuzhiyun struct in6_addr *saddr)
1138*4882a593Smuzhiyun {
1139*4882a593Smuzhiyun struct fib6_node *pn, *sn;
1140*4882a593Smuzhiyun while (1) {
1141*4882a593Smuzhiyun if (fn->fn_flags & RTN_TL_ROOT)
1142*4882a593Smuzhiyun return NULL;
1143*4882a593Smuzhiyun pn = rcu_dereference(fn->parent);
1144*4882a593Smuzhiyun sn = FIB6_SUBTREE(pn);
1145*4882a593Smuzhiyun if (sn && sn != fn)
1146*4882a593Smuzhiyun fn = fib6_node_lookup(sn, NULL, saddr);
1147*4882a593Smuzhiyun else
1148*4882a593Smuzhiyun fn = pn;
1149*4882a593Smuzhiyun if (fn->fn_flags & RTN_RTINFO)
1150*4882a593Smuzhiyun return fn;
1151*4882a593Smuzhiyun }
1152*4882a593Smuzhiyun }
1153*4882a593Smuzhiyun
ip6_hold_safe(struct net * net,struct rt6_info ** prt)1154*4882a593Smuzhiyun static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1155*4882a593Smuzhiyun {
1156*4882a593Smuzhiyun struct rt6_info *rt = *prt;
1157*4882a593Smuzhiyun
1158*4882a593Smuzhiyun if (dst_hold_safe(&rt->dst))
1159*4882a593Smuzhiyun return true;
1160*4882a593Smuzhiyun if (net) {
1161*4882a593Smuzhiyun rt = net->ipv6.ip6_null_entry;
1162*4882a593Smuzhiyun dst_hold(&rt->dst);
1163*4882a593Smuzhiyun } else {
1164*4882a593Smuzhiyun rt = NULL;
1165*4882a593Smuzhiyun }
1166*4882a593Smuzhiyun *prt = rt;
1167*4882a593Smuzhiyun return false;
1168*4882a593Smuzhiyun }
1169*4882a593Smuzhiyun
1170*4882a593Smuzhiyun /* called with rcu_lock held */
ip6_create_rt_rcu(const struct fib6_result * res)1171*4882a593Smuzhiyun static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
1172*4882a593Smuzhiyun {
1173*4882a593Smuzhiyun struct net_device *dev = res->nh->fib_nh_dev;
1174*4882a593Smuzhiyun struct fib6_info *f6i = res->f6i;
1175*4882a593Smuzhiyun unsigned short flags;
1176*4882a593Smuzhiyun struct rt6_info *nrt;
1177*4882a593Smuzhiyun
1178*4882a593Smuzhiyun if (!fib6_info_hold_safe(f6i))
1179*4882a593Smuzhiyun goto fallback;
1180*4882a593Smuzhiyun
1181*4882a593Smuzhiyun flags = fib6_info_dst_flags(f6i);
1182*4882a593Smuzhiyun nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1183*4882a593Smuzhiyun if (!nrt) {
1184*4882a593Smuzhiyun fib6_info_release(f6i);
1185*4882a593Smuzhiyun goto fallback;
1186*4882a593Smuzhiyun }
1187*4882a593Smuzhiyun
1188*4882a593Smuzhiyun ip6_rt_copy_init(nrt, res);
1189*4882a593Smuzhiyun return nrt;
1190*4882a593Smuzhiyun
1191*4882a593Smuzhiyun fallback:
1192*4882a593Smuzhiyun nrt = dev_net(dev)->ipv6.ip6_null_entry;
1193*4882a593Smuzhiyun dst_hold(&nrt->dst);
1194*4882a593Smuzhiyun return nrt;
1195*4882a593Smuzhiyun }
1196*4882a593Smuzhiyun
ip6_pol_route_lookup(struct net * net,struct fib6_table * table,struct flowi6 * fl6,const struct sk_buff * skb,int flags)1197*4882a593Smuzhiyun INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net,
1198*4882a593Smuzhiyun struct fib6_table *table,
1199*4882a593Smuzhiyun struct flowi6 *fl6,
1200*4882a593Smuzhiyun const struct sk_buff *skb,
1201*4882a593Smuzhiyun int flags)
1202*4882a593Smuzhiyun {
1203*4882a593Smuzhiyun struct fib6_result res = {};
1204*4882a593Smuzhiyun struct fib6_node *fn;
1205*4882a593Smuzhiyun struct rt6_info *rt;
1206*4882a593Smuzhiyun
1207*4882a593Smuzhiyun if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1208*4882a593Smuzhiyun flags &= ~RT6_LOOKUP_F_IFACE;
1209*4882a593Smuzhiyun
1210*4882a593Smuzhiyun rcu_read_lock();
1211*4882a593Smuzhiyun fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1212*4882a593Smuzhiyun restart:
1213*4882a593Smuzhiyun res.f6i = rcu_dereference(fn->leaf);
1214*4882a593Smuzhiyun if (!res.f6i)
1215*4882a593Smuzhiyun res.f6i = net->ipv6.fib6_null_entry;
1216*4882a593Smuzhiyun else
1217*4882a593Smuzhiyun rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1218*4882a593Smuzhiyun flags);
1219*4882a593Smuzhiyun
1220*4882a593Smuzhiyun if (res.f6i == net->ipv6.fib6_null_entry) {
1221*4882a593Smuzhiyun fn = fib6_backtrack(fn, &fl6->saddr);
1222*4882a593Smuzhiyun if (fn)
1223*4882a593Smuzhiyun goto restart;
1224*4882a593Smuzhiyun
1225*4882a593Smuzhiyun rt = net->ipv6.ip6_null_entry;
1226*4882a593Smuzhiyun dst_hold(&rt->dst);
1227*4882a593Smuzhiyun goto out;
1228*4882a593Smuzhiyun } else if (res.fib6_flags & RTF_REJECT) {
1229*4882a593Smuzhiyun goto do_create;
1230*4882a593Smuzhiyun }
1231*4882a593Smuzhiyun
1232*4882a593Smuzhiyun fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1233*4882a593Smuzhiyun fl6->flowi6_oif != 0, skb, flags);
1234*4882a593Smuzhiyun
1235*4882a593Smuzhiyun /* Search through exception table */
1236*4882a593Smuzhiyun rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
1237*4882a593Smuzhiyun if (rt) {
1238*4882a593Smuzhiyun if (ip6_hold_safe(net, &rt))
1239*4882a593Smuzhiyun dst_use_noref(&rt->dst, jiffies);
1240*4882a593Smuzhiyun } else {
1241*4882a593Smuzhiyun do_create:
1242*4882a593Smuzhiyun rt = ip6_create_rt_rcu(&res);
1243*4882a593Smuzhiyun }
1244*4882a593Smuzhiyun
1245*4882a593Smuzhiyun out:
1246*4882a593Smuzhiyun trace_fib6_table_lookup(net, &res, table, fl6);
1247*4882a593Smuzhiyun
1248*4882a593Smuzhiyun rcu_read_unlock();
1249*4882a593Smuzhiyun
1250*4882a593Smuzhiyun return rt;
1251*4882a593Smuzhiyun }
1252*4882a593Smuzhiyun
ip6_route_lookup(struct net * net,struct flowi6 * fl6,const struct sk_buff * skb,int flags)1253*4882a593Smuzhiyun struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1254*4882a593Smuzhiyun const struct sk_buff *skb, int flags)
1255*4882a593Smuzhiyun {
1256*4882a593Smuzhiyun return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1257*4882a593Smuzhiyun }
1258*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip6_route_lookup);
1259*4882a593Smuzhiyun
rt6_lookup(struct net * net,const struct in6_addr * daddr,const struct in6_addr * saddr,int oif,const struct sk_buff * skb,int strict)1260*4882a593Smuzhiyun struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1261*4882a593Smuzhiyun const struct in6_addr *saddr, int oif,
1262*4882a593Smuzhiyun const struct sk_buff *skb, int strict)
1263*4882a593Smuzhiyun {
1264*4882a593Smuzhiyun struct flowi6 fl6 = {
1265*4882a593Smuzhiyun .flowi6_oif = oif,
1266*4882a593Smuzhiyun .daddr = *daddr,
1267*4882a593Smuzhiyun };
1268*4882a593Smuzhiyun struct dst_entry *dst;
1269*4882a593Smuzhiyun int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1270*4882a593Smuzhiyun
1271*4882a593Smuzhiyun if (saddr) {
1272*4882a593Smuzhiyun memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1273*4882a593Smuzhiyun flags |= RT6_LOOKUP_F_HAS_SADDR;
1274*4882a593Smuzhiyun }
1275*4882a593Smuzhiyun
1276*4882a593Smuzhiyun dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1277*4882a593Smuzhiyun if (dst->error == 0)
1278*4882a593Smuzhiyun return (struct rt6_info *) dst;
1279*4882a593Smuzhiyun
1280*4882a593Smuzhiyun dst_release(dst);
1281*4882a593Smuzhiyun
1282*4882a593Smuzhiyun return NULL;
1283*4882a593Smuzhiyun }
1284*4882a593Smuzhiyun EXPORT_SYMBOL(rt6_lookup);
1285*4882a593Smuzhiyun
/* ip6_ins_rt is called with table->tb6_lock NOT held (free).
 * It takes a new route entry; if the addition fails for any reason, the
 * route is released.
 * Caller must hold dst before calling it.
 */
1291*4882a593Smuzhiyun
__ip6_ins_rt(struct fib6_info * rt,struct nl_info * info,struct netlink_ext_ack * extack)1292*4882a593Smuzhiyun static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1293*4882a593Smuzhiyun struct netlink_ext_ack *extack)
1294*4882a593Smuzhiyun {
1295*4882a593Smuzhiyun int err;
1296*4882a593Smuzhiyun struct fib6_table *table;
1297*4882a593Smuzhiyun
1298*4882a593Smuzhiyun table = rt->fib6_table;
1299*4882a593Smuzhiyun spin_lock_bh(&table->tb6_lock);
1300*4882a593Smuzhiyun err = fib6_add(&table->tb6_root, rt, info, extack);
1301*4882a593Smuzhiyun spin_unlock_bh(&table->tb6_lock);
1302*4882a593Smuzhiyun
1303*4882a593Smuzhiyun return err;
1304*4882a593Smuzhiyun }
1305*4882a593Smuzhiyun
ip6_ins_rt(struct net * net,struct fib6_info * rt)1306*4882a593Smuzhiyun int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1307*4882a593Smuzhiyun {
1308*4882a593Smuzhiyun struct nl_info info = { .nl_net = net, };
1309*4882a593Smuzhiyun
1310*4882a593Smuzhiyun return __ip6_ins_rt(rt, &info, NULL);
1311*4882a593Smuzhiyun }
1312*4882a593Smuzhiyun
ip6_rt_cache_alloc(const struct fib6_result * res,const struct in6_addr * daddr,const struct in6_addr * saddr)1313*4882a593Smuzhiyun static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
1314*4882a593Smuzhiyun const struct in6_addr *daddr,
1315*4882a593Smuzhiyun const struct in6_addr *saddr)
1316*4882a593Smuzhiyun {
1317*4882a593Smuzhiyun struct fib6_info *f6i = res->f6i;
1318*4882a593Smuzhiyun struct net_device *dev;
1319*4882a593Smuzhiyun struct rt6_info *rt;
1320*4882a593Smuzhiyun
1321*4882a593Smuzhiyun /*
1322*4882a593Smuzhiyun * Clone the route.
1323*4882a593Smuzhiyun */
1324*4882a593Smuzhiyun
1325*4882a593Smuzhiyun if (!fib6_info_hold_safe(f6i))
1326*4882a593Smuzhiyun return NULL;
1327*4882a593Smuzhiyun
1328*4882a593Smuzhiyun dev = ip6_rt_get_dev_rcu(res);
1329*4882a593Smuzhiyun rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1330*4882a593Smuzhiyun if (!rt) {
1331*4882a593Smuzhiyun fib6_info_release(f6i);
1332*4882a593Smuzhiyun return NULL;
1333*4882a593Smuzhiyun }
1334*4882a593Smuzhiyun
1335*4882a593Smuzhiyun ip6_rt_copy_init(rt, res);
1336*4882a593Smuzhiyun rt->rt6i_flags |= RTF_CACHE;
1337*4882a593Smuzhiyun rt->rt6i_dst.addr = *daddr;
1338*4882a593Smuzhiyun rt->rt6i_dst.plen = 128;
1339*4882a593Smuzhiyun
1340*4882a593Smuzhiyun if (!rt6_is_gw_or_nonexthop(res)) {
1341*4882a593Smuzhiyun if (f6i->fib6_dst.plen != 128 &&
1342*4882a593Smuzhiyun ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
1343*4882a593Smuzhiyun rt->rt6i_flags |= RTF_ANYCAST;
1344*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1345*4882a593Smuzhiyun if (rt->rt6i_src.plen && saddr) {
1346*4882a593Smuzhiyun rt->rt6i_src.addr = *saddr;
1347*4882a593Smuzhiyun rt->rt6i_src.plen = 128;
1348*4882a593Smuzhiyun }
1349*4882a593Smuzhiyun #endif
1350*4882a593Smuzhiyun }
1351*4882a593Smuzhiyun
1352*4882a593Smuzhiyun return rt;
1353*4882a593Smuzhiyun }
1354*4882a593Smuzhiyun
ip6_rt_pcpu_alloc(const struct fib6_result * res)1355*4882a593Smuzhiyun static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
1356*4882a593Smuzhiyun {
1357*4882a593Smuzhiyun struct fib6_info *f6i = res->f6i;
1358*4882a593Smuzhiyun unsigned short flags = fib6_info_dst_flags(f6i);
1359*4882a593Smuzhiyun struct net_device *dev;
1360*4882a593Smuzhiyun struct rt6_info *pcpu_rt;
1361*4882a593Smuzhiyun
1362*4882a593Smuzhiyun if (!fib6_info_hold_safe(f6i))
1363*4882a593Smuzhiyun return NULL;
1364*4882a593Smuzhiyun
1365*4882a593Smuzhiyun rcu_read_lock();
1366*4882a593Smuzhiyun dev = ip6_rt_get_dev_rcu(res);
1367*4882a593Smuzhiyun pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT);
1368*4882a593Smuzhiyun rcu_read_unlock();
1369*4882a593Smuzhiyun if (!pcpu_rt) {
1370*4882a593Smuzhiyun fib6_info_release(f6i);
1371*4882a593Smuzhiyun return NULL;
1372*4882a593Smuzhiyun }
1373*4882a593Smuzhiyun ip6_rt_copy_init(pcpu_rt, res);
1374*4882a593Smuzhiyun pcpu_rt->rt6i_flags |= RTF_PCPU;
1375*4882a593Smuzhiyun
1376*4882a593Smuzhiyun if (f6i->nh)
1377*4882a593Smuzhiyun pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev));
1378*4882a593Smuzhiyun
1379*4882a593Smuzhiyun return pcpu_rt;
1380*4882a593Smuzhiyun }
1381*4882a593Smuzhiyun
rt6_is_valid(const struct rt6_info * rt6)1382*4882a593Smuzhiyun static bool rt6_is_valid(const struct rt6_info *rt6)
1383*4882a593Smuzhiyun {
1384*4882a593Smuzhiyun return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev));
1385*4882a593Smuzhiyun }
1386*4882a593Smuzhiyun
1387*4882a593Smuzhiyun /* It should be called with rcu_read_lock() acquired */
rt6_get_pcpu_route(const struct fib6_result * res)1388*4882a593Smuzhiyun static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
1389*4882a593Smuzhiyun {
1390*4882a593Smuzhiyun struct rt6_info *pcpu_rt;
1391*4882a593Smuzhiyun
1392*4882a593Smuzhiyun pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
1393*4882a593Smuzhiyun
1394*4882a593Smuzhiyun if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) {
1395*4882a593Smuzhiyun struct rt6_info *prev, **p;
1396*4882a593Smuzhiyun
1397*4882a593Smuzhiyun p = this_cpu_ptr(res->nh->rt6i_pcpu);
1398*4882a593Smuzhiyun prev = xchg(p, NULL);
1399*4882a593Smuzhiyun if (prev) {
1400*4882a593Smuzhiyun dst_dev_put(&prev->dst);
1401*4882a593Smuzhiyun dst_release(&prev->dst);
1402*4882a593Smuzhiyun }
1403*4882a593Smuzhiyun
1404*4882a593Smuzhiyun pcpu_rt = NULL;
1405*4882a593Smuzhiyun }
1406*4882a593Smuzhiyun
1407*4882a593Smuzhiyun return pcpu_rt;
1408*4882a593Smuzhiyun }
1409*4882a593Smuzhiyun
rt6_make_pcpu_route(struct net * net,const struct fib6_result * res)1410*4882a593Smuzhiyun static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1411*4882a593Smuzhiyun const struct fib6_result *res)
1412*4882a593Smuzhiyun {
1413*4882a593Smuzhiyun struct rt6_info *pcpu_rt, *prev, **p;
1414*4882a593Smuzhiyun
1415*4882a593Smuzhiyun pcpu_rt = ip6_rt_pcpu_alloc(res);
1416*4882a593Smuzhiyun if (!pcpu_rt)
1417*4882a593Smuzhiyun return NULL;
1418*4882a593Smuzhiyun
1419*4882a593Smuzhiyun p = this_cpu_ptr(res->nh->rt6i_pcpu);
1420*4882a593Smuzhiyun prev = cmpxchg(p, NULL, pcpu_rt);
1421*4882a593Smuzhiyun BUG_ON(prev);
1422*4882a593Smuzhiyun
1423*4882a593Smuzhiyun if (res->f6i->fib6_destroying) {
1424*4882a593Smuzhiyun struct fib6_info *from;
1425*4882a593Smuzhiyun
1426*4882a593Smuzhiyun from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
1427*4882a593Smuzhiyun fib6_info_release(from);
1428*4882a593Smuzhiyun }
1429*4882a593Smuzhiyun
1430*4882a593Smuzhiyun return pcpu_rt;
1431*4882a593Smuzhiyun }
1432*4882a593Smuzhiyun
/* Exception hash table implementation.
 *
 * rt6_exception_lock is taken (with BHs disabled) by the functions below
 * that insert into, remove from or flush the exception tables.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1436*4882a593Smuzhiyun
1437*4882a593Smuzhiyun /* Remove rt6_ex from hash table and free the memory
1438*4882a593Smuzhiyun * Caller must hold rt6_exception_lock
1439*4882a593Smuzhiyun */
rt6_remove_exception(struct rt6_exception_bucket * bucket,struct rt6_exception * rt6_ex)1440*4882a593Smuzhiyun static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1441*4882a593Smuzhiyun struct rt6_exception *rt6_ex)
1442*4882a593Smuzhiyun {
1443*4882a593Smuzhiyun struct fib6_info *from;
1444*4882a593Smuzhiyun struct net *net;
1445*4882a593Smuzhiyun
1446*4882a593Smuzhiyun if (!bucket || !rt6_ex)
1447*4882a593Smuzhiyun return;
1448*4882a593Smuzhiyun
1449*4882a593Smuzhiyun net = dev_net(rt6_ex->rt6i->dst.dev);
1450*4882a593Smuzhiyun net->ipv6.rt6_stats->fib_rt_cache--;
1451*4882a593Smuzhiyun
1452*4882a593Smuzhiyun /* purge completely the exception to allow releasing the held resources:
1453*4882a593Smuzhiyun * some [sk] cache may keep the dst around for unlimited time
1454*4882a593Smuzhiyun */
1455*4882a593Smuzhiyun from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
1456*4882a593Smuzhiyun fib6_info_release(from);
1457*4882a593Smuzhiyun dst_dev_put(&rt6_ex->rt6i->dst);
1458*4882a593Smuzhiyun
1459*4882a593Smuzhiyun hlist_del_rcu(&rt6_ex->hlist);
1460*4882a593Smuzhiyun dst_release(&rt6_ex->rt6i->dst);
1461*4882a593Smuzhiyun kfree_rcu(rt6_ex, rcu);
1462*4882a593Smuzhiyun WARN_ON_ONCE(!bucket->depth);
1463*4882a593Smuzhiyun bucket->depth--;
1464*4882a593Smuzhiyun }
1465*4882a593Smuzhiyun
1466*4882a593Smuzhiyun /* Remove oldest rt6_ex in bucket and free the memory
1467*4882a593Smuzhiyun * Caller must hold rt6_exception_lock
1468*4882a593Smuzhiyun */
rt6_exception_remove_oldest(struct rt6_exception_bucket * bucket)1469*4882a593Smuzhiyun static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1470*4882a593Smuzhiyun {
1471*4882a593Smuzhiyun struct rt6_exception *rt6_ex, *oldest = NULL;
1472*4882a593Smuzhiyun
1473*4882a593Smuzhiyun if (!bucket)
1474*4882a593Smuzhiyun return;
1475*4882a593Smuzhiyun
1476*4882a593Smuzhiyun hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1477*4882a593Smuzhiyun if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1478*4882a593Smuzhiyun oldest = rt6_ex;
1479*4882a593Smuzhiyun }
1480*4882a593Smuzhiyun rt6_remove_exception(bucket, oldest);
1481*4882a593Smuzhiyun }
1482*4882a593Smuzhiyun
rt6_exception_hash(const struct in6_addr * dst,const struct in6_addr * src)1483*4882a593Smuzhiyun static u32 rt6_exception_hash(const struct in6_addr *dst,
1484*4882a593Smuzhiyun const struct in6_addr *src)
1485*4882a593Smuzhiyun {
1486*4882a593Smuzhiyun static siphash_key_t rt6_exception_key __read_mostly;
1487*4882a593Smuzhiyun struct {
1488*4882a593Smuzhiyun struct in6_addr dst;
1489*4882a593Smuzhiyun struct in6_addr src;
1490*4882a593Smuzhiyun } __aligned(SIPHASH_ALIGNMENT) combined = {
1491*4882a593Smuzhiyun .dst = *dst,
1492*4882a593Smuzhiyun };
1493*4882a593Smuzhiyun u64 val;
1494*4882a593Smuzhiyun
1495*4882a593Smuzhiyun net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key));
1496*4882a593Smuzhiyun
1497*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1498*4882a593Smuzhiyun if (src)
1499*4882a593Smuzhiyun combined.src = *src;
1500*4882a593Smuzhiyun #endif
1501*4882a593Smuzhiyun val = siphash(&combined, sizeof(combined), &rt6_exception_key);
1502*4882a593Smuzhiyun
1503*4882a593Smuzhiyun return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1504*4882a593Smuzhiyun }
1505*4882a593Smuzhiyun
1506*4882a593Smuzhiyun /* Helper function to find the cached rt in the hash table
1507*4882a593Smuzhiyun * and update bucket pointer to point to the bucket for this
1508*4882a593Smuzhiyun * (daddr, saddr) pair
1509*4882a593Smuzhiyun * Caller must hold rt6_exception_lock
1510*4882a593Smuzhiyun */
1511*4882a593Smuzhiyun static struct rt6_exception *
__rt6_find_exception_spinlock(struct rt6_exception_bucket ** bucket,const struct in6_addr * daddr,const struct in6_addr * saddr)1512*4882a593Smuzhiyun __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1513*4882a593Smuzhiyun const struct in6_addr *daddr,
1514*4882a593Smuzhiyun const struct in6_addr *saddr)
1515*4882a593Smuzhiyun {
1516*4882a593Smuzhiyun struct rt6_exception *rt6_ex;
1517*4882a593Smuzhiyun u32 hval;
1518*4882a593Smuzhiyun
1519*4882a593Smuzhiyun if (!(*bucket) || !daddr)
1520*4882a593Smuzhiyun return NULL;
1521*4882a593Smuzhiyun
1522*4882a593Smuzhiyun hval = rt6_exception_hash(daddr, saddr);
1523*4882a593Smuzhiyun *bucket += hval;
1524*4882a593Smuzhiyun
1525*4882a593Smuzhiyun hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1526*4882a593Smuzhiyun struct rt6_info *rt6 = rt6_ex->rt6i;
1527*4882a593Smuzhiyun bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1528*4882a593Smuzhiyun
1529*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1530*4882a593Smuzhiyun if (matched && saddr)
1531*4882a593Smuzhiyun matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1532*4882a593Smuzhiyun #endif
1533*4882a593Smuzhiyun if (matched)
1534*4882a593Smuzhiyun return rt6_ex;
1535*4882a593Smuzhiyun }
1536*4882a593Smuzhiyun return NULL;
1537*4882a593Smuzhiyun }
1538*4882a593Smuzhiyun
1539*4882a593Smuzhiyun /* Helper function to find the cached rt in the hash table
1540*4882a593Smuzhiyun * and update bucket pointer to point to the bucket for this
1541*4882a593Smuzhiyun * (daddr, saddr) pair
1542*4882a593Smuzhiyun * Caller must hold rcu_read_lock()
1543*4882a593Smuzhiyun */
1544*4882a593Smuzhiyun static struct rt6_exception *
__rt6_find_exception_rcu(struct rt6_exception_bucket ** bucket,const struct in6_addr * daddr,const struct in6_addr * saddr)1545*4882a593Smuzhiyun __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1546*4882a593Smuzhiyun const struct in6_addr *daddr,
1547*4882a593Smuzhiyun const struct in6_addr *saddr)
1548*4882a593Smuzhiyun {
1549*4882a593Smuzhiyun struct rt6_exception *rt6_ex;
1550*4882a593Smuzhiyun u32 hval;
1551*4882a593Smuzhiyun
1552*4882a593Smuzhiyun WARN_ON_ONCE(!rcu_read_lock_held());
1553*4882a593Smuzhiyun
1554*4882a593Smuzhiyun if (!(*bucket) || !daddr)
1555*4882a593Smuzhiyun return NULL;
1556*4882a593Smuzhiyun
1557*4882a593Smuzhiyun hval = rt6_exception_hash(daddr, saddr);
1558*4882a593Smuzhiyun *bucket += hval;
1559*4882a593Smuzhiyun
1560*4882a593Smuzhiyun hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1561*4882a593Smuzhiyun struct rt6_info *rt6 = rt6_ex->rt6i;
1562*4882a593Smuzhiyun bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1563*4882a593Smuzhiyun
1564*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1565*4882a593Smuzhiyun if (matched && saddr)
1566*4882a593Smuzhiyun matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1567*4882a593Smuzhiyun #endif
1568*4882a593Smuzhiyun if (matched)
1569*4882a593Smuzhiyun return rt6_ex;
1570*4882a593Smuzhiyun }
1571*4882a593Smuzhiyun return NULL;
1572*4882a593Smuzhiyun }
1573*4882a593Smuzhiyun
fib6_mtu(const struct fib6_result * res)1574*4882a593Smuzhiyun static unsigned int fib6_mtu(const struct fib6_result *res)
1575*4882a593Smuzhiyun {
1576*4882a593Smuzhiyun const struct fib6_nh *nh = res->nh;
1577*4882a593Smuzhiyun unsigned int mtu;
1578*4882a593Smuzhiyun
1579*4882a593Smuzhiyun if (res->f6i->fib6_pmtu) {
1580*4882a593Smuzhiyun mtu = res->f6i->fib6_pmtu;
1581*4882a593Smuzhiyun } else {
1582*4882a593Smuzhiyun struct net_device *dev = nh->fib_nh_dev;
1583*4882a593Smuzhiyun struct inet6_dev *idev;
1584*4882a593Smuzhiyun
1585*4882a593Smuzhiyun rcu_read_lock();
1586*4882a593Smuzhiyun idev = __in6_dev_get(dev);
1587*4882a593Smuzhiyun mtu = idev->cnf.mtu6;
1588*4882a593Smuzhiyun rcu_read_unlock();
1589*4882a593Smuzhiyun }
1590*4882a593Smuzhiyun
1591*4882a593Smuzhiyun mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1592*4882a593Smuzhiyun
1593*4882a593Smuzhiyun return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
1594*4882a593Smuzhiyun }
1595*4882a593Smuzhiyun
/* Flag kept in bit 0 of the nh->rt6i_exception_bucket pointer value; it is
 * set by fib6_nh_excptn_bucket_set_flushed() and masked off again by
 * fib6_nh_get_excptn_bucket().
 */
#define FIB6_EXCEPTION_BUCKET_FLUSHED	0x1UL
1597*4882a593Smuzhiyun
1598*4882a593Smuzhiyun /* used when the flushed bit is not relevant, only access to the bucket
1599*4882a593Smuzhiyun * (ie., all bucket users except rt6_insert_exception);
1600*4882a593Smuzhiyun *
1601*4882a593Smuzhiyun * called under rcu lock; sometimes called with rt6_exception_lock held
1602*4882a593Smuzhiyun */
1603*4882a593Smuzhiyun static
fib6_nh_get_excptn_bucket(const struct fib6_nh * nh,spinlock_t * lock)1604*4882a593Smuzhiyun struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
1605*4882a593Smuzhiyun spinlock_t *lock)
1606*4882a593Smuzhiyun {
1607*4882a593Smuzhiyun struct rt6_exception_bucket *bucket;
1608*4882a593Smuzhiyun
1609*4882a593Smuzhiyun if (lock)
1610*4882a593Smuzhiyun bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1611*4882a593Smuzhiyun lockdep_is_held(lock));
1612*4882a593Smuzhiyun else
1613*4882a593Smuzhiyun bucket = rcu_dereference(nh->rt6i_exception_bucket);
1614*4882a593Smuzhiyun
1615*4882a593Smuzhiyun /* remove bucket flushed bit if set */
1616*4882a593Smuzhiyun if (bucket) {
1617*4882a593Smuzhiyun unsigned long p = (unsigned long)bucket;
1618*4882a593Smuzhiyun
1619*4882a593Smuzhiyun p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
1620*4882a593Smuzhiyun bucket = (struct rt6_exception_bucket *)p;
1621*4882a593Smuzhiyun }
1622*4882a593Smuzhiyun
1623*4882a593Smuzhiyun return bucket;
1624*4882a593Smuzhiyun }
1625*4882a593Smuzhiyun
fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket * bucket)1626*4882a593Smuzhiyun static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
1627*4882a593Smuzhiyun {
1628*4882a593Smuzhiyun unsigned long p = (unsigned long)bucket;
1629*4882a593Smuzhiyun
1630*4882a593Smuzhiyun return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
1631*4882a593Smuzhiyun }
1632*4882a593Smuzhiyun
1633*4882a593Smuzhiyun /* called with rt6_exception_lock held */
fib6_nh_excptn_bucket_set_flushed(struct fib6_nh * nh,spinlock_t * lock)1634*4882a593Smuzhiyun static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
1635*4882a593Smuzhiyun spinlock_t *lock)
1636*4882a593Smuzhiyun {
1637*4882a593Smuzhiyun struct rt6_exception_bucket *bucket;
1638*4882a593Smuzhiyun unsigned long p;
1639*4882a593Smuzhiyun
1640*4882a593Smuzhiyun bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1641*4882a593Smuzhiyun lockdep_is_held(lock));
1642*4882a593Smuzhiyun
1643*4882a593Smuzhiyun p = (unsigned long)bucket;
1644*4882a593Smuzhiyun p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
1645*4882a593Smuzhiyun bucket = (struct rt6_exception_bucket *)p;
1646*4882a593Smuzhiyun rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1647*4882a593Smuzhiyun }
1648*4882a593Smuzhiyun
rt6_insert_exception(struct rt6_info * nrt,const struct fib6_result * res)1649*4882a593Smuzhiyun static int rt6_insert_exception(struct rt6_info *nrt,
1650*4882a593Smuzhiyun const struct fib6_result *res)
1651*4882a593Smuzhiyun {
1652*4882a593Smuzhiyun struct net *net = dev_net(nrt->dst.dev);
1653*4882a593Smuzhiyun struct rt6_exception_bucket *bucket;
1654*4882a593Smuzhiyun struct fib6_info *f6i = res->f6i;
1655*4882a593Smuzhiyun struct in6_addr *src_key = NULL;
1656*4882a593Smuzhiyun struct rt6_exception *rt6_ex;
1657*4882a593Smuzhiyun struct fib6_nh *nh = res->nh;
1658*4882a593Smuzhiyun int max_depth;
1659*4882a593Smuzhiyun int err = 0;
1660*4882a593Smuzhiyun
1661*4882a593Smuzhiyun spin_lock_bh(&rt6_exception_lock);
1662*4882a593Smuzhiyun
1663*4882a593Smuzhiyun bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
1664*4882a593Smuzhiyun lockdep_is_held(&rt6_exception_lock));
1665*4882a593Smuzhiyun if (!bucket) {
1666*4882a593Smuzhiyun bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1667*4882a593Smuzhiyun GFP_ATOMIC);
1668*4882a593Smuzhiyun if (!bucket) {
1669*4882a593Smuzhiyun err = -ENOMEM;
1670*4882a593Smuzhiyun goto out;
1671*4882a593Smuzhiyun }
1672*4882a593Smuzhiyun rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
1673*4882a593Smuzhiyun } else if (fib6_nh_excptn_bucket_flushed(bucket)) {
1674*4882a593Smuzhiyun err = -EINVAL;
1675*4882a593Smuzhiyun goto out;
1676*4882a593Smuzhiyun }
1677*4882a593Smuzhiyun
1678*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1679*4882a593Smuzhiyun /* fib6_src.plen != 0 indicates f6i is in subtree
1680*4882a593Smuzhiyun * and exception table is indexed by a hash of
1681*4882a593Smuzhiyun * both fib6_dst and fib6_src.
1682*4882a593Smuzhiyun * Otherwise, the exception table is indexed by
1683*4882a593Smuzhiyun * a hash of only fib6_dst.
1684*4882a593Smuzhiyun */
1685*4882a593Smuzhiyun if (f6i->fib6_src.plen)
1686*4882a593Smuzhiyun src_key = &nrt->rt6i_src.addr;
1687*4882a593Smuzhiyun #endif
1688*4882a593Smuzhiyun /* rt6_mtu_change() might lower mtu on f6i.
1689*4882a593Smuzhiyun * Only insert this exception route if its mtu
1690*4882a593Smuzhiyun * is less than f6i's mtu value.
1691*4882a593Smuzhiyun */
1692*4882a593Smuzhiyun if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
1693*4882a593Smuzhiyun err = -EINVAL;
1694*4882a593Smuzhiyun goto out;
1695*4882a593Smuzhiyun }
1696*4882a593Smuzhiyun
1697*4882a593Smuzhiyun rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1698*4882a593Smuzhiyun src_key);
1699*4882a593Smuzhiyun if (rt6_ex)
1700*4882a593Smuzhiyun rt6_remove_exception(bucket, rt6_ex);
1701*4882a593Smuzhiyun
1702*4882a593Smuzhiyun rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1703*4882a593Smuzhiyun if (!rt6_ex) {
1704*4882a593Smuzhiyun err = -ENOMEM;
1705*4882a593Smuzhiyun goto out;
1706*4882a593Smuzhiyun }
1707*4882a593Smuzhiyun rt6_ex->rt6i = nrt;
1708*4882a593Smuzhiyun rt6_ex->stamp = jiffies;
1709*4882a593Smuzhiyun hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1710*4882a593Smuzhiyun bucket->depth++;
1711*4882a593Smuzhiyun net->ipv6.rt6_stats->fib_rt_cache++;
1712*4882a593Smuzhiyun
1713*4882a593Smuzhiyun /* Randomize max depth to avoid some side channels attacks. */
1714*4882a593Smuzhiyun max_depth = FIB6_MAX_DEPTH + prandom_u32_max(FIB6_MAX_DEPTH);
1715*4882a593Smuzhiyun while (bucket->depth > max_depth)
1716*4882a593Smuzhiyun rt6_exception_remove_oldest(bucket);
1717*4882a593Smuzhiyun
1718*4882a593Smuzhiyun out:
1719*4882a593Smuzhiyun spin_unlock_bh(&rt6_exception_lock);
1720*4882a593Smuzhiyun
1721*4882a593Smuzhiyun /* Update fn->fn_sernum to invalidate all cached dst */
1722*4882a593Smuzhiyun if (!err) {
1723*4882a593Smuzhiyun spin_lock_bh(&f6i->fib6_table->tb6_lock);
1724*4882a593Smuzhiyun fib6_update_sernum(net, f6i);
1725*4882a593Smuzhiyun spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1726*4882a593Smuzhiyun fib6_force_start_gc(net);
1727*4882a593Smuzhiyun }
1728*4882a593Smuzhiyun
1729*4882a593Smuzhiyun return err;
1730*4882a593Smuzhiyun }
1731*4882a593Smuzhiyun
fib6_nh_flush_exceptions(struct fib6_nh * nh,struct fib6_info * from)1732*4882a593Smuzhiyun static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
1733*4882a593Smuzhiyun {
1734*4882a593Smuzhiyun struct rt6_exception_bucket *bucket;
1735*4882a593Smuzhiyun struct rt6_exception *rt6_ex;
1736*4882a593Smuzhiyun struct hlist_node *tmp;
1737*4882a593Smuzhiyun int i;
1738*4882a593Smuzhiyun
1739*4882a593Smuzhiyun spin_lock_bh(&rt6_exception_lock);
1740*4882a593Smuzhiyun
1741*4882a593Smuzhiyun bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1742*4882a593Smuzhiyun if (!bucket)
1743*4882a593Smuzhiyun goto out;
1744*4882a593Smuzhiyun
1745*4882a593Smuzhiyun /* Prevent rt6_insert_exception() to recreate the bucket list */
1746*4882a593Smuzhiyun if (!from)
1747*4882a593Smuzhiyun fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
1748*4882a593Smuzhiyun
1749*4882a593Smuzhiyun for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1750*4882a593Smuzhiyun hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
1751*4882a593Smuzhiyun if (!from ||
1752*4882a593Smuzhiyun rcu_access_pointer(rt6_ex->rt6i->from) == from)
1753*4882a593Smuzhiyun rt6_remove_exception(bucket, rt6_ex);
1754*4882a593Smuzhiyun }
1755*4882a593Smuzhiyun WARN_ON_ONCE(!from && bucket->depth);
1756*4882a593Smuzhiyun bucket++;
1757*4882a593Smuzhiyun }
1758*4882a593Smuzhiyun out:
1759*4882a593Smuzhiyun spin_unlock_bh(&rt6_exception_lock);
1760*4882a593Smuzhiyun }
1761*4882a593Smuzhiyun
/* nexthop_for_each_fib6_nh() callback: flush the exceptions of @nh that
 * belong to the fib entry passed in @arg. Always returns 0.
 */
static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
{
	struct fib6_info *owner = arg;

	fib6_nh_flush_exceptions(nh, owner);
	return 0;
}
1770*4882a593Smuzhiyun
rt6_flush_exceptions(struct fib6_info * f6i)1771*4882a593Smuzhiyun void rt6_flush_exceptions(struct fib6_info *f6i)
1772*4882a593Smuzhiyun {
1773*4882a593Smuzhiyun if (f6i->nh)
1774*4882a593Smuzhiyun nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
1775*4882a593Smuzhiyun f6i);
1776*4882a593Smuzhiyun else
1777*4882a593Smuzhiyun fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
1778*4882a593Smuzhiyun }
1779*4882a593Smuzhiyun
1780*4882a593Smuzhiyun /* Find cached rt in the hash table inside passed in rt
1781*4882a593Smuzhiyun * Caller has to hold rcu_read_lock()
1782*4882a593Smuzhiyun */
rt6_find_cached_rt(const struct fib6_result * res,const struct in6_addr * daddr,const struct in6_addr * saddr)1783*4882a593Smuzhiyun static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
1784*4882a593Smuzhiyun const struct in6_addr *daddr,
1785*4882a593Smuzhiyun const struct in6_addr *saddr)
1786*4882a593Smuzhiyun {
1787*4882a593Smuzhiyun const struct in6_addr *src_key = NULL;
1788*4882a593Smuzhiyun struct rt6_exception_bucket *bucket;
1789*4882a593Smuzhiyun struct rt6_exception *rt6_ex;
1790*4882a593Smuzhiyun struct rt6_info *ret = NULL;
1791*4882a593Smuzhiyun
1792*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1793*4882a593Smuzhiyun /* fib6i_src.plen != 0 indicates f6i is in subtree
1794*4882a593Smuzhiyun * and exception table is indexed by a hash of
1795*4882a593Smuzhiyun * both fib6_dst and fib6_src.
1796*4882a593Smuzhiyun * However, the src addr used to create the hash
1797*4882a593Smuzhiyun * might not be exactly the passed in saddr which
1798*4882a593Smuzhiyun * is a /128 addr from the flow.
1799*4882a593Smuzhiyun * So we need to use f6i->fib6_src to redo lookup
1800*4882a593Smuzhiyun * if the passed in saddr does not find anything.
1801*4882a593Smuzhiyun * (See the logic in ip6_rt_cache_alloc() on how
1802*4882a593Smuzhiyun * rt->rt6i_src is updated.)
1803*4882a593Smuzhiyun */
1804*4882a593Smuzhiyun if (res->f6i->fib6_src.plen)
1805*4882a593Smuzhiyun src_key = saddr;
1806*4882a593Smuzhiyun find_ex:
1807*4882a593Smuzhiyun #endif
1808*4882a593Smuzhiyun bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
1809*4882a593Smuzhiyun rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1810*4882a593Smuzhiyun
1811*4882a593Smuzhiyun if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1812*4882a593Smuzhiyun ret = rt6_ex->rt6i;
1813*4882a593Smuzhiyun
1814*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1815*4882a593Smuzhiyun /* Use fib6_src as src_key and redo lookup */
1816*4882a593Smuzhiyun if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1817*4882a593Smuzhiyun src_key = &res->f6i->fib6_src.addr;
1818*4882a593Smuzhiyun goto find_ex;
1819*4882a593Smuzhiyun }
1820*4882a593Smuzhiyun #endif
1821*4882a593Smuzhiyun
1822*4882a593Smuzhiyun return ret;
1823*4882a593Smuzhiyun }
1824*4882a593Smuzhiyun
1825*4882a593Smuzhiyun /* Remove the passed in cached rt from the hash table that contains it */
fib6_nh_remove_exception(const struct fib6_nh * nh,int plen,const struct rt6_info * rt)1826*4882a593Smuzhiyun static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
1827*4882a593Smuzhiyun const struct rt6_info *rt)
1828*4882a593Smuzhiyun {
1829*4882a593Smuzhiyun const struct in6_addr *src_key = NULL;
1830*4882a593Smuzhiyun struct rt6_exception_bucket *bucket;
1831*4882a593Smuzhiyun struct rt6_exception *rt6_ex;
1832*4882a593Smuzhiyun int err;
1833*4882a593Smuzhiyun
1834*4882a593Smuzhiyun if (!rcu_access_pointer(nh->rt6i_exception_bucket))
1835*4882a593Smuzhiyun return -ENOENT;
1836*4882a593Smuzhiyun
1837*4882a593Smuzhiyun spin_lock_bh(&rt6_exception_lock);
1838*4882a593Smuzhiyun bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
1839*4882a593Smuzhiyun
1840*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1841*4882a593Smuzhiyun /* rt6i_src.plen != 0 indicates 'from' is in subtree
1842*4882a593Smuzhiyun * and exception table is indexed by a hash of
1843*4882a593Smuzhiyun * both rt6i_dst and rt6i_src.
1844*4882a593Smuzhiyun * Otherwise, the exception table is indexed by
1845*4882a593Smuzhiyun * a hash of only rt6i_dst.
1846*4882a593Smuzhiyun */
1847*4882a593Smuzhiyun if (plen)
1848*4882a593Smuzhiyun src_key = &rt->rt6i_src.addr;
1849*4882a593Smuzhiyun #endif
1850*4882a593Smuzhiyun rt6_ex = __rt6_find_exception_spinlock(&bucket,
1851*4882a593Smuzhiyun &rt->rt6i_dst.addr,
1852*4882a593Smuzhiyun src_key);
1853*4882a593Smuzhiyun if (rt6_ex) {
1854*4882a593Smuzhiyun rt6_remove_exception(bucket, rt6_ex);
1855*4882a593Smuzhiyun err = 0;
1856*4882a593Smuzhiyun } else {
1857*4882a593Smuzhiyun err = -ENOENT;
1858*4882a593Smuzhiyun }
1859*4882a593Smuzhiyun
1860*4882a593Smuzhiyun spin_unlock_bh(&rt6_exception_lock);
1861*4882a593Smuzhiyun return err;
1862*4882a593Smuzhiyun }
1863*4882a593Smuzhiyun
/* Argument bundle handed to rt6_nh_remove_exception_rt() through
 * nexthop_for_each_fib6_nh().
 */
struct fib6_nh_excptn_arg {
	struct rt6_info	*rt;	/* cached route to remove */
	int		plen;	/* passed on as @plen to fib6_nh_remove_exception() */
};
1868*4882a593Smuzhiyun
rt6_nh_remove_exception_rt(struct fib6_nh * nh,void * _arg)1869*4882a593Smuzhiyun static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
1870*4882a593Smuzhiyun {
1871*4882a593Smuzhiyun struct fib6_nh_excptn_arg *arg = _arg;
1872*4882a593Smuzhiyun int err;
1873*4882a593Smuzhiyun
1874*4882a593Smuzhiyun err = fib6_nh_remove_exception(nh, arg->plen, arg->rt);
1875*4882a593Smuzhiyun if (err == 0)
1876*4882a593Smuzhiyun return 1;
1877*4882a593Smuzhiyun
1878*4882a593Smuzhiyun return 0;
1879*4882a593Smuzhiyun }
1880*4882a593Smuzhiyun
rt6_remove_exception_rt(struct rt6_info * rt)1881*4882a593Smuzhiyun static int rt6_remove_exception_rt(struct rt6_info *rt)
1882*4882a593Smuzhiyun {
1883*4882a593Smuzhiyun struct fib6_info *from;
1884*4882a593Smuzhiyun
1885*4882a593Smuzhiyun from = rcu_dereference(rt->from);
1886*4882a593Smuzhiyun if (!from || !(rt->rt6i_flags & RTF_CACHE))
1887*4882a593Smuzhiyun return -EINVAL;
1888*4882a593Smuzhiyun
1889*4882a593Smuzhiyun if (from->nh) {
1890*4882a593Smuzhiyun struct fib6_nh_excptn_arg arg = {
1891*4882a593Smuzhiyun .rt = rt,
1892*4882a593Smuzhiyun .plen = from->fib6_src.plen
1893*4882a593Smuzhiyun };
1894*4882a593Smuzhiyun int rc;
1895*4882a593Smuzhiyun
1896*4882a593Smuzhiyun /* rc = 1 means an entry was found */
1897*4882a593Smuzhiyun rc = nexthop_for_each_fib6_nh(from->nh,
1898*4882a593Smuzhiyun rt6_nh_remove_exception_rt,
1899*4882a593Smuzhiyun &arg);
1900*4882a593Smuzhiyun return rc ? 0 : -ENOENT;
1901*4882a593Smuzhiyun }
1902*4882a593Smuzhiyun
1903*4882a593Smuzhiyun return fib6_nh_remove_exception(from->fib6_nh,
1904*4882a593Smuzhiyun from->fib6_src.plen, rt);
1905*4882a593Smuzhiyun }
1906*4882a593Smuzhiyun
1907*4882a593Smuzhiyun /* Find rt6_ex which contains the passed in rt cache and
1908*4882a593Smuzhiyun * refresh its stamp
1909*4882a593Smuzhiyun */
fib6_nh_update_exception(const struct fib6_nh * nh,int plen,const struct rt6_info * rt)1910*4882a593Smuzhiyun static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
1911*4882a593Smuzhiyun const struct rt6_info *rt)
1912*4882a593Smuzhiyun {
1913*4882a593Smuzhiyun const struct in6_addr *src_key = NULL;
1914*4882a593Smuzhiyun struct rt6_exception_bucket *bucket;
1915*4882a593Smuzhiyun struct rt6_exception *rt6_ex;
1916*4882a593Smuzhiyun
1917*4882a593Smuzhiyun bucket = fib6_nh_get_excptn_bucket(nh, NULL);
1918*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1919*4882a593Smuzhiyun /* rt6i_src.plen != 0 indicates 'from' is in subtree
1920*4882a593Smuzhiyun * and exception table is indexed by a hash of
1921*4882a593Smuzhiyun * both rt6i_dst and rt6i_src.
1922*4882a593Smuzhiyun * Otherwise, the exception table is indexed by
1923*4882a593Smuzhiyun * a hash of only rt6i_dst.
1924*4882a593Smuzhiyun */
1925*4882a593Smuzhiyun if (plen)
1926*4882a593Smuzhiyun src_key = &rt->rt6i_src.addr;
1927*4882a593Smuzhiyun #endif
1928*4882a593Smuzhiyun rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
1929*4882a593Smuzhiyun if (rt6_ex)
1930*4882a593Smuzhiyun rt6_ex->stamp = jiffies;
1931*4882a593Smuzhiyun }
1932*4882a593Smuzhiyun
/* Search criteria and result slot for fib6_nh_find_match(). */
struct fib6_nh_match_arg {
	const struct net_device	*dev;	/* compared with nh->fib_nh_dev */
	const struct in6_addr	*gw;	/* gateway to match; NULL matches only a nexthop without gateway family */
	struct fib6_nh		*match;	/* set to the matching nexthop, if any */
};
1938*4882a593Smuzhiyun
1939*4882a593Smuzhiyun /* determine if fib6_nh has given device and gateway */
fib6_nh_find_match(struct fib6_nh * nh,void * _arg)1940*4882a593Smuzhiyun static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
1941*4882a593Smuzhiyun {
1942*4882a593Smuzhiyun struct fib6_nh_match_arg *arg = _arg;
1943*4882a593Smuzhiyun
1944*4882a593Smuzhiyun if (arg->dev != nh->fib_nh_dev ||
1945*4882a593Smuzhiyun (arg->gw && !nh->fib_nh_gw_family) ||
1946*4882a593Smuzhiyun (!arg->gw && nh->fib_nh_gw_family) ||
1947*4882a593Smuzhiyun (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
1948*4882a593Smuzhiyun return 0;
1949*4882a593Smuzhiyun
1950*4882a593Smuzhiyun arg->match = nh;
1951*4882a593Smuzhiyun
1952*4882a593Smuzhiyun /* found a match, break the loop */
1953*4882a593Smuzhiyun return 1;
1954*4882a593Smuzhiyun }
1955*4882a593Smuzhiyun
rt6_update_exception_stamp_rt(struct rt6_info * rt)1956*4882a593Smuzhiyun static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1957*4882a593Smuzhiyun {
1958*4882a593Smuzhiyun struct fib6_info *from;
1959*4882a593Smuzhiyun struct fib6_nh *fib6_nh;
1960*4882a593Smuzhiyun
1961*4882a593Smuzhiyun rcu_read_lock();
1962*4882a593Smuzhiyun
1963*4882a593Smuzhiyun from = rcu_dereference(rt->from);
1964*4882a593Smuzhiyun if (!from || !(rt->rt6i_flags & RTF_CACHE))
1965*4882a593Smuzhiyun goto unlock;
1966*4882a593Smuzhiyun
1967*4882a593Smuzhiyun if (from->nh) {
1968*4882a593Smuzhiyun struct fib6_nh_match_arg arg = {
1969*4882a593Smuzhiyun .dev = rt->dst.dev,
1970*4882a593Smuzhiyun .gw = &rt->rt6i_gateway,
1971*4882a593Smuzhiyun };
1972*4882a593Smuzhiyun
1973*4882a593Smuzhiyun nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
1974*4882a593Smuzhiyun
1975*4882a593Smuzhiyun if (!arg.match)
1976*4882a593Smuzhiyun goto unlock;
1977*4882a593Smuzhiyun fib6_nh = arg.match;
1978*4882a593Smuzhiyun } else {
1979*4882a593Smuzhiyun fib6_nh = from->fib6_nh;
1980*4882a593Smuzhiyun }
1981*4882a593Smuzhiyun fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
1982*4882a593Smuzhiyun unlock:
1983*4882a593Smuzhiyun rcu_read_unlock();
1984*4882a593Smuzhiyun }
1985*4882a593Smuzhiyun
rt6_mtu_change_route_allowed(struct inet6_dev * idev,struct rt6_info * rt,int mtu)1986*4882a593Smuzhiyun static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1987*4882a593Smuzhiyun struct rt6_info *rt, int mtu)
1988*4882a593Smuzhiyun {
1989*4882a593Smuzhiyun /* If the new MTU is lower than the route PMTU, this new MTU will be the
1990*4882a593Smuzhiyun * lowest MTU in the path: always allow updating the route PMTU to
1991*4882a593Smuzhiyun * reflect PMTU decreases.
1992*4882a593Smuzhiyun *
1993*4882a593Smuzhiyun * If the new MTU is higher, and the route PMTU is equal to the local
1994*4882a593Smuzhiyun * MTU, this means the old MTU is the lowest in the path, so allow
1995*4882a593Smuzhiyun * updating it: if other nodes now have lower MTUs, PMTU discovery will
1996*4882a593Smuzhiyun * handle this.
1997*4882a593Smuzhiyun */
1998*4882a593Smuzhiyun
1999*4882a593Smuzhiyun if (dst_mtu(&rt->dst) >= mtu)
2000*4882a593Smuzhiyun return true;
2001*4882a593Smuzhiyun
2002*4882a593Smuzhiyun if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
2003*4882a593Smuzhiyun return true;
2004*4882a593Smuzhiyun
2005*4882a593Smuzhiyun return false;
2006*4882a593Smuzhiyun }
2007*4882a593Smuzhiyun
rt6_exceptions_update_pmtu(struct inet6_dev * idev,const struct fib6_nh * nh,int mtu)2008*4882a593Smuzhiyun static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
2009*4882a593Smuzhiyun const struct fib6_nh *nh, int mtu)
2010*4882a593Smuzhiyun {
2011*4882a593Smuzhiyun struct rt6_exception_bucket *bucket;
2012*4882a593Smuzhiyun struct rt6_exception *rt6_ex;
2013*4882a593Smuzhiyun int i;
2014*4882a593Smuzhiyun
2015*4882a593Smuzhiyun bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2016*4882a593Smuzhiyun if (!bucket)
2017*4882a593Smuzhiyun return;
2018*4882a593Smuzhiyun
2019*4882a593Smuzhiyun for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2020*4882a593Smuzhiyun hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
2021*4882a593Smuzhiyun struct rt6_info *entry = rt6_ex->rt6i;
2022*4882a593Smuzhiyun
2023*4882a593Smuzhiyun /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
2024*4882a593Smuzhiyun * route), the metrics of its rt->from have already
2025*4882a593Smuzhiyun * been updated.
2026*4882a593Smuzhiyun */
2027*4882a593Smuzhiyun if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
2028*4882a593Smuzhiyun rt6_mtu_change_route_allowed(idev, entry, mtu))
2029*4882a593Smuzhiyun dst_metric_set(&entry->dst, RTAX_MTU, mtu);
2030*4882a593Smuzhiyun }
2031*4882a593Smuzhiyun bucket++;
2032*4882a593Smuzhiyun }
2033*4882a593Smuzhiyun }
2034*4882a593Smuzhiyun
2035*4882a593Smuzhiyun #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2036*4882a593Smuzhiyun
fib6_nh_exceptions_clean_tohost(const struct fib6_nh * nh,const struct in6_addr * gateway)2037*4882a593Smuzhiyun static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
2038*4882a593Smuzhiyun const struct in6_addr *gateway)
2039*4882a593Smuzhiyun {
2040*4882a593Smuzhiyun struct rt6_exception_bucket *bucket;
2041*4882a593Smuzhiyun struct rt6_exception *rt6_ex;
2042*4882a593Smuzhiyun struct hlist_node *tmp;
2043*4882a593Smuzhiyun int i;
2044*4882a593Smuzhiyun
2045*4882a593Smuzhiyun if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2046*4882a593Smuzhiyun return;
2047*4882a593Smuzhiyun
2048*4882a593Smuzhiyun spin_lock_bh(&rt6_exception_lock);
2049*4882a593Smuzhiyun bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2050*4882a593Smuzhiyun if (bucket) {
2051*4882a593Smuzhiyun for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2052*4882a593Smuzhiyun hlist_for_each_entry_safe(rt6_ex, tmp,
2053*4882a593Smuzhiyun &bucket->chain, hlist) {
2054*4882a593Smuzhiyun struct rt6_info *entry = rt6_ex->rt6i;
2055*4882a593Smuzhiyun
2056*4882a593Smuzhiyun if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
2057*4882a593Smuzhiyun RTF_CACHE_GATEWAY &&
2058*4882a593Smuzhiyun ipv6_addr_equal(gateway,
2059*4882a593Smuzhiyun &entry->rt6i_gateway)) {
2060*4882a593Smuzhiyun rt6_remove_exception(bucket, rt6_ex);
2061*4882a593Smuzhiyun }
2062*4882a593Smuzhiyun }
2063*4882a593Smuzhiyun bucket++;
2064*4882a593Smuzhiyun }
2065*4882a593Smuzhiyun }
2066*4882a593Smuzhiyun
2067*4882a593Smuzhiyun spin_unlock_bh(&rt6_exception_lock);
2068*4882a593Smuzhiyun }
2069*4882a593Smuzhiyun
rt6_age_examine_exception(struct rt6_exception_bucket * bucket,struct rt6_exception * rt6_ex,struct fib6_gc_args * gc_args,unsigned long now)2070*4882a593Smuzhiyun static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
2071*4882a593Smuzhiyun struct rt6_exception *rt6_ex,
2072*4882a593Smuzhiyun struct fib6_gc_args *gc_args,
2073*4882a593Smuzhiyun unsigned long now)
2074*4882a593Smuzhiyun {
2075*4882a593Smuzhiyun struct rt6_info *rt = rt6_ex->rt6i;
2076*4882a593Smuzhiyun
2077*4882a593Smuzhiyun /* we are pruning and obsoleting aged-out and non gateway exceptions
2078*4882a593Smuzhiyun * even if others have still references to them, so that on next
2079*4882a593Smuzhiyun * dst_check() such references can be dropped.
2080*4882a593Smuzhiyun * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
2081*4882a593Smuzhiyun * expired, independently from their aging, as per RFC 8201 section 4
2082*4882a593Smuzhiyun */
2083*4882a593Smuzhiyun if (!(rt->rt6i_flags & RTF_EXPIRES)) {
2084*4882a593Smuzhiyun if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
2085*4882a593Smuzhiyun RT6_TRACE("aging clone %p\n", rt);
2086*4882a593Smuzhiyun rt6_remove_exception(bucket, rt6_ex);
2087*4882a593Smuzhiyun return;
2088*4882a593Smuzhiyun }
2089*4882a593Smuzhiyun } else if (time_after(jiffies, rt->dst.expires)) {
2090*4882a593Smuzhiyun RT6_TRACE("purging expired route %p\n", rt);
2091*4882a593Smuzhiyun rt6_remove_exception(bucket, rt6_ex);
2092*4882a593Smuzhiyun return;
2093*4882a593Smuzhiyun }
2094*4882a593Smuzhiyun
2095*4882a593Smuzhiyun if (rt->rt6i_flags & RTF_GATEWAY) {
2096*4882a593Smuzhiyun struct neighbour *neigh;
2097*4882a593Smuzhiyun __u8 neigh_flags = 0;
2098*4882a593Smuzhiyun
2099*4882a593Smuzhiyun neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
2100*4882a593Smuzhiyun if (neigh)
2101*4882a593Smuzhiyun neigh_flags = neigh->flags;
2102*4882a593Smuzhiyun
2103*4882a593Smuzhiyun if (!(neigh_flags & NTF_ROUTER)) {
2104*4882a593Smuzhiyun RT6_TRACE("purging route %p via non-router but gateway\n",
2105*4882a593Smuzhiyun rt);
2106*4882a593Smuzhiyun rt6_remove_exception(bucket, rt6_ex);
2107*4882a593Smuzhiyun return;
2108*4882a593Smuzhiyun }
2109*4882a593Smuzhiyun }
2110*4882a593Smuzhiyun
2111*4882a593Smuzhiyun gc_args->more++;
2112*4882a593Smuzhiyun }
2113*4882a593Smuzhiyun
fib6_nh_age_exceptions(const struct fib6_nh * nh,struct fib6_gc_args * gc_args,unsigned long now)2114*4882a593Smuzhiyun static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
2115*4882a593Smuzhiyun struct fib6_gc_args *gc_args,
2116*4882a593Smuzhiyun unsigned long now)
2117*4882a593Smuzhiyun {
2118*4882a593Smuzhiyun struct rt6_exception_bucket *bucket;
2119*4882a593Smuzhiyun struct rt6_exception *rt6_ex;
2120*4882a593Smuzhiyun struct hlist_node *tmp;
2121*4882a593Smuzhiyun int i;
2122*4882a593Smuzhiyun
2123*4882a593Smuzhiyun if (!rcu_access_pointer(nh->rt6i_exception_bucket))
2124*4882a593Smuzhiyun return;
2125*4882a593Smuzhiyun
2126*4882a593Smuzhiyun rcu_read_lock_bh();
2127*4882a593Smuzhiyun spin_lock(&rt6_exception_lock);
2128*4882a593Smuzhiyun bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
2129*4882a593Smuzhiyun if (bucket) {
2130*4882a593Smuzhiyun for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
2131*4882a593Smuzhiyun hlist_for_each_entry_safe(rt6_ex, tmp,
2132*4882a593Smuzhiyun &bucket->chain, hlist) {
2133*4882a593Smuzhiyun rt6_age_examine_exception(bucket, rt6_ex,
2134*4882a593Smuzhiyun gc_args, now);
2135*4882a593Smuzhiyun }
2136*4882a593Smuzhiyun bucket++;
2137*4882a593Smuzhiyun }
2138*4882a593Smuzhiyun }
2139*4882a593Smuzhiyun spin_unlock(&rt6_exception_lock);
2140*4882a593Smuzhiyun rcu_read_unlock_bh();
2141*4882a593Smuzhiyun }
2142*4882a593Smuzhiyun
/* Context handed through nexthop_for_each_fib6_nh() when aging the
 * exceptions of a route that uses a nexthop object.
 */
struct fib6_nh_age_excptn_arg {
	struct fib6_gc_args	*gc_args;	/* GC state passed to the aging walk */
	unsigned long		now;		/* timestamp the GC pass runs against */
};

/* Per-nexthop callback: age the exceptions of @nh using the context in
 * @_arg (a struct fib6_nh_age_excptn_arg).
 *
 * Always returns 0.
 * NOTE(review): presumably a zero return lets the nexthop iterator
 * continue with the remaining nexthops - confirm against
 * nexthop_for_each_fib6_nh().
 */
static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
{
	const struct fib6_nh_age_excptn_arg *ctx = _arg;

	fib6_nh_age_exceptions(nh, ctx->gc_args, ctx->now);

	return 0;
}
2155*4882a593Smuzhiyun
rt6_age_exceptions(struct fib6_info * f6i,struct fib6_gc_args * gc_args,unsigned long now)2156*4882a593Smuzhiyun void rt6_age_exceptions(struct fib6_info *f6i,
2157*4882a593Smuzhiyun struct fib6_gc_args *gc_args,
2158*4882a593Smuzhiyun unsigned long now)
2159*4882a593Smuzhiyun {
2160*4882a593Smuzhiyun if (f6i->nh) {
2161*4882a593Smuzhiyun struct fib6_nh_age_excptn_arg arg = {
2162*4882a593Smuzhiyun .gc_args = gc_args,
2163*4882a593Smuzhiyun .now = now
2164*4882a593Smuzhiyun };
2165*4882a593Smuzhiyun
2166*4882a593Smuzhiyun nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
2167*4882a593Smuzhiyun &arg);
2168*4882a593Smuzhiyun } else {
2169*4882a593Smuzhiyun fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
2170*4882a593Smuzhiyun }
2171*4882a593Smuzhiyun }
2172*4882a593Smuzhiyun
2173*4882a593Smuzhiyun /* must be called with rcu lock held */
fib6_table_lookup(struct net * net,struct fib6_table * table,int oif,struct flowi6 * fl6,struct fib6_result * res,int strict)2174*4882a593Smuzhiyun int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
2175*4882a593Smuzhiyun struct flowi6 *fl6, struct fib6_result *res, int strict)
2176*4882a593Smuzhiyun {
2177*4882a593Smuzhiyun struct fib6_node *fn, *saved_fn;
2178*4882a593Smuzhiyun
2179*4882a593Smuzhiyun fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2180*4882a593Smuzhiyun saved_fn = fn;
2181*4882a593Smuzhiyun
2182*4882a593Smuzhiyun if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2183*4882a593Smuzhiyun oif = 0;
2184*4882a593Smuzhiyun
2185*4882a593Smuzhiyun redo_rt6_select:
2186*4882a593Smuzhiyun rt6_select(net, fn, oif, res, strict);
2187*4882a593Smuzhiyun if (res->f6i == net->ipv6.fib6_null_entry) {
2188*4882a593Smuzhiyun fn = fib6_backtrack(fn, &fl6->saddr);
2189*4882a593Smuzhiyun if (fn)
2190*4882a593Smuzhiyun goto redo_rt6_select;
2191*4882a593Smuzhiyun else if (strict & RT6_LOOKUP_F_REACHABLE) {
2192*4882a593Smuzhiyun /* also consider unreachable route */
2193*4882a593Smuzhiyun strict &= ~RT6_LOOKUP_F_REACHABLE;
2194*4882a593Smuzhiyun fn = saved_fn;
2195*4882a593Smuzhiyun goto redo_rt6_select;
2196*4882a593Smuzhiyun }
2197*4882a593Smuzhiyun }
2198*4882a593Smuzhiyun
2199*4882a593Smuzhiyun trace_fib6_table_lookup(net, res, table, fl6);
2200*4882a593Smuzhiyun
2201*4882a593Smuzhiyun return 0;
2202*4882a593Smuzhiyun }
2203*4882a593Smuzhiyun
ip6_pol_route(struct net * net,struct fib6_table * table,int oif,struct flowi6 * fl6,const struct sk_buff * skb,int flags)2204*4882a593Smuzhiyun struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
2205*4882a593Smuzhiyun int oif, struct flowi6 *fl6,
2206*4882a593Smuzhiyun const struct sk_buff *skb, int flags)
2207*4882a593Smuzhiyun {
2208*4882a593Smuzhiyun struct fib6_result res = {};
2209*4882a593Smuzhiyun struct rt6_info *rt = NULL;
2210*4882a593Smuzhiyun int strict = 0;
2211*4882a593Smuzhiyun
2212*4882a593Smuzhiyun WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
2213*4882a593Smuzhiyun !rcu_read_lock_held());
2214*4882a593Smuzhiyun
2215*4882a593Smuzhiyun strict |= flags & RT6_LOOKUP_F_IFACE;
2216*4882a593Smuzhiyun strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
2217*4882a593Smuzhiyun if (net->ipv6.devconf_all->forwarding == 0)
2218*4882a593Smuzhiyun strict |= RT6_LOOKUP_F_REACHABLE;
2219*4882a593Smuzhiyun
2220*4882a593Smuzhiyun rcu_read_lock();
2221*4882a593Smuzhiyun
2222*4882a593Smuzhiyun fib6_table_lookup(net, table, oif, fl6, &res, strict);
2223*4882a593Smuzhiyun if (res.f6i == net->ipv6.fib6_null_entry)
2224*4882a593Smuzhiyun goto out;
2225*4882a593Smuzhiyun
2226*4882a593Smuzhiyun fib6_select_path(net, &res, fl6, oif, false, skb, strict);
2227*4882a593Smuzhiyun
2228*4882a593Smuzhiyun /*Search through exception table */
2229*4882a593Smuzhiyun rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
2230*4882a593Smuzhiyun if (rt) {
2231*4882a593Smuzhiyun goto out;
2232*4882a593Smuzhiyun } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
2233*4882a593Smuzhiyun !res.nh->fib_nh_gw_family)) {
2234*4882a593Smuzhiyun /* Create a RTF_CACHE clone which will not be
2235*4882a593Smuzhiyun * owned by the fib6 tree. It is for the special case where
2236*4882a593Smuzhiyun * the daddr in the skb during the neighbor look-up is different
2237*4882a593Smuzhiyun * from the fl6->daddr used to look-up route here.
2238*4882a593Smuzhiyun */
2239*4882a593Smuzhiyun rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
2240*4882a593Smuzhiyun
2241*4882a593Smuzhiyun if (rt) {
2242*4882a593Smuzhiyun /* 1 refcnt is taken during ip6_rt_cache_alloc().
2243*4882a593Smuzhiyun * As rt6_uncached_list_add() does not consume refcnt,
2244*4882a593Smuzhiyun * this refcnt is always returned to the caller even
2245*4882a593Smuzhiyun * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
2246*4882a593Smuzhiyun */
2247*4882a593Smuzhiyun rt6_uncached_list_add(rt);
2248*4882a593Smuzhiyun atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2249*4882a593Smuzhiyun rcu_read_unlock();
2250*4882a593Smuzhiyun
2251*4882a593Smuzhiyun return rt;
2252*4882a593Smuzhiyun }
2253*4882a593Smuzhiyun } else {
2254*4882a593Smuzhiyun /* Get a percpu copy */
2255*4882a593Smuzhiyun local_bh_disable();
2256*4882a593Smuzhiyun rt = rt6_get_pcpu_route(&res);
2257*4882a593Smuzhiyun
2258*4882a593Smuzhiyun if (!rt)
2259*4882a593Smuzhiyun rt = rt6_make_pcpu_route(net, &res);
2260*4882a593Smuzhiyun
2261*4882a593Smuzhiyun local_bh_enable();
2262*4882a593Smuzhiyun }
2263*4882a593Smuzhiyun out:
2264*4882a593Smuzhiyun if (!rt)
2265*4882a593Smuzhiyun rt = net->ipv6.ip6_null_entry;
2266*4882a593Smuzhiyun if (!(flags & RT6_LOOKUP_F_DST_NOREF))
2267*4882a593Smuzhiyun ip6_hold_safe(net, &rt);
2268*4882a593Smuzhiyun rcu_read_unlock();
2269*4882a593Smuzhiyun
2270*4882a593Smuzhiyun return rt;
2271*4882a593Smuzhiyun }
2272*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip6_pol_route);
2273*4882a593Smuzhiyun
ip6_pol_route_input(struct net * net,struct fib6_table * table,struct flowi6 * fl6,const struct sk_buff * skb,int flags)2274*4882a593Smuzhiyun INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_input(struct net *net,
2275*4882a593Smuzhiyun struct fib6_table *table,
2276*4882a593Smuzhiyun struct flowi6 *fl6,
2277*4882a593Smuzhiyun const struct sk_buff *skb,
2278*4882a593Smuzhiyun int flags)
2279*4882a593Smuzhiyun {
2280*4882a593Smuzhiyun return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
2281*4882a593Smuzhiyun }
2282*4882a593Smuzhiyun
ip6_route_input_lookup(struct net * net,struct net_device * dev,struct flowi6 * fl6,const struct sk_buff * skb,int flags)2283*4882a593Smuzhiyun struct dst_entry *ip6_route_input_lookup(struct net *net,
2284*4882a593Smuzhiyun struct net_device *dev,
2285*4882a593Smuzhiyun struct flowi6 *fl6,
2286*4882a593Smuzhiyun const struct sk_buff *skb,
2287*4882a593Smuzhiyun int flags)
2288*4882a593Smuzhiyun {
2289*4882a593Smuzhiyun if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
2290*4882a593Smuzhiyun flags |= RT6_LOOKUP_F_IFACE;
2291*4882a593Smuzhiyun
2292*4882a593Smuzhiyun return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
2293*4882a593Smuzhiyun }
2294*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
2295*4882a593Smuzhiyun
ip6_multipath_l3_keys(const struct sk_buff * skb,struct flow_keys * keys,struct flow_keys * flkeys)2296*4882a593Smuzhiyun static void ip6_multipath_l3_keys(const struct sk_buff *skb,
2297*4882a593Smuzhiyun struct flow_keys *keys,
2298*4882a593Smuzhiyun struct flow_keys *flkeys)
2299*4882a593Smuzhiyun {
2300*4882a593Smuzhiyun const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
2301*4882a593Smuzhiyun const struct ipv6hdr *key_iph = outer_iph;
2302*4882a593Smuzhiyun struct flow_keys *_flkeys = flkeys;
2303*4882a593Smuzhiyun const struct ipv6hdr *inner_iph;
2304*4882a593Smuzhiyun const struct icmp6hdr *icmph;
2305*4882a593Smuzhiyun struct ipv6hdr _inner_iph;
2306*4882a593Smuzhiyun struct icmp6hdr _icmph;
2307*4882a593Smuzhiyun
2308*4882a593Smuzhiyun if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2309*4882a593Smuzhiyun goto out;
2310*4882a593Smuzhiyun
2311*4882a593Smuzhiyun icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2312*4882a593Smuzhiyun sizeof(_icmph), &_icmph);
2313*4882a593Smuzhiyun if (!icmph)
2314*4882a593Smuzhiyun goto out;
2315*4882a593Smuzhiyun
2316*4882a593Smuzhiyun if (!icmpv6_is_err(icmph->icmp6_type))
2317*4882a593Smuzhiyun goto out;
2318*4882a593Smuzhiyun
2319*4882a593Smuzhiyun inner_iph = skb_header_pointer(skb,
2320*4882a593Smuzhiyun skb_transport_offset(skb) + sizeof(*icmph),
2321*4882a593Smuzhiyun sizeof(_inner_iph), &_inner_iph);
2322*4882a593Smuzhiyun if (!inner_iph)
2323*4882a593Smuzhiyun goto out;
2324*4882a593Smuzhiyun
2325*4882a593Smuzhiyun key_iph = inner_iph;
2326*4882a593Smuzhiyun _flkeys = NULL;
2327*4882a593Smuzhiyun out:
2328*4882a593Smuzhiyun if (_flkeys) {
2329*4882a593Smuzhiyun keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2330*4882a593Smuzhiyun keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2331*4882a593Smuzhiyun keys->tags.flow_label = _flkeys->tags.flow_label;
2332*4882a593Smuzhiyun keys->basic.ip_proto = _flkeys->basic.ip_proto;
2333*4882a593Smuzhiyun } else {
2334*4882a593Smuzhiyun keys->addrs.v6addrs.src = key_iph->saddr;
2335*4882a593Smuzhiyun keys->addrs.v6addrs.dst = key_iph->daddr;
2336*4882a593Smuzhiyun keys->tags.flow_label = ip6_flowlabel(key_iph);
2337*4882a593Smuzhiyun keys->basic.ip_proto = key_iph->nexthdr;
2338*4882a593Smuzhiyun }
2339*4882a593Smuzhiyun }
2340*4882a593Smuzhiyun
2341*4882a593Smuzhiyun /* if skb is set it will be used and fl6 can be NULL */
rt6_multipath_hash(const struct net * net,const struct flowi6 * fl6,const struct sk_buff * skb,struct flow_keys * flkeys)2342*4882a593Smuzhiyun u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2343*4882a593Smuzhiyun const struct sk_buff *skb, struct flow_keys *flkeys)
2344*4882a593Smuzhiyun {
2345*4882a593Smuzhiyun struct flow_keys hash_keys;
2346*4882a593Smuzhiyun u32 mhash;
2347*4882a593Smuzhiyun
2348*4882a593Smuzhiyun switch (ip6_multipath_hash_policy(net)) {
2349*4882a593Smuzhiyun case 0:
2350*4882a593Smuzhiyun memset(&hash_keys, 0, sizeof(hash_keys));
2351*4882a593Smuzhiyun hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2352*4882a593Smuzhiyun if (skb) {
2353*4882a593Smuzhiyun ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2354*4882a593Smuzhiyun } else {
2355*4882a593Smuzhiyun hash_keys.addrs.v6addrs.src = fl6->saddr;
2356*4882a593Smuzhiyun hash_keys.addrs.v6addrs.dst = fl6->daddr;
2357*4882a593Smuzhiyun hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2358*4882a593Smuzhiyun hash_keys.basic.ip_proto = fl6->flowi6_proto;
2359*4882a593Smuzhiyun }
2360*4882a593Smuzhiyun break;
2361*4882a593Smuzhiyun case 1:
2362*4882a593Smuzhiyun if (skb) {
2363*4882a593Smuzhiyun unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2364*4882a593Smuzhiyun struct flow_keys keys;
2365*4882a593Smuzhiyun
2366*4882a593Smuzhiyun /* short-circuit if we already have L4 hash present */
2367*4882a593Smuzhiyun if (skb->l4_hash)
2368*4882a593Smuzhiyun return skb_get_hash_raw(skb) >> 1;
2369*4882a593Smuzhiyun
2370*4882a593Smuzhiyun memset(&hash_keys, 0, sizeof(hash_keys));
2371*4882a593Smuzhiyun
2372*4882a593Smuzhiyun if (!flkeys) {
2373*4882a593Smuzhiyun skb_flow_dissect_flow_keys(skb, &keys, flag);
2374*4882a593Smuzhiyun flkeys = &keys;
2375*4882a593Smuzhiyun }
2376*4882a593Smuzhiyun hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2377*4882a593Smuzhiyun hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2378*4882a593Smuzhiyun hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2379*4882a593Smuzhiyun hash_keys.ports.src = flkeys->ports.src;
2380*4882a593Smuzhiyun hash_keys.ports.dst = flkeys->ports.dst;
2381*4882a593Smuzhiyun hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2382*4882a593Smuzhiyun } else {
2383*4882a593Smuzhiyun memset(&hash_keys, 0, sizeof(hash_keys));
2384*4882a593Smuzhiyun hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2385*4882a593Smuzhiyun hash_keys.addrs.v6addrs.src = fl6->saddr;
2386*4882a593Smuzhiyun hash_keys.addrs.v6addrs.dst = fl6->daddr;
2387*4882a593Smuzhiyun hash_keys.ports.src = fl6->fl6_sport;
2388*4882a593Smuzhiyun hash_keys.ports.dst = fl6->fl6_dport;
2389*4882a593Smuzhiyun hash_keys.basic.ip_proto = fl6->flowi6_proto;
2390*4882a593Smuzhiyun }
2391*4882a593Smuzhiyun break;
2392*4882a593Smuzhiyun case 2:
2393*4882a593Smuzhiyun memset(&hash_keys, 0, sizeof(hash_keys));
2394*4882a593Smuzhiyun hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2395*4882a593Smuzhiyun if (skb) {
2396*4882a593Smuzhiyun struct flow_keys keys;
2397*4882a593Smuzhiyun
2398*4882a593Smuzhiyun if (!flkeys) {
2399*4882a593Smuzhiyun skb_flow_dissect_flow_keys(skb, &keys, 0);
2400*4882a593Smuzhiyun flkeys = &keys;
2401*4882a593Smuzhiyun }
2402*4882a593Smuzhiyun
2403*4882a593Smuzhiyun /* Inner can be v4 or v6 */
2404*4882a593Smuzhiyun if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2405*4882a593Smuzhiyun hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
2406*4882a593Smuzhiyun hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
2407*4882a593Smuzhiyun hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
2408*4882a593Smuzhiyun } else if (flkeys->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2409*4882a593Smuzhiyun hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2410*4882a593Smuzhiyun hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2411*4882a593Smuzhiyun hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2412*4882a593Smuzhiyun hash_keys.tags.flow_label = flkeys->tags.flow_label;
2413*4882a593Smuzhiyun hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2414*4882a593Smuzhiyun } else {
2415*4882a593Smuzhiyun /* Same as case 0 */
2416*4882a593Smuzhiyun hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2417*4882a593Smuzhiyun ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2418*4882a593Smuzhiyun }
2419*4882a593Smuzhiyun } else {
2420*4882a593Smuzhiyun /* Same as case 0 */
2421*4882a593Smuzhiyun hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2422*4882a593Smuzhiyun hash_keys.addrs.v6addrs.src = fl6->saddr;
2423*4882a593Smuzhiyun hash_keys.addrs.v6addrs.dst = fl6->daddr;
2424*4882a593Smuzhiyun hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2425*4882a593Smuzhiyun hash_keys.basic.ip_proto = fl6->flowi6_proto;
2426*4882a593Smuzhiyun }
2427*4882a593Smuzhiyun break;
2428*4882a593Smuzhiyun }
2429*4882a593Smuzhiyun mhash = flow_hash_from_keys(&hash_keys);
2430*4882a593Smuzhiyun
2431*4882a593Smuzhiyun return mhash >> 1;
2432*4882a593Smuzhiyun }
2433*4882a593Smuzhiyun
2434*4882a593Smuzhiyun /* Called with rcu held */
ip6_route_input(struct sk_buff * skb)2435*4882a593Smuzhiyun void ip6_route_input(struct sk_buff *skb)
2436*4882a593Smuzhiyun {
2437*4882a593Smuzhiyun const struct ipv6hdr *iph = ipv6_hdr(skb);
2438*4882a593Smuzhiyun struct net *net = dev_net(skb->dev);
2439*4882a593Smuzhiyun int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
2440*4882a593Smuzhiyun struct ip_tunnel_info *tun_info;
2441*4882a593Smuzhiyun struct flowi6 fl6 = {
2442*4882a593Smuzhiyun .flowi6_iif = skb->dev->ifindex,
2443*4882a593Smuzhiyun .daddr = iph->daddr,
2444*4882a593Smuzhiyun .saddr = iph->saddr,
2445*4882a593Smuzhiyun .flowlabel = ip6_flowinfo(iph),
2446*4882a593Smuzhiyun .flowi6_mark = skb->mark,
2447*4882a593Smuzhiyun .flowi6_proto = iph->nexthdr,
2448*4882a593Smuzhiyun };
2449*4882a593Smuzhiyun struct flow_keys *flkeys = NULL, _flkeys;
2450*4882a593Smuzhiyun
2451*4882a593Smuzhiyun tun_info = skb_tunnel_info(skb);
2452*4882a593Smuzhiyun if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2453*4882a593Smuzhiyun fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2454*4882a593Smuzhiyun
2455*4882a593Smuzhiyun if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2456*4882a593Smuzhiyun flkeys = &_flkeys;
2457*4882a593Smuzhiyun
2458*4882a593Smuzhiyun if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2459*4882a593Smuzhiyun fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2460*4882a593Smuzhiyun skb_dst_drop(skb);
2461*4882a593Smuzhiyun skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
2462*4882a593Smuzhiyun &fl6, skb, flags));
2463*4882a593Smuzhiyun }
2464*4882a593Smuzhiyun
ip6_pol_route_output(struct net * net,struct fib6_table * table,struct flowi6 * fl6,const struct sk_buff * skb,int flags)2465*4882a593Smuzhiyun INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
2466*4882a593Smuzhiyun struct fib6_table *table,
2467*4882a593Smuzhiyun struct flowi6 *fl6,
2468*4882a593Smuzhiyun const struct sk_buff *skb,
2469*4882a593Smuzhiyun int flags)
2470*4882a593Smuzhiyun {
2471*4882a593Smuzhiyun return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2472*4882a593Smuzhiyun }
2473*4882a593Smuzhiyun
ip6_route_output_flags_noref(struct net * net,const struct sock * sk,struct flowi6 * fl6,int flags)2474*4882a593Smuzhiyun struct dst_entry *ip6_route_output_flags_noref(struct net *net,
2475*4882a593Smuzhiyun const struct sock *sk,
2476*4882a593Smuzhiyun struct flowi6 *fl6, int flags)
2477*4882a593Smuzhiyun {
2478*4882a593Smuzhiyun bool any_src;
2479*4882a593Smuzhiyun
2480*4882a593Smuzhiyun if (ipv6_addr_type(&fl6->daddr) &
2481*4882a593Smuzhiyun (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2482*4882a593Smuzhiyun struct dst_entry *dst;
2483*4882a593Smuzhiyun
2484*4882a593Smuzhiyun /* This function does not take refcnt on the dst */
2485*4882a593Smuzhiyun dst = l3mdev_link_scope_lookup(net, fl6);
2486*4882a593Smuzhiyun if (dst)
2487*4882a593Smuzhiyun return dst;
2488*4882a593Smuzhiyun }
2489*4882a593Smuzhiyun
2490*4882a593Smuzhiyun fl6->flowi6_iif = LOOPBACK_IFINDEX;
2491*4882a593Smuzhiyun
2492*4882a593Smuzhiyun flags |= RT6_LOOKUP_F_DST_NOREF;
2493*4882a593Smuzhiyun any_src = ipv6_addr_any(&fl6->saddr);
2494*4882a593Smuzhiyun if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2495*4882a593Smuzhiyun (fl6->flowi6_oif && any_src))
2496*4882a593Smuzhiyun flags |= RT6_LOOKUP_F_IFACE;
2497*4882a593Smuzhiyun
2498*4882a593Smuzhiyun if (!any_src)
2499*4882a593Smuzhiyun flags |= RT6_LOOKUP_F_HAS_SADDR;
2500*4882a593Smuzhiyun else if (sk)
2501*4882a593Smuzhiyun flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2502*4882a593Smuzhiyun
2503*4882a593Smuzhiyun return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2504*4882a593Smuzhiyun }
2505*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
2506*4882a593Smuzhiyun
ip6_route_output_flags(struct net * net,const struct sock * sk,struct flowi6 * fl6,int flags)2507*4882a593Smuzhiyun struct dst_entry *ip6_route_output_flags(struct net *net,
2508*4882a593Smuzhiyun const struct sock *sk,
2509*4882a593Smuzhiyun struct flowi6 *fl6,
2510*4882a593Smuzhiyun int flags)
2511*4882a593Smuzhiyun {
2512*4882a593Smuzhiyun struct dst_entry *dst;
2513*4882a593Smuzhiyun struct rt6_info *rt6;
2514*4882a593Smuzhiyun
2515*4882a593Smuzhiyun rcu_read_lock();
2516*4882a593Smuzhiyun dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
2517*4882a593Smuzhiyun rt6 = (struct rt6_info *)dst;
2518*4882a593Smuzhiyun /* For dst cached in uncached_list, refcnt is already taken. */
2519*4882a593Smuzhiyun if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
2520*4882a593Smuzhiyun dst = &net->ipv6.ip6_null_entry->dst;
2521*4882a593Smuzhiyun dst_hold(dst);
2522*4882a593Smuzhiyun }
2523*4882a593Smuzhiyun rcu_read_unlock();
2524*4882a593Smuzhiyun
2525*4882a593Smuzhiyun return dst;
2526*4882a593Smuzhiyun }
2527*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2528*4882a593Smuzhiyun
ip6_blackhole_route(struct net * net,struct dst_entry * dst_orig)2529*4882a593Smuzhiyun struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2530*4882a593Smuzhiyun {
2531*4882a593Smuzhiyun struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2532*4882a593Smuzhiyun struct net_device *loopback_dev = net->loopback_dev;
2533*4882a593Smuzhiyun struct dst_entry *new = NULL;
2534*4882a593Smuzhiyun
2535*4882a593Smuzhiyun rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2536*4882a593Smuzhiyun DST_OBSOLETE_DEAD, 0);
2537*4882a593Smuzhiyun if (rt) {
2538*4882a593Smuzhiyun rt6_info_init(rt);
2539*4882a593Smuzhiyun atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2540*4882a593Smuzhiyun
2541*4882a593Smuzhiyun new = &rt->dst;
2542*4882a593Smuzhiyun new->__use = 1;
2543*4882a593Smuzhiyun new->input = dst_discard;
2544*4882a593Smuzhiyun new->output = dst_discard_out;
2545*4882a593Smuzhiyun
2546*4882a593Smuzhiyun dst_copy_metrics(new, &ort->dst);
2547*4882a593Smuzhiyun
2548*4882a593Smuzhiyun rt->rt6i_idev = in6_dev_get(loopback_dev);
2549*4882a593Smuzhiyun rt->rt6i_gateway = ort->rt6i_gateway;
2550*4882a593Smuzhiyun rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2551*4882a593Smuzhiyun
2552*4882a593Smuzhiyun memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2553*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
2554*4882a593Smuzhiyun memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2555*4882a593Smuzhiyun #endif
2556*4882a593Smuzhiyun }
2557*4882a593Smuzhiyun
2558*4882a593Smuzhiyun dst_release(dst_orig);
2559*4882a593Smuzhiyun return new ? new : ERR_PTR(-ENOMEM);
2560*4882a593Smuzhiyun }
2561*4882a593Smuzhiyun
2562*4882a593Smuzhiyun /*
2563*4882a593Smuzhiyun * Destination cache support functions
2564*4882a593Smuzhiyun */
2565*4882a593Smuzhiyun
fib6_check(struct fib6_info * f6i,u32 cookie)2566*4882a593Smuzhiyun static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2567*4882a593Smuzhiyun {
2568*4882a593Smuzhiyun u32 rt_cookie = 0;
2569*4882a593Smuzhiyun
2570*4882a593Smuzhiyun if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2571*4882a593Smuzhiyun return false;
2572*4882a593Smuzhiyun
2573*4882a593Smuzhiyun if (fib6_check_expired(f6i))
2574*4882a593Smuzhiyun return false;
2575*4882a593Smuzhiyun
2576*4882a593Smuzhiyun return true;
2577*4882a593Smuzhiyun }
2578*4882a593Smuzhiyun
/* Validate cached route @rt against @cookie via its origin entry @from.
 *
 * Returns &rt->dst when @from is set, its cookie can be read and equals
 * @cookie, and @rt has not expired; NULL otherwise.
 */
static struct dst_entry *rt6_check(struct rt6_info *rt,
				   struct fib6_info *from,
				   u32 cookie)
{
	u32 cur_cookie = 0;

	if (!from)
		return NULL;

	if (!fib6_get_cookie_safe(from, &cur_cookie) || cur_cookie != cookie)
		return NULL;

	return rt6_check_expired(rt) ? NULL : &rt->dst;
}
2594*4882a593Smuzhiyun
/* Validate @rt through its origin entry @from.
 *
 * @rt is still usable only if it has not expired itself, its dst is still
 * marked DST_OBSOLETE_FORCE_CHK, and @from passes fib6_check() for
 * @cookie. Returns &rt->dst in that case, NULL otherwise.
 */
static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
					    struct fib6_info *from,
					    u32 cookie)
{
	if (__rt6_check_expired(rt))
		return NULL;

	if (rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK)
		return NULL;

	if (!fib6_check(from, cookie))
		return NULL;

	return &rt->dst;
}
2606*4882a593Smuzhiyun
/* Check whether a previously obtained @dst is still valid for @cookie.
 *
 * Returns @dst when it may continue to be used, NULL when the holder has
 * to look the route up again.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
	struct dst_entry *dst_ret;
	struct fib6_info *from;
	bool check_via_from;

	/* Routes carrying a serial number are validated by that alone. */
	if (rt->sernum) {
		if (rt6_is_valid(rt))
			return dst;
		return NULL;
	}

	rcu_read_lock();

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	from = rcu_dereference(rt->from);

	/* Per-cpu copies and routes on the uncached list are judged through
	 * their origin entry when they still have one.
	 */
	check_via_from = from &&
			 ((rt->rt6i_flags & RTF_PCPU) ||
			  unlikely(!list_empty(&rt->rt6i_uncached)));

	dst_ret = check_via_from ? rt6_dst_from_check(rt, from, cookie) :
				   rt6_check(rt, from, cookie);

	rcu_read_unlock();

	return dst_ret;
}
2637*4882a593Smuzhiyun
ip6_negative_advice(struct dst_entry * dst)2638*4882a593Smuzhiyun static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2639*4882a593Smuzhiyun {
2640*4882a593Smuzhiyun struct rt6_info *rt = (struct rt6_info *) dst;
2641*4882a593Smuzhiyun
2642*4882a593Smuzhiyun if (rt) {
2643*4882a593Smuzhiyun if (rt->rt6i_flags & RTF_CACHE) {
2644*4882a593Smuzhiyun rcu_read_lock();
2645*4882a593Smuzhiyun if (rt6_check_expired(rt)) {
2646*4882a593Smuzhiyun rt6_remove_exception_rt(rt);
2647*4882a593Smuzhiyun dst = NULL;
2648*4882a593Smuzhiyun }
2649*4882a593Smuzhiyun rcu_read_unlock();
2650*4882a593Smuzhiyun } else {
2651*4882a593Smuzhiyun dst_release(dst);
2652*4882a593Smuzhiyun dst = NULL;
2653*4882a593Smuzhiyun }
2654*4882a593Smuzhiyun }
2655*4882a593Smuzhiyun return dst;
2656*4882a593Smuzhiyun }
2657*4882a593Smuzhiyun
ip6_link_failure(struct sk_buff * skb)2658*4882a593Smuzhiyun static void ip6_link_failure(struct sk_buff *skb)
2659*4882a593Smuzhiyun {
2660*4882a593Smuzhiyun struct rt6_info *rt;
2661*4882a593Smuzhiyun
2662*4882a593Smuzhiyun icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2663*4882a593Smuzhiyun
2664*4882a593Smuzhiyun rt = (struct rt6_info *) skb_dst(skb);
2665*4882a593Smuzhiyun if (rt) {
2666*4882a593Smuzhiyun rcu_read_lock();
2667*4882a593Smuzhiyun if (rt->rt6i_flags & RTF_CACHE) {
2668*4882a593Smuzhiyun rt6_remove_exception_rt(rt);
2669*4882a593Smuzhiyun } else {
2670*4882a593Smuzhiyun struct fib6_info *from;
2671*4882a593Smuzhiyun struct fib6_node *fn;
2672*4882a593Smuzhiyun
2673*4882a593Smuzhiyun from = rcu_dereference(rt->from);
2674*4882a593Smuzhiyun if (from) {
2675*4882a593Smuzhiyun fn = rcu_dereference(from->fib6_node);
2676*4882a593Smuzhiyun if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2677*4882a593Smuzhiyun WRITE_ONCE(fn->fn_sernum, -1);
2678*4882a593Smuzhiyun }
2679*4882a593Smuzhiyun }
2680*4882a593Smuzhiyun rcu_read_unlock();
2681*4882a593Smuzhiyun }
2682*4882a593Smuzhiyun }
2683*4882a593Smuzhiyun
rt6_update_expires(struct rt6_info * rt0,int timeout)2684*4882a593Smuzhiyun static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2685*4882a593Smuzhiyun {
2686*4882a593Smuzhiyun if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2687*4882a593Smuzhiyun struct fib6_info *from;
2688*4882a593Smuzhiyun
2689*4882a593Smuzhiyun rcu_read_lock();
2690*4882a593Smuzhiyun from = rcu_dereference(rt0->from);
2691*4882a593Smuzhiyun if (from)
2692*4882a593Smuzhiyun rt0->dst.expires = from->expires;
2693*4882a593Smuzhiyun rcu_read_unlock();
2694*4882a593Smuzhiyun }
2695*4882a593Smuzhiyun
2696*4882a593Smuzhiyun dst_set_expires(&rt0->dst, timeout);
2697*4882a593Smuzhiyun rt0->rt6i_flags |= RTF_EXPIRES;
2698*4882a593Smuzhiyun }
2699*4882a593Smuzhiyun
/*
 * Record @mtu as the RTAX_MTU metric of @rt, mark the route
 * RTF_MODIFIED and (re)arm its expiry using the per-netns
 * ip6_rt_mtu_expires sysctl.
 */
static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *ns = dev_net(rt->dst.dev);

	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
	rt->rt6i_flags |= RTF_MODIFIED;

	rt6_update_expires(rt, ns->ipv6.sysctl.ip6_rt_mtu_expires);
}
2708*4882a593Smuzhiyun
rt6_cache_allowed_for_pmtu(const struct rt6_info * rt)2709*4882a593Smuzhiyun static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2710*4882a593Smuzhiyun {
2711*4882a593Smuzhiyun return !(rt->rt6i_flags & RTF_CACHE) &&
2712*4882a593Smuzhiyun (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
2713*4882a593Smuzhiyun }
2714*4882a593Smuzhiyun
/*
 * Apply a newly learned path MTU to @dst.
 *
 * @dst:           route the MTU applies to
 * @sk:            optional socket; supplies addresses when @iph is NULL
 * @iph:           optional IPv6 header supplying daddr/saddr
 * @mtu:           the new MTU (host byte order)
 * @confirm_neigh: when true, dst_confirm_neigh() is called first
 *
 * MTUs below IPV6_MIN_MTU, or not smaller than the current dst_mtu(),
 * are ignored.  If rt6_cache_allowed_for_pmtu() rejects the route, the
 * route itself is updated in place.  Otherwise, provided a destination
 * address is known, a new cache entry is allocated with
 * ip6_rt_cache_alloc(), updated, and inserted as an exception.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu,
				 bool confirm_neigh)
{
	const struct in6_addr *daddr = NULL;
	const struct in6_addr *saddr = NULL;
	struct rt6_info *rt6 = (struct rt6_info *)dst;
	struct fib6_result res = {};
	struct rt6_info *exc;

	/* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
	 * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
	 * [see also comment in rt6_mtu_change_route()]
	 */

	/* Prefer addresses from the packet header, then from the socket. */
	if (iph) {
		daddr = &iph->daddr;
		saddr = &iph->saddr;
	} else if (sk) {
		daddr = &sk->sk_v6_daddr;
		saddr = &inet6_sk(sk)->saddr;
	}

	if (confirm_neigh)
		dst_confirm_neigh(dst, daddr);

	if (mtu < IPV6_MIN_MTU || mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
		/* update rt6_ex->stamp for cache */
		if (rt6->rt6i_flags & RTF_CACHE)
			rt6_update_exception_stamp_rt(rt6);
		return;
	}

	/* An exception entry is keyed by destination; nothing to do without one. */
	if (!daddr)
		return;

	rcu_read_lock();

	res.f6i = rcu_dereference(rt6->from);
	if (!res.f6i)
		goto out_unlock;

	res.fib6_flags = res.f6i->fib6_flags;
	res.fib6_type = res.f6i->fib6_type;

	if (!res.f6i->nh) {
		res.nh = res.f6i->fib6_nh;
	} else {
		struct fib6_nh_match_arg arg = {
			.dev = dst->dev,
			.gw = &rt6->rt6i_gateway,
		};

		nexthop_for_each_fib6_nh(res.f6i->nh,
					 fib6_nh_find_match, &arg);

		/* fib6_info uses a nexthop that does not have fib6_nh
		 * using the dst->dev + gw. Should be impossible.
		 */
		if (!arg.match)
			goto out_unlock;

		res.nh = arg.match;
	}

	exc = ip6_rt_cache_alloc(&res, daddr, saddr);
	if (exc) {
		rt6_do_update_pmtu(exc, mtu);
		/* On insertion failure the new entry is released right away. */
		if (rt6_insert_exception(exc, &res))
			dst_release_immediate(&exc->dst);
	}

out_unlock:
	rcu_read_unlock();
}
2793*4882a593Smuzhiyun
/*
 * Wrapper around __ip6_rt_update_pmtu() that derives the IPv6 header
 * from @skb when one is supplied (NULL header otherwise).
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu,
			       bool confirm_neigh)
{
	const struct ipv6hdr *iph = NULL;

	if (skb)
		iph = ipv6_hdr(skb);

	__ip6_rt_update_pmtu(dst, sk, iph, mtu, confirm_neigh);
}
2801*4882a593Smuzhiyun
/*
 * Update the path MTU for the flow described by the IPv6 header found
 * at skb->data.
 *
 * @mtu is in network byte order and is converted with ntohl().  When
 * @mark is zero, the flow mark falls back to IP6_REPLY_MARK() of the
 * skb's mark.  The route is looked up with ip6_route_output(); the MTU
 * is only applied when that lookup did not yield an error dst.  The
 * looked-up dst is always released before returning.
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark, kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_uid = uid,
	};
	struct dst_entry *route;

	route = ip6_route_output(net, NULL, &fl6);
	if (route->error == 0)
		__ip6_rt_update_pmtu(route, NULL, iph, ntohl(mtu), true);

	dst_release(route);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2822*4882a593Smuzhiyun
/*
 * Socket flavour of ip6_update_pmtu().
 *
 * The output interface is the socket's bound device or, failing that,
 * the l3mdev master of skb->dev (if any).  After the PMTU update, the
 * socket's cached dst is re-validated; if it is obsolete and its
 * ->check() rejects it, the datagram dst is refreshed via
 * ip6_datagram_dst_update() - but only when the socket is not owned by
 * user context and its destination is not a v4-mapped address.
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct dst_entry *cached;
	int oif;

	oif = sk->sk_bound_dev_if;
	if (!oif && skb->dev)
		oif = l3mdev_master_ifindex(skb->dev);

	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);

	cached = __sk_dst_get(sk);
	if (!cached)
		return;
	if (!cached->obsolete)
		return;
	/* A non-NULL result from ->check() means the cached dst is still good. */
	if (cached->ops->check(cached, inet6_sk(sk)->dst_cookie))
		return;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2844*4882a593Smuzhiyun
ip6_sk_dst_store_flow(struct sock * sk,struct dst_entry * dst,const struct flowi6 * fl6)2845*4882a593Smuzhiyun void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2846*4882a593Smuzhiyun const struct flowi6 *fl6)
2847*4882a593Smuzhiyun {
2848*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
2849*4882a593Smuzhiyun struct ipv6_pinfo *np = inet6_sk(sk);
2850*4882a593Smuzhiyun #endif
2851*4882a593Smuzhiyun
2852*4882a593Smuzhiyun ip6_dst_store(sk, dst,
2853*4882a593Smuzhiyun ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2854*4882a593Smuzhiyun &sk->sk_v6_daddr : NULL,
2855*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
2856*4882a593Smuzhiyun ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2857*4882a593Smuzhiyun &np->saddr :
2858*4882a593Smuzhiyun #endif
2859*4882a593Smuzhiyun NULL);
2860*4882a593Smuzhiyun }
2861*4882a593Smuzhiyun
ip6_redirect_nh_match(const struct fib6_result * res,struct flowi6 * fl6,const struct in6_addr * gw,struct rt6_info ** ret)2862*4882a593Smuzhiyun static bool ip6_redirect_nh_match(const struct fib6_result *res,
2863*4882a593Smuzhiyun struct flowi6 *fl6,
2864*4882a593Smuzhiyun const struct in6_addr *gw,
2865*4882a593Smuzhiyun struct rt6_info **ret)
2866*4882a593Smuzhiyun {
2867*4882a593Smuzhiyun const struct fib6_nh *nh = res->nh;
2868*4882a593Smuzhiyun
2869*4882a593Smuzhiyun if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
2870*4882a593Smuzhiyun fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
2871*4882a593Smuzhiyun return false;
2872*4882a593Smuzhiyun
2873*4882a593Smuzhiyun /* rt_cache's gateway might be different from its 'parent'
2874*4882a593Smuzhiyun * in the case of an ip redirect.
2875*4882a593Smuzhiyun * So we keep searching in the exception table if the gateway
2876*4882a593Smuzhiyun * is different.
2877*4882a593Smuzhiyun */
2878*4882a593Smuzhiyun if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
2879*4882a593Smuzhiyun struct rt6_info *rt_cache;
2880*4882a593Smuzhiyun
2881*4882a593Smuzhiyun rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
2882*4882a593Smuzhiyun if (rt_cache &&
2883*4882a593Smuzhiyun ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
2884*4882a593Smuzhiyun *ret = rt_cache;
2885*4882a593Smuzhiyun return true;
2886*4882a593Smuzhiyun }
2887*4882a593Smuzhiyun return false;
2888*4882a593Smuzhiyun }
2889*4882a593Smuzhiyun return true;
2890*4882a593Smuzhiyun }
2891*4882a593Smuzhiyun
/* Argument bundle handed to fib6_nh_redirect_match() through a void *. */
struct fib6_nh_rd_arg {
	struct fib6_result	*res;	/* ->nh is overwritten per candidate */
	struct flowi6		*fl6;	/* flow being redirected */
	const struct in6_addr	*gw;	/* gateway to match against */
	struct rt6_info		**ret;	/* receives a matching cached route */
};
2898*4882a593Smuzhiyun
fib6_nh_redirect_match(struct fib6_nh * nh,void * _arg)2899*4882a593Smuzhiyun static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
2900*4882a593Smuzhiyun {
2901*4882a593Smuzhiyun struct fib6_nh_rd_arg *arg = _arg;
2902*4882a593Smuzhiyun
2903*4882a593Smuzhiyun arg->res->nh = nh;
2904*4882a593Smuzhiyun return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
2905*4882a593Smuzhiyun }
2906*4882a593Smuzhiyun
2907*4882a593Smuzhiyun /* Handle redirects */
2908*4882a593Smuzhiyun struct ip6rd_flowi {
2909*4882a593Smuzhiyun struct flowi6 fl6;
2910*4882a593Smuzhiyun struct in6_addr gateway;
2911*4882a593Smuzhiyun };
2912*4882a593Smuzhiyun
__ip6_route_redirect(struct net * net,struct fib6_table * table,struct flowi6 * fl6,const struct sk_buff * skb,int flags)2913*4882a593Smuzhiyun INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net,
2914*4882a593Smuzhiyun struct fib6_table *table,
2915*4882a593Smuzhiyun struct flowi6 *fl6,
2916*4882a593Smuzhiyun const struct sk_buff *skb,
2917*4882a593Smuzhiyun int flags)
2918*4882a593Smuzhiyun {
2919*4882a593Smuzhiyun struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2920*4882a593Smuzhiyun struct rt6_info *ret = NULL;
2921*4882a593Smuzhiyun struct fib6_result res = {};
2922*4882a593Smuzhiyun struct fib6_nh_rd_arg arg = {
2923*4882a593Smuzhiyun .res = &res,
2924*4882a593Smuzhiyun .fl6 = fl6,
2925*4882a593Smuzhiyun .gw = &rdfl->gateway,
2926*4882a593Smuzhiyun .ret = &ret
2927*4882a593Smuzhiyun };
2928*4882a593Smuzhiyun struct fib6_info *rt;
2929*4882a593Smuzhiyun struct fib6_node *fn;
2930*4882a593Smuzhiyun
2931*4882a593Smuzhiyun /* l3mdev_update_flow overrides oif if the device is enslaved; in
2932*4882a593Smuzhiyun * this case we must match on the real ingress device, so reset it
2933*4882a593Smuzhiyun */
2934*4882a593Smuzhiyun if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2935*4882a593Smuzhiyun fl6->flowi6_oif = skb->dev->ifindex;
2936*4882a593Smuzhiyun
2937*4882a593Smuzhiyun /* Get the "current" route for this destination and
2938*4882a593Smuzhiyun * check if the redirect has come from appropriate router.
2939*4882a593Smuzhiyun *
2940*4882a593Smuzhiyun * RFC 4861 specifies that redirects should only be
2941*4882a593Smuzhiyun * accepted if they come from the nexthop to the target.
2942*4882a593Smuzhiyun * Due to the way the routes are chosen, this notion
2943*4882a593Smuzhiyun * is a bit fuzzy and one might need to check all possible
2944*4882a593Smuzhiyun * routes.
2945*4882a593Smuzhiyun */
2946*4882a593Smuzhiyun
2947*4882a593Smuzhiyun rcu_read_lock();
2948*4882a593Smuzhiyun fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2949*4882a593Smuzhiyun restart:
2950*4882a593Smuzhiyun for_each_fib6_node_rt_rcu(fn) {
2951*4882a593Smuzhiyun res.f6i = rt;
2952*4882a593Smuzhiyun if (fib6_check_expired(rt))
2953*4882a593Smuzhiyun continue;
2954*4882a593Smuzhiyun if (rt->fib6_flags & RTF_REJECT)
2955*4882a593Smuzhiyun break;
2956*4882a593Smuzhiyun if (unlikely(rt->nh)) {
2957*4882a593Smuzhiyun if (nexthop_is_blackhole(rt->nh))
2958*4882a593Smuzhiyun continue;
2959*4882a593Smuzhiyun /* on match, res->nh is filled in and potentially ret */
2960*4882a593Smuzhiyun if (nexthop_for_each_fib6_nh(rt->nh,
2961*4882a593Smuzhiyun fib6_nh_redirect_match,
2962*4882a593Smuzhiyun &arg))
2963*4882a593Smuzhiyun goto out;
2964*4882a593Smuzhiyun } else {
2965*4882a593Smuzhiyun res.nh = rt->fib6_nh;
2966*4882a593Smuzhiyun if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
2967*4882a593Smuzhiyun &ret))
2968*4882a593Smuzhiyun goto out;
2969*4882a593Smuzhiyun }
2970*4882a593Smuzhiyun }
2971*4882a593Smuzhiyun
2972*4882a593Smuzhiyun if (!rt)
2973*4882a593Smuzhiyun rt = net->ipv6.fib6_null_entry;
2974*4882a593Smuzhiyun else if (rt->fib6_flags & RTF_REJECT) {
2975*4882a593Smuzhiyun ret = net->ipv6.ip6_null_entry;
2976*4882a593Smuzhiyun goto out;
2977*4882a593Smuzhiyun }
2978*4882a593Smuzhiyun
2979*4882a593Smuzhiyun if (rt == net->ipv6.fib6_null_entry) {
2980*4882a593Smuzhiyun fn = fib6_backtrack(fn, &fl6->saddr);
2981*4882a593Smuzhiyun if (fn)
2982*4882a593Smuzhiyun goto restart;
2983*4882a593Smuzhiyun }
2984*4882a593Smuzhiyun
2985*4882a593Smuzhiyun res.f6i = rt;
2986*4882a593Smuzhiyun res.nh = rt->fib6_nh;
2987*4882a593Smuzhiyun out:
2988*4882a593Smuzhiyun if (ret) {
2989*4882a593Smuzhiyun ip6_hold_safe(net, &ret);
2990*4882a593Smuzhiyun } else {
2991*4882a593Smuzhiyun res.fib6_flags = res.f6i->fib6_flags;
2992*4882a593Smuzhiyun res.fib6_type = res.f6i->fib6_type;
2993*4882a593Smuzhiyun ret = ip6_create_rt_rcu(&res);
2994*4882a593Smuzhiyun }
2995*4882a593Smuzhiyun
2996*4882a593Smuzhiyun rcu_read_unlock();
2997*4882a593Smuzhiyun
2998*4882a593Smuzhiyun trace_fib6_table_lookup(net, &res, table, fl6);
2999*4882a593Smuzhiyun return ret;
3000*4882a593Smuzhiyun };
3001*4882a593Smuzhiyun
ip6_route_redirect(struct net * net,const struct flowi6 * fl6,const struct sk_buff * skb,const struct in6_addr * gateway)3002*4882a593Smuzhiyun static struct dst_entry *ip6_route_redirect(struct net *net,
3003*4882a593Smuzhiyun const struct flowi6 *fl6,
3004*4882a593Smuzhiyun const struct sk_buff *skb,
3005*4882a593Smuzhiyun const struct in6_addr *gateway)
3006*4882a593Smuzhiyun {
3007*4882a593Smuzhiyun int flags = RT6_LOOKUP_F_HAS_SADDR;
3008*4882a593Smuzhiyun struct ip6rd_flowi rdfl;
3009*4882a593Smuzhiyun
3010*4882a593Smuzhiyun rdfl.fl6 = *fl6;
3011*4882a593Smuzhiyun rdfl.gateway = *gateway;
3012*4882a593Smuzhiyun
3013*4882a593Smuzhiyun return fib6_rule_lookup(net, &rdfl.fl6, skb,
3014*4882a593Smuzhiyun flags, __ip6_route_redirect);
3015*4882a593Smuzhiyun }
3016*4882a593Smuzhiyun
/*
 * Process a redirect for the flow described by the IPv6 header located
 * at skb->data.  The redirecting gateway is the source address of
 * ipv6_hdr(skb).  The matching route is looked up, handed to
 * rt6_do_redirect() and then released.
 */
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
		  kuid_t uid)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	const struct in6_addr *gateway = &ipv6_hdr(skb)->saddr;
	struct flowi6 fl6 = {
		.flowi6_iif = LOOPBACK_IFINDEX,
		.flowi6_oif = oif,
		.flowi6_mark = mark,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_uid = uid,
	};
	struct dst_entry *route;

	route = ip6_route_redirect(net, &fl6, skb, gateway);
	rt6_do_redirect(route, NULL, skb);
	dst_release(route);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
3037*4882a593Smuzhiyun
ip6_redirect_no_header(struct sk_buff * skb,struct net * net,int oif)3038*4882a593Smuzhiyun void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
3039*4882a593Smuzhiyun {
3040*4882a593Smuzhiyun const struct ipv6hdr *iph = ipv6_hdr(skb);
3041*4882a593Smuzhiyun const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
3042*4882a593Smuzhiyun struct dst_entry *dst;
3043*4882a593Smuzhiyun struct flowi6 fl6 = {
3044*4882a593Smuzhiyun .flowi6_iif = LOOPBACK_IFINDEX,
3045*4882a593Smuzhiyun .flowi6_oif = oif,
3046*4882a593Smuzhiyun .daddr = msg->dest,
3047*4882a593Smuzhiyun .saddr = iph->daddr,
3048*4882a593Smuzhiyun .flowi6_uid = sock_net_uid(net, NULL),
3049*4882a593Smuzhiyun };
3050*4882a593Smuzhiyun
3051*4882a593Smuzhiyun dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
3052*4882a593Smuzhiyun rt6_do_redirect(dst, NULL, skb);
3053*4882a593Smuzhiyun dst_release(dst);
3054*4882a593Smuzhiyun }
3055*4882a593Smuzhiyun
ip6_sk_redirect(struct sk_buff * skb,struct sock * sk)3056*4882a593Smuzhiyun void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
3057*4882a593Smuzhiyun {
3058*4882a593Smuzhiyun ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
3059*4882a593Smuzhiyun sk->sk_uid);
3060*4882a593Smuzhiyun }
3061*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(ip6_sk_redirect);
3062*4882a593Smuzhiyun
ip6_default_advmss(const struct dst_entry * dst)3063*4882a593Smuzhiyun static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3064*4882a593Smuzhiyun {
3065*4882a593Smuzhiyun struct net_device *dev = dst->dev;
3066*4882a593Smuzhiyun unsigned int mtu = dst_mtu(dst);
3067*4882a593Smuzhiyun struct net *net = dev_net(dev);
3068*4882a593Smuzhiyun
3069*4882a593Smuzhiyun mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
3070*4882a593Smuzhiyun
3071*4882a593Smuzhiyun if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
3072*4882a593Smuzhiyun mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
3073*4882a593Smuzhiyun
3074*4882a593Smuzhiyun /*
3075*4882a593Smuzhiyun * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
3076*4882a593Smuzhiyun * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
3077*4882a593Smuzhiyun * IPV6_MAXPLEN is also valid and means: "any MSS,
3078*4882a593Smuzhiyun * rely only on pmtu discovery"
3079*4882a593Smuzhiyun */
3080*4882a593Smuzhiyun if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
3081*4882a593Smuzhiyun mtu = IPV6_MAXPLEN;
3082*4882a593Smuzhiyun return mtu;
3083*4882a593Smuzhiyun }
3084*4882a593Smuzhiyun
ip6_mtu(const struct dst_entry * dst)3085*4882a593Smuzhiyun static unsigned int ip6_mtu(const struct dst_entry *dst)
3086*4882a593Smuzhiyun {
3087*4882a593Smuzhiyun struct inet6_dev *idev;
3088*4882a593Smuzhiyun unsigned int mtu;
3089*4882a593Smuzhiyun
3090*4882a593Smuzhiyun mtu = dst_metric_raw(dst, RTAX_MTU);
3091*4882a593Smuzhiyun if (mtu)
3092*4882a593Smuzhiyun goto out;
3093*4882a593Smuzhiyun
3094*4882a593Smuzhiyun mtu = IPV6_MIN_MTU;
3095*4882a593Smuzhiyun
3096*4882a593Smuzhiyun rcu_read_lock();
3097*4882a593Smuzhiyun idev = __in6_dev_get(dst->dev);
3098*4882a593Smuzhiyun if (idev)
3099*4882a593Smuzhiyun mtu = idev->cnf.mtu6;
3100*4882a593Smuzhiyun rcu_read_unlock();
3101*4882a593Smuzhiyun
3102*4882a593Smuzhiyun out:
3103*4882a593Smuzhiyun mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3104*4882a593Smuzhiyun
3105*4882a593Smuzhiyun return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
3106*4882a593Smuzhiyun }
3107*4882a593Smuzhiyun
3108*4882a593Smuzhiyun /* MTU selection:
3109*4882a593Smuzhiyun * 1. mtu on route is locked - use it
3110*4882a593Smuzhiyun * 2. mtu from nexthop exception
3111*4882a593Smuzhiyun * 3. mtu from egress device
3112*4882a593Smuzhiyun *
3113*4882a593Smuzhiyun * based on ip6_dst_mtu_forward and exception logic of
3114*4882a593Smuzhiyun * rt6_find_cached_rt; called with rcu_read_lock
3115*4882a593Smuzhiyun */
ip6_mtu_from_fib6(const struct fib6_result * res,const struct in6_addr * daddr,const struct in6_addr * saddr)3116*4882a593Smuzhiyun u32 ip6_mtu_from_fib6(const struct fib6_result *res,
3117*4882a593Smuzhiyun const struct in6_addr *daddr,
3118*4882a593Smuzhiyun const struct in6_addr *saddr)
3119*4882a593Smuzhiyun {
3120*4882a593Smuzhiyun const struct fib6_nh *nh = res->nh;
3121*4882a593Smuzhiyun struct fib6_info *f6i = res->f6i;
3122*4882a593Smuzhiyun struct inet6_dev *idev;
3123*4882a593Smuzhiyun struct rt6_info *rt;
3124*4882a593Smuzhiyun u32 mtu = 0;
3125*4882a593Smuzhiyun
3126*4882a593Smuzhiyun if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
3127*4882a593Smuzhiyun mtu = f6i->fib6_pmtu;
3128*4882a593Smuzhiyun if (mtu)
3129*4882a593Smuzhiyun goto out;
3130*4882a593Smuzhiyun }
3131*4882a593Smuzhiyun
3132*4882a593Smuzhiyun rt = rt6_find_cached_rt(res, daddr, saddr);
3133*4882a593Smuzhiyun if (unlikely(rt)) {
3134*4882a593Smuzhiyun mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
3135*4882a593Smuzhiyun } else {
3136*4882a593Smuzhiyun struct net_device *dev = nh->fib_nh_dev;
3137*4882a593Smuzhiyun
3138*4882a593Smuzhiyun mtu = IPV6_MIN_MTU;
3139*4882a593Smuzhiyun idev = __in6_dev_get(dev);
3140*4882a593Smuzhiyun if (idev && idev->cnf.mtu6 > mtu)
3141*4882a593Smuzhiyun mtu = idev->cnf.mtu6;
3142*4882a593Smuzhiyun }
3143*4882a593Smuzhiyun
3144*4882a593Smuzhiyun mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3145*4882a593Smuzhiyun out:
3146*4882a593Smuzhiyun return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
3147*4882a593Smuzhiyun }
3148*4882a593Smuzhiyun
icmp6_dst_alloc(struct net_device * dev,struct flowi6 * fl6)3149*4882a593Smuzhiyun struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
3150*4882a593Smuzhiyun struct flowi6 *fl6)
3151*4882a593Smuzhiyun {
3152*4882a593Smuzhiyun struct dst_entry *dst;
3153*4882a593Smuzhiyun struct rt6_info *rt;
3154*4882a593Smuzhiyun struct inet6_dev *idev = in6_dev_get(dev);
3155*4882a593Smuzhiyun struct net *net = dev_net(dev);
3156*4882a593Smuzhiyun
3157*4882a593Smuzhiyun if (unlikely(!idev))
3158*4882a593Smuzhiyun return ERR_PTR(-ENODEV);
3159*4882a593Smuzhiyun
3160*4882a593Smuzhiyun rt = ip6_dst_alloc(net, dev, 0);
3161*4882a593Smuzhiyun if (unlikely(!rt)) {
3162*4882a593Smuzhiyun in6_dev_put(idev);
3163*4882a593Smuzhiyun dst = ERR_PTR(-ENOMEM);
3164*4882a593Smuzhiyun goto out;
3165*4882a593Smuzhiyun }
3166*4882a593Smuzhiyun
3167*4882a593Smuzhiyun rt->dst.input = ip6_input;
3168*4882a593Smuzhiyun rt->dst.output = ip6_output;
3169*4882a593Smuzhiyun rt->rt6i_gateway = fl6->daddr;
3170*4882a593Smuzhiyun rt->rt6i_dst.addr = fl6->daddr;
3171*4882a593Smuzhiyun rt->rt6i_dst.plen = 128;
3172*4882a593Smuzhiyun rt->rt6i_idev = idev;
3173*4882a593Smuzhiyun dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
3174*4882a593Smuzhiyun
3175*4882a593Smuzhiyun /* Add this dst into uncached_list so that rt6_disable_ip() can
3176*4882a593Smuzhiyun * do proper release of the net_device
3177*4882a593Smuzhiyun */
3178*4882a593Smuzhiyun rt6_uncached_list_add(rt);
3179*4882a593Smuzhiyun atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
3180*4882a593Smuzhiyun
3181*4882a593Smuzhiyun dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
3182*4882a593Smuzhiyun
3183*4882a593Smuzhiyun out:
3184*4882a593Smuzhiyun return dst;
3185*4882a593Smuzhiyun }
3186*4882a593Smuzhiyun
ip6_dst_gc(struct dst_ops * ops)3187*4882a593Smuzhiyun static int ip6_dst_gc(struct dst_ops *ops)
3188*4882a593Smuzhiyun {
3189*4882a593Smuzhiyun struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
3190*4882a593Smuzhiyun int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
3191*4882a593Smuzhiyun int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
3192*4882a593Smuzhiyun int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
3193*4882a593Smuzhiyun int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
3194*4882a593Smuzhiyun unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
3195*4882a593Smuzhiyun int entries;
3196*4882a593Smuzhiyun
3197*4882a593Smuzhiyun entries = dst_entries_get_fast(ops);
3198*4882a593Smuzhiyun if (entries > rt_max_size)
3199*4882a593Smuzhiyun entries = dst_entries_get_slow(ops);
3200*4882a593Smuzhiyun
3201*4882a593Smuzhiyun if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
3202*4882a593Smuzhiyun entries <= rt_max_size)
3203*4882a593Smuzhiyun goto out;
3204*4882a593Smuzhiyun
3205*4882a593Smuzhiyun net->ipv6.ip6_rt_gc_expire++;
3206*4882a593Smuzhiyun fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
3207*4882a593Smuzhiyun entries = dst_entries_get_slow(ops);
3208*4882a593Smuzhiyun if (entries < ops->gc_thresh)
3209*4882a593Smuzhiyun net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
3210*4882a593Smuzhiyun out:
3211*4882a593Smuzhiyun net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
3212*4882a593Smuzhiyun return entries > rt_max_size;
3213*4882a593Smuzhiyun }
3214*4882a593Smuzhiyun
/*
 * Look up @gw_addr in FIB table @tbid and store the outcome in @res.
 *
 * The lookup always ignores link state, and honours the configured
 * preferred source when one is set.  When the lookup succeeds with
 * something other than the fib6 null entry, fib6_select_path() picks
 * the path.
 *
 * Returns -EINVAL when the table does not exist, otherwise the return
 * value of fib6_table_lookup().
 */
static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
			       const struct in6_addr *gw_addr, u32 tbid,
			       int flags, struct fib6_result *res)
{
	struct flowi6 fl6 = {
		.flowi6_oif = cfg->fc_ifindex,
		.daddr = *gw_addr,
		.saddr = cfg->fc_prefsrc,
	};
	struct fib6_table *table;
	int err;

	table = fib6_get_table(net, tbid);
	if (!table)
		return -EINVAL;

	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
	if (!ipv6_addr_any(&cfg->fc_prefsrc))
		flags |= RT6_LOOKUP_F_HAS_SADDR;

	err = fib6_table_lookup(net, table, cfg->fc_ifindex, &fl6, res, flags);
	if (err)
		return err;

	if (res->f6i != net->ipv6.fib6_null_entry)
		fib6_select_path(net, res, &fl6, cfg->fc_ifindex,
				 cfg->fc_ifindex != 0, NULL, flags);

	return 0;
}
3243*4882a593Smuzhiyun
ip6_route_check_nh_onlink(struct net * net,struct fib6_config * cfg,const struct net_device * dev,struct netlink_ext_ack * extack)3244*4882a593Smuzhiyun static int ip6_route_check_nh_onlink(struct net *net,
3245*4882a593Smuzhiyun struct fib6_config *cfg,
3246*4882a593Smuzhiyun const struct net_device *dev,
3247*4882a593Smuzhiyun struct netlink_ext_ack *extack)
3248*4882a593Smuzhiyun {
3249*4882a593Smuzhiyun u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
3250*4882a593Smuzhiyun const struct in6_addr *gw_addr = &cfg->fc_gateway;
3251*4882a593Smuzhiyun struct fib6_result res = {};
3252*4882a593Smuzhiyun int err;
3253*4882a593Smuzhiyun
3254*4882a593Smuzhiyun err = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0, &res);
3255*4882a593Smuzhiyun if (!err && !(res.fib6_flags & RTF_REJECT) &&
3256*4882a593Smuzhiyun /* ignore match if it is the default route */
3257*4882a593Smuzhiyun !ipv6_addr_any(&res.f6i->fib6_dst.addr) &&
3258*4882a593Smuzhiyun (res.fib6_type != RTN_UNICAST || dev != res.nh->fib_nh_dev)) {
3259*4882a593Smuzhiyun NL_SET_ERR_MSG(extack,
3260*4882a593Smuzhiyun "Nexthop has invalid gateway or device mismatch");
3261*4882a593Smuzhiyun err = -EINVAL;
3262*4882a593Smuzhiyun }
3263*4882a593Smuzhiyun
3264*4882a593Smuzhiyun return err;
3265*4882a593Smuzhiyun }
3266*4882a593Smuzhiyun
ip6_route_check_nh(struct net * net,struct fib6_config * cfg,struct net_device ** _dev,struct inet6_dev ** idev)3267*4882a593Smuzhiyun static int ip6_route_check_nh(struct net *net,
3268*4882a593Smuzhiyun struct fib6_config *cfg,
3269*4882a593Smuzhiyun struct net_device **_dev,
3270*4882a593Smuzhiyun struct inet6_dev **idev)
3271*4882a593Smuzhiyun {
3272*4882a593Smuzhiyun const struct in6_addr *gw_addr = &cfg->fc_gateway;
3273*4882a593Smuzhiyun struct net_device *dev = _dev ? *_dev : NULL;
3274*4882a593Smuzhiyun int flags = RT6_LOOKUP_F_IFACE;
3275*4882a593Smuzhiyun struct fib6_result res = {};
3276*4882a593Smuzhiyun int err = -EHOSTUNREACH;
3277*4882a593Smuzhiyun
3278*4882a593Smuzhiyun if (cfg->fc_table) {
3279*4882a593Smuzhiyun err = ip6_nh_lookup_table(net, cfg, gw_addr,
3280*4882a593Smuzhiyun cfg->fc_table, flags, &res);
3281*4882a593Smuzhiyun /* gw_addr can not require a gateway or resolve to a reject
3282*4882a593Smuzhiyun * route. If a device is given, it must match the result.
3283*4882a593Smuzhiyun */
3284*4882a593Smuzhiyun if (err || res.fib6_flags & RTF_REJECT ||
3285*4882a593Smuzhiyun res.nh->fib_nh_gw_family ||
3286*4882a593Smuzhiyun (dev && dev != res.nh->fib_nh_dev))
3287*4882a593Smuzhiyun err = -EHOSTUNREACH;
3288*4882a593Smuzhiyun }
3289*4882a593Smuzhiyun
3290*4882a593Smuzhiyun if (err < 0) {
3291*4882a593Smuzhiyun struct flowi6 fl6 = {
3292*4882a593Smuzhiyun .flowi6_oif = cfg->fc_ifindex,
3293*4882a593Smuzhiyun .daddr = *gw_addr,
3294*4882a593Smuzhiyun };
3295*4882a593Smuzhiyun
3296*4882a593Smuzhiyun err = fib6_lookup(net, cfg->fc_ifindex, &fl6, &res, flags);
3297*4882a593Smuzhiyun if (err || res.fib6_flags & RTF_REJECT ||
3298*4882a593Smuzhiyun res.nh->fib_nh_gw_family)
3299*4882a593Smuzhiyun err = -EHOSTUNREACH;
3300*4882a593Smuzhiyun
3301*4882a593Smuzhiyun if (err)
3302*4882a593Smuzhiyun return err;
3303*4882a593Smuzhiyun
3304*4882a593Smuzhiyun fib6_select_path(net, &res, &fl6, cfg->fc_ifindex,
3305*4882a593Smuzhiyun cfg->fc_ifindex != 0, NULL, flags);
3306*4882a593Smuzhiyun }
3307*4882a593Smuzhiyun
3308*4882a593Smuzhiyun err = 0;
3309*4882a593Smuzhiyun if (dev) {
3310*4882a593Smuzhiyun if (dev != res.nh->fib_nh_dev)
3311*4882a593Smuzhiyun err = -EHOSTUNREACH;
3312*4882a593Smuzhiyun } else {
3313*4882a593Smuzhiyun *_dev = dev = res.nh->fib_nh_dev;
3314*4882a593Smuzhiyun dev_hold(dev);
3315*4882a593Smuzhiyun *idev = in6_dev_get(dev);
3316*4882a593Smuzhiyun }
3317*4882a593Smuzhiyun
3318*4882a593Smuzhiyun return err;
3319*4882a593Smuzhiyun }
3320*4882a593Smuzhiyun
ip6_validate_gw(struct net * net,struct fib6_config * cfg,struct net_device ** _dev,struct inet6_dev ** idev,struct netlink_ext_ack * extack)3321*4882a593Smuzhiyun static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
3322*4882a593Smuzhiyun struct net_device **_dev, struct inet6_dev **idev,
3323*4882a593Smuzhiyun struct netlink_ext_ack *extack)
3324*4882a593Smuzhiyun {
3325*4882a593Smuzhiyun const struct in6_addr *gw_addr = &cfg->fc_gateway;
3326*4882a593Smuzhiyun int gwa_type = ipv6_addr_type(gw_addr);
3327*4882a593Smuzhiyun bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
3328*4882a593Smuzhiyun const struct net_device *dev = *_dev;
3329*4882a593Smuzhiyun bool need_addr_check = !dev;
3330*4882a593Smuzhiyun int err = -EINVAL;
3331*4882a593Smuzhiyun
3332*4882a593Smuzhiyun /* if gw_addr is local we will fail to detect this in case
3333*4882a593Smuzhiyun * address is still TENTATIVE (DAD in progress). rt6_lookup()
3334*4882a593Smuzhiyun * will return already-added prefix route via interface that
3335*4882a593Smuzhiyun * prefix route was assigned to, which might be non-loopback.
3336*4882a593Smuzhiyun */
3337*4882a593Smuzhiyun if (dev &&
3338*4882a593Smuzhiyun ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3339*4882a593Smuzhiyun NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3340*4882a593Smuzhiyun goto out;
3341*4882a593Smuzhiyun }
3342*4882a593Smuzhiyun
3343*4882a593Smuzhiyun if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
3344*4882a593Smuzhiyun /* IPv6 strictly inhibits using not link-local
3345*4882a593Smuzhiyun * addresses as nexthop address.
3346*4882a593Smuzhiyun * Otherwise, router will not able to send redirects.
3347*4882a593Smuzhiyun * It is very good, but in some (rare!) circumstances
3348*4882a593Smuzhiyun * (SIT, PtP, NBMA NOARP links) it is handy to allow
3349*4882a593Smuzhiyun * some exceptions. --ANK
3350*4882a593Smuzhiyun * We allow IPv4-mapped nexthops to support RFC4798-type
3351*4882a593Smuzhiyun * addressing
3352*4882a593Smuzhiyun */
3353*4882a593Smuzhiyun if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
3354*4882a593Smuzhiyun NL_SET_ERR_MSG(extack, "Invalid gateway address");
3355*4882a593Smuzhiyun goto out;
3356*4882a593Smuzhiyun }
3357*4882a593Smuzhiyun
3358*4882a593Smuzhiyun rcu_read_lock();
3359*4882a593Smuzhiyun
3360*4882a593Smuzhiyun if (cfg->fc_flags & RTNH_F_ONLINK)
3361*4882a593Smuzhiyun err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
3362*4882a593Smuzhiyun else
3363*4882a593Smuzhiyun err = ip6_route_check_nh(net, cfg, _dev, idev);
3364*4882a593Smuzhiyun
3365*4882a593Smuzhiyun rcu_read_unlock();
3366*4882a593Smuzhiyun
3367*4882a593Smuzhiyun if (err)
3368*4882a593Smuzhiyun goto out;
3369*4882a593Smuzhiyun }
3370*4882a593Smuzhiyun
3371*4882a593Smuzhiyun /* reload in case device was changed */
3372*4882a593Smuzhiyun dev = *_dev;
3373*4882a593Smuzhiyun
3374*4882a593Smuzhiyun err = -EINVAL;
3375*4882a593Smuzhiyun if (!dev) {
3376*4882a593Smuzhiyun NL_SET_ERR_MSG(extack, "Egress device not specified");
3377*4882a593Smuzhiyun goto out;
3378*4882a593Smuzhiyun } else if (dev->flags & IFF_LOOPBACK) {
3379*4882a593Smuzhiyun NL_SET_ERR_MSG(extack,
3380*4882a593Smuzhiyun "Egress device can not be loopback device for this route");
3381*4882a593Smuzhiyun goto out;
3382*4882a593Smuzhiyun }
3383*4882a593Smuzhiyun
3384*4882a593Smuzhiyun /* if we did not check gw_addr above, do so now that the
3385*4882a593Smuzhiyun * egress device has been resolved.
3386*4882a593Smuzhiyun */
3387*4882a593Smuzhiyun if (need_addr_check &&
3388*4882a593Smuzhiyun ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
3389*4882a593Smuzhiyun NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
3390*4882a593Smuzhiyun goto out;
3391*4882a593Smuzhiyun }
3392*4882a593Smuzhiyun
3393*4882a593Smuzhiyun err = 0;
3394*4882a593Smuzhiyun out:
3395*4882a593Smuzhiyun return err;
3396*4882a593Smuzhiyun }
3397*4882a593Smuzhiyun
fib6_is_reject(u32 flags,struct net_device * dev,int addr_type)3398*4882a593Smuzhiyun static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
3399*4882a593Smuzhiyun {
3400*4882a593Smuzhiyun if ((flags & RTF_REJECT) ||
3401*4882a593Smuzhiyun (dev && (dev->flags & IFF_LOOPBACK) &&
3402*4882a593Smuzhiyun !(addr_type & IPV6_ADDR_LOOPBACK) &&
3403*4882a593Smuzhiyun !(flags & (RTF_ANYCAST | RTF_LOCAL))))
3404*4882a593Smuzhiyun return true;
3405*4882a593Smuzhiyun
3406*4882a593Smuzhiyun return false;
3407*4882a593Smuzhiyun }
3408*4882a593Smuzhiyun
/* Initialise the IPv6 nexthop @fib6_nh from the route configuration @cfg.
 *
 * @net:	network namespace the nexthop lives in
 * @fib6_nh:	nexthop to fill in
 * @cfg:	user/kernel supplied route configuration
 * @gfp_flags:	allocation flags for the per-cpu route cache and the
 *		common nexthop initialisation
 * @extack:	receives a human readable reason on most failures
 *
 * For FDB nexthops (cfg->fc_is_fdb) only the gateway is recorded.
 * Otherwise the egress device is looked up from cfg->fc_ifindex and/or
 * resolved through the gateway, routes that would loop through loopback
 * are redirected to the namespace's loopback device, and the per-cpu
 * route cache is allocated.
 *
 * Returns 0 on success; in that case the device reference is kept in
 * fib6_nh->fib_nh_dev.  On failure a negative errno is returned and the
 * device reference and any lwtunnel state are dropped again.
 */
int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
		 struct fib6_config *cfg, gfp_t gfp_flags,
		 struct netlink_ext_ack *extack)
{
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	int addr_type;
	int err;

	fib6_nh->fib_nh_family = AF_INET6;
#ifdef CONFIG_IPV6_ROUTER_PREF
	fib6_nh->last_probe = jiffies;
#endif
	if (cfg->fc_is_fdb) {
		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
		fib6_nh->fib_nh_gw_family = AF_INET6;
		return 0;
	}

	err = -ENODEV;
	if (cfg->fc_ifindex) {
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_flags & RTNH_F_ONLINK) {
		if (!dev) {
			NL_SET_ERR_MSG(extack,
				       "Nexthop device required for onlink");
			goto out;
		}

		if (!(dev->flags & IFF_UP)) {
			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
			err = -ENETDOWN;
			goto out;
		}

		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
	}

	fib6_nh->fib_nh_weight = 1;

	/* We cannot add true routes via loopback here,
	 * they would result in kernel looping; promote them to reject routes
	 */
	addr_type = ipv6_addr_type(&cfg->fc_dst);
	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		goto pcpu_alloc;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
		if (err)
			goto out;

		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
		fib6_nh->fib_nh_gw_family = AF_INET6;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	/* The device may have been resolved by ip6_validate_gw(), in which
	 * case idev is the unchecked result of in6_dev_get() and can be
	 * NULL.  Fail like the other in6_dev_get() call sites above instead
	 * of dereferencing it.
	 */
	if (!idev)
		goto out;

	if (idev->cnf.disable_ipv6) {
		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
		err = -EACCES;
		goto out;
	}

	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
		err = -ENETDOWN;
		goto out;
	}

	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
	    !netif_carrier_ok(dev))
		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;

	err = fib_nh_common_init(net, &fib6_nh->nh_common, cfg->fc_encap,
				 cfg->fc_encap_type, cfg, gfp_flags, extack);
	if (err)
		goto out;

pcpu_alloc:
	fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
	if (!fib6_nh->rt6i_pcpu) {
		err = -ENOMEM;
		goto out;
	}

	fib6_nh->fib_nh_dev = dev;
	fib6_nh->fib_nh_oif = dev->ifindex;
	err = 0;
out:
	/* idev was only needed for the checks above. */
	if (idev)
		in6_dev_put(idev);

	if (err) {
		lwtstate_put(fib6_nh->fib_nh_lws);
		fib6_nh->fib_nh_lws = NULL;
		if (dev)
			dev_put(dev);
	}

	return err;
}
3535*4882a593Smuzhiyun
fib6_nh_release(struct fib6_nh * fib6_nh)3536*4882a593Smuzhiyun void fib6_nh_release(struct fib6_nh *fib6_nh)
3537*4882a593Smuzhiyun {
3538*4882a593Smuzhiyun struct rt6_exception_bucket *bucket;
3539*4882a593Smuzhiyun
3540*4882a593Smuzhiyun rcu_read_lock();
3541*4882a593Smuzhiyun
3542*4882a593Smuzhiyun fib6_nh_flush_exceptions(fib6_nh, NULL);
3543*4882a593Smuzhiyun bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
3544*4882a593Smuzhiyun if (bucket) {
3545*4882a593Smuzhiyun rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
3546*4882a593Smuzhiyun kfree(bucket);
3547*4882a593Smuzhiyun }
3548*4882a593Smuzhiyun
3549*4882a593Smuzhiyun rcu_read_unlock();
3550*4882a593Smuzhiyun
3551*4882a593Smuzhiyun if (fib6_nh->rt6i_pcpu) {
3552*4882a593Smuzhiyun int cpu;
3553*4882a593Smuzhiyun
3554*4882a593Smuzhiyun for_each_possible_cpu(cpu) {
3555*4882a593Smuzhiyun struct rt6_info **ppcpu_rt;
3556*4882a593Smuzhiyun struct rt6_info *pcpu_rt;
3557*4882a593Smuzhiyun
3558*4882a593Smuzhiyun ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
3559*4882a593Smuzhiyun pcpu_rt = *ppcpu_rt;
3560*4882a593Smuzhiyun if (pcpu_rt) {
3561*4882a593Smuzhiyun dst_dev_put(&pcpu_rt->dst);
3562*4882a593Smuzhiyun dst_release(&pcpu_rt->dst);
3563*4882a593Smuzhiyun *ppcpu_rt = NULL;
3564*4882a593Smuzhiyun }
3565*4882a593Smuzhiyun }
3566*4882a593Smuzhiyun
3567*4882a593Smuzhiyun free_percpu(fib6_nh->rt6i_pcpu);
3568*4882a593Smuzhiyun }
3569*4882a593Smuzhiyun
3570*4882a593Smuzhiyun fib_nh_common_release(&fib6_nh->nh_common);
3571*4882a593Smuzhiyun }
3572*4882a593Smuzhiyun
/* Build a fib6_info from the route configuration @cfg without inserting
 * it into the FIB.
 *
 * The configuration is validated, the FIB table is looked up or created,
 * and a fib6_info is allocated and filled with metrics, expiry, protocol,
 * type, flags and prefix information.  The nexthop is either the nexthop
 * object named by cfg->fc_nh_id or an embedded fib6_nh initialised by
 * fib6_nh_init().  Finally a preferred source address, if given, is
 * checked with ipv6_chk_addr().
 *
 * @cfg:	route configuration; fc_protocol is rewritten to RTPROT_BOOT
 *		when it was RTPROT_UNSPEC
 * @gfp_flags:	allocation flags
 * @extack:	receives a human readable reason on most failures
 *
 * Returns the new fib6_info on success or an ERR_PTR() on failure.
 */
static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
					       gfp_t gfp_flags,
					       struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct fib6_info *rt = NULL;
	struct nexthop *nh = NULL;
	struct fib6_table *table;
	struct fib6_nh *fib6_nh;
	int err = -EINVAL;
	int addr_type;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
		goto out;
	}

	/* RTF_CACHE is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_CACHE) {
		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
		goto out;
	}

	if (cfg->fc_type > RTN_MAX) {
		NL_SET_ERR_MSG(extack, "Invalid route type");
		goto out;
	}

	if (cfg->fc_dst_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid prefix length");
		goto out;
	}
	if (cfg->fc_src_len > 128) {
		NL_SET_ERR_MSG(extack, "Invalid source address length");
		goto out;
	}
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len) {
		NL_SET_ERR_MSG(extack,
			       "Specifying source address requires IPV6_SUBTREES to be enabled");
		goto out;
	}
#endif
	if (cfg->fc_nh_id) {
		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
		if (!nh) {
			NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
			goto out;
		}
		err = fib6_check_nexthop(nh, cfg, extack);
		if (err)
			goto out;
	}

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	err = -ENOMEM;
	/* The fib6_nh is only embedded when no nexthop object is used. */
	rt = fib6_info_alloc(gfp_flags, !nh);
	if (!rt)
		goto out;

	rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
					       extack);
	if (IS_ERR(rt->fib6_metrics)) {
		err = PTR_ERR(rt->fib6_metrics);
		/* Do not leave garbage there. */
		rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
		goto out_free;
	}

	if (cfg->fc_flags & RTF_ADDRCONF)
		rt->dst_nocount = true;

	if (cfg->fc_flags & RTF_EXPIRES)
		fib6_set_expires(rt, jiffies +
				 clock_t_to_jiffies(cfg->fc_expires));
	else
		fib6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->fib6_protocol = cfg->fc_protocol;

	rt->fib6_table = table;
	rt->fib6_metric = cfg->fc_metric;
	rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;

	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->fib6_dst.plen = cfg->fc_dst_len;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->fib6_src.plen = cfg->fc_src_len;
#endif
	if (nh) {
		/* err still holds -ENOMEM from the allocation above; set the
		 * real reason explicitly for both failures below.
		 */
		if (rt->fib6_src.plen) {
			NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
			err = -EINVAL;
			goto out_free;
		}
		if (!nexthop_get(nh)) {
			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
			err = -ENOENT;
			goto out_free;
		}
		rt->nh = nh;
		fib6_nh = nexthop_fib6_nh(rt->nh);
	} else {
		err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
		if (err)
			goto out;

		fib6_nh = rt->fib6_nh;

		/* We cannot add true routes via loopback here, they would
		 * result in kernel looping; promote them to reject routes
		 */
		addr_type = ipv6_addr_type(&cfg->fc_dst);
		if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev,
				   addr_type))
			rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
	}

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		struct net_device *dev = fib6_nh->fib_nh_dev;

		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			NL_SET_ERR_MSG(extack, "Invalid source address");
			err = -EINVAL;
			goto out;
		}
		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
		rt->fib6_prefsrc.plen = 128;
	} else
		rt->fib6_prefsrc.plen = 0;

	return rt;
out:
	/* Drops the reference on rt; also reached with rt == NULL. */
	fib6_info_release(rt);
	return ERR_PTR(err);
out_free:
	/* rt has no nexthop attached yet, so it is freed directly. */
	ip_fib_metrics_put(rt->fib6_metrics);
	kfree(rt);
	return ERR_PTR(err);
}
3731*4882a593Smuzhiyun
/* Create a route from @cfg and insert it into the FIB.
 *
 * The reference obtained from ip6_route_info_create() is dropped again
 * once the insertion has been attempted, whatever its outcome.
 *
 * Returns 0 on success or a negative errno.
 */
int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
		  struct netlink_ext_ack *extack)
{
	struct fib6_info *new_rt;
	int ret;

	new_rt = ip6_route_info_create(cfg, gfp_flags, extack);
	if (IS_ERR(new_rt))
		return PTR_ERR(new_rt);

	ret = __ip6_ins_rt(new_rt, &cfg->fc_nlinfo, extack);
	fib6_info_release(new_rt);

	return ret;
}
3747*4882a593Smuzhiyun
__ip6_del_rt(struct fib6_info * rt,struct nl_info * info)3748*4882a593Smuzhiyun static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3749*4882a593Smuzhiyun {
3750*4882a593Smuzhiyun struct net *net = info->nl_net;
3751*4882a593Smuzhiyun struct fib6_table *table;
3752*4882a593Smuzhiyun int err;
3753*4882a593Smuzhiyun
3754*4882a593Smuzhiyun if (rt == net->ipv6.fib6_null_entry) {
3755*4882a593Smuzhiyun err = -ENOENT;
3756*4882a593Smuzhiyun goto out;
3757*4882a593Smuzhiyun }
3758*4882a593Smuzhiyun
3759*4882a593Smuzhiyun table = rt->fib6_table;
3760*4882a593Smuzhiyun spin_lock_bh(&table->tb6_lock);
3761*4882a593Smuzhiyun err = fib6_del(rt, info);
3762*4882a593Smuzhiyun spin_unlock_bh(&table->tb6_lock);
3763*4882a593Smuzhiyun
3764*4882a593Smuzhiyun out:
3765*4882a593Smuzhiyun fib6_info_release(rt);
3766*4882a593Smuzhiyun return err;
3767*4882a593Smuzhiyun }
3768*4882a593Smuzhiyun
/* Delete @rt in namespace @net, optionally suppressing the notification
 * (@skip_notify).  Thin wrapper that builds the nl_info for
 * __ip6_del_rt().
 */
int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify)
{
	struct nl_info info = {};

	info.nl_net = net;
	info.skip_notify = skip_notify;

	return __ip6_del_rt(rt, &info);
}
3778*4882a593Smuzhiyun
__ip6_del_rt_siblings(struct fib6_info * rt,struct fib6_config * cfg)3779*4882a593Smuzhiyun static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3780*4882a593Smuzhiyun {
3781*4882a593Smuzhiyun struct nl_info *info = &cfg->fc_nlinfo;
3782*4882a593Smuzhiyun struct net *net = info->nl_net;
3783*4882a593Smuzhiyun struct sk_buff *skb = NULL;
3784*4882a593Smuzhiyun struct fib6_table *table;
3785*4882a593Smuzhiyun int err = -ENOENT;
3786*4882a593Smuzhiyun
3787*4882a593Smuzhiyun if (rt == net->ipv6.fib6_null_entry)
3788*4882a593Smuzhiyun goto out_put;
3789*4882a593Smuzhiyun table = rt->fib6_table;
3790*4882a593Smuzhiyun spin_lock_bh(&table->tb6_lock);
3791*4882a593Smuzhiyun
3792*4882a593Smuzhiyun if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3793*4882a593Smuzhiyun struct fib6_info *sibling, *next_sibling;
3794*4882a593Smuzhiyun struct fib6_node *fn;
3795*4882a593Smuzhiyun
3796*4882a593Smuzhiyun /* prefer to send a single notification with all hops */
3797*4882a593Smuzhiyun skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3798*4882a593Smuzhiyun if (skb) {
3799*4882a593Smuzhiyun u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3800*4882a593Smuzhiyun
3801*4882a593Smuzhiyun if (rt6_fill_node(net, skb, rt, NULL,
3802*4882a593Smuzhiyun NULL, NULL, 0, RTM_DELROUTE,
3803*4882a593Smuzhiyun info->portid, seq, 0) < 0) {
3804*4882a593Smuzhiyun kfree_skb(skb);
3805*4882a593Smuzhiyun skb = NULL;
3806*4882a593Smuzhiyun } else
3807*4882a593Smuzhiyun info->skip_notify = 1;
3808*4882a593Smuzhiyun }
3809*4882a593Smuzhiyun
3810*4882a593Smuzhiyun /* 'rt' points to the first sibling route. If it is not the
3811*4882a593Smuzhiyun * leaf, then we do not need to send a notification. Otherwise,
3812*4882a593Smuzhiyun * we need to check if the last sibling has a next route or not
3813*4882a593Smuzhiyun * and emit a replace or delete notification, respectively.
3814*4882a593Smuzhiyun */
3815*4882a593Smuzhiyun info->skip_notify_kernel = 1;
3816*4882a593Smuzhiyun fn = rcu_dereference_protected(rt->fib6_node,
3817*4882a593Smuzhiyun lockdep_is_held(&table->tb6_lock));
3818*4882a593Smuzhiyun if (rcu_access_pointer(fn->leaf) == rt) {
3819*4882a593Smuzhiyun struct fib6_info *last_sibling, *replace_rt;
3820*4882a593Smuzhiyun
3821*4882a593Smuzhiyun last_sibling = list_last_entry(&rt->fib6_siblings,
3822*4882a593Smuzhiyun struct fib6_info,
3823*4882a593Smuzhiyun fib6_siblings);
3824*4882a593Smuzhiyun replace_rt = rcu_dereference_protected(
3825*4882a593Smuzhiyun last_sibling->fib6_next,
3826*4882a593Smuzhiyun lockdep_is_held(&table->tb6_lock));
3827*4882a593Smuzhiyun if (replace_rt)
3828*4882a593Smuzhiyun call_fib6_entry_notifiers_replace(net,
3829*4882a593Smuzhiyun replace_rt);
3830*4882a593Smuzhiyun else
3831*4882a593Smuzhiyun call_fib6_multipath_entry_notifiers(net,
3832*4882a593Smuzhiyun FIB_EVENT_ENTRY_DEL,
3833*4882a593Smuzhiyun rt, rt->fib6_nsiblings,
3834*4882a593Smuzhiyun NULL);
3835*4882a593Smuzhiyun }
3836*4882a593Smuzhiyun list_for_each_entry_safe(sibling, next_sibling,
3837*4882a593Smuzhiyun &rt->fib6_siblings,
3838*4882a593Smuzhiyun fib6_siblings) {
3839*4882a593Smuzhiyun err = fib6_del(sibling, info);
3840*4882a593Smuzhiyun if (err)
3841*4882a593Smuzhiyun goto out_unlock;
3842*4882a593Smuzhiyun }
3843*4882a593Smuzhiyun }
3844*4882a593Smuzhiyun
3845*4882a593Smuzhiyun err = fib6_del(rt, info);
3846*4882a593Smuzhiyun out_unlock:
3847*4882a593Smuzhiyun spin_unlock_bh(&table->tb6_lock);
3848*4882a593Smuzhiyun out_put:
3849*4882a593Smuzhiyun fib6_info_release(rt);
3850*4882a593Smuzhiyun
3851*4882a593Smuzhiyun if (skb) {
3852*4882a593Smuzhiyun rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3853*4882a593Smuzhiyun info->nlh, gfp_any());
3854*4882a593Smuzhiyun }
3855*4882a593Smuzhiyun return err;
3856*4882a593Smuzhiyun }
3857*4882a593Smuzhiyun
__ip6_del_cached_rt(struct rt6_info * rt,struct fib6_config * cfg)3858*4882a593Smuzhiyun static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3859*4882a593Smuzhiyun {
3860*4882a593Smuzhiyun int rc = -ESRCH;
3861*4882a593Smuzhiyun
3862*4882a593Smuzhiyun if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3863*4882a593Smuzhiyun goto out;
3864*4882a593Smuzhiyun
3865*4882a593Smuzhiyun if (cfg->fc_flags & RTF_GATEWAY &&
3866*4882a593Smuzhiyun !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3867*4882a593Smuzhiyun goto out;
3868*4882a593Smuzhiyun
3869*4882a593Smuzhiyun rc = rt6_remove_exception_rt(rt);
3870*4882a593Smuzhiyun out:
3871*4882a593Smuzhiyun return rc;
3872*4882a593Smuzhiyun }
3873*4882a593Smuzhiyun
ip6_del_cached_rt(struct fib6_config * cfg,struct fib6_info * rt,struct fib6_nh * nh)3874*4882a593Smuzhiyun static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
3875*4882a593Smuzhiyun struct fib6_nh *nh)
3876*4882a593Smuzhiyun {
3877*4882a593Smuzhiyun struct fib6_result res = {
3878*4882a593Smuzhiyun .f6i = rt,
3879*4882a593Smuzhiyun .nh = nh,
3880*4882a593Smuzhiyun };
3881*4882a593Smuzhiyun struct rt6_info *rt_cache;
3882*4882a593Smuzhiyun
3883*4882a593Smuzhiyun rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
3884*4882a593Smuzhiyun if (rt_cache)
3885*4882a593Smuzhiyun return __ip6_del_cached_rt(rt_cache, cfg);
3886*4882a593Smuzhiyun
3887*4882a593Smuzhiyun return 0;
3888*4882a593Smuzhiyun }
3889*4882a593Smuzhiyun
3890*4882a593Smuzhiyun struct fib6_nh_del_cached_rt_arg {
3891*4882a593Smuzhiyun struct fib6_config *cfg;
3892*4882a593Smuzhiyun struct fib6_info *f6i;
3893*4882a593Smuzhiyun };
3894*4882a593Smuzhiyun
fib6_nh_del_cached_rt(struct fib6_nh * nh,void * _arg)3895*4882a593Smuzhiyun static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
3896*4882a593Smuzhiyun {
3897*4882a593Smuzhiyun struct fib6_nh_del_cached_rt_arg *arg = _arg;
3898*4882a593Smuzhiyun int rc;
3899*4882a593Smuzhiyun
3900*4882a593Smuzhiyun rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
3901*4882a593Smuzhiyun return rc != -ESRCH ? rc : 0;
3902*4882a593Smuzhiyun }
3903*4882a593Smuzhiyun
ip6_del_cached_rt_nh(struct fib6_config * cfg,struct fib6_info * f6i)3904*4882a593Smuzhiyun static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
3905*4882a593Smuzhiyun {
3906*4882a593Smuzhiyun struct fib6_nh_del_cached_rt_arg arg = {
3907*4882a593Smuzhiyun .cfg = cfg,
3908*4882a593Smuzhiyun .f6i = f6i
3909*4882a593Smuzhiyun };
3910*4882a593Smuzhiyun
3911*4882a593Smuzhiyun return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
3912*4882a593Smuzhiyun }
3913*4882a593Smuzhiyun
ip6_route_del(struct fib6_config * cfg,struct netlink_ext_ack * extack)3914*4882a593Smuzhiyun static int ip6_route_del(struct fib6_config *cfg,
3915*4882a593Smuzhiyun struct netlink_ext_ack *extack)
3916*4882a593Smuzhiyun {
3917*4882a593Smuzhiyun struct fib6_table *table;
3918*4882a593Smuzhiyun struct fib6_info *rt;
3919*4882a593Smuzhiyun struct fib6_node *fn;
3920*4882a593Smuzhiyun int err = -ESRCH;
3921*4882a593Smuzhiyun
3922*4882a593Smuzhiyun table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3923*4882a593Smuzhiyun if (!table) {
3924*4882a593Smuzhiyun NL_SET_ERR_MSG(extack, "FIB table does not exist");
3925*4882a593Smuzhiyun return err;
3926*4882a593Smuzhiyun }
3927*4882a593Smuzhiyun
3928*4882a593Smuzhiyun rcu_read_lock();
3929*4882a593Smuzhiyun
3930*4882a593Smuzhiyun fn = fib6_locate(&table->tb6_root,
3931*4882a593Smuzhiyun &cfg->fc_dst, cfg->fc_dst_len,
3932*4882a593Smuzhiyun &cfg->fc_src, cfg->fc_src_len,
3933*4882a593Smuzhiyun !(cfg->fc_flags & RTF_CACHE));
3934*4882a593Smuzhiyun
3935*4882a593Smuzhiyun if (fn) {
3936*4882a593Smuzhiyun for_each_fib6_node_rt_rcu(fn) {
3937*4882a593Smuzhiyun struct fib6_nh *nh;
3938*4882a593Smuzhiyun
3939*4882a593Smuzhiyun if (rt->nh && cfg->fc_nh_id &&
3940*4882a593Smuzhiyun rt->nh->id != cfg->fc_nh_id)
3941*4882a593Smuzhiyun continue;
3942*4882a593Smuzhiyun
3943*4882a593Smuzhiyun if (cfg->fc_flags & RTF_CACHE) {
3944*4882a593Smuzhiyun int rc = 0;
3945*4882a593Smuzhiyun
3946*4882a593Smuzhiyun if (rt->nh) {
3947*4882a593Smuzhiyun rc = ip6_del_cached_rt_nh(cfg, rt);
3948*4882a593Smuzhiyun } else if (cfg->fc_nh_id) {
3949*4882a593Smuzhiyun continue;
3950*4882a593Smuzhiyun } else {
3951*4882a593Smuzhiyun nh = rt->fib6_nh;
3952*4882a593Smuzhiyun rc = ip6_del_cached_rt(cfg, rt, nh);
3953*4882a593Smuzhiyun }
3954*4882a593Smuzhiyun if (rc != -ESRCH) {
3955*4882a593Smuzhiyun rcu_read_unlock();
3956*4882a593Smuzhiyun return rc;
3957*4882a593Smuzhiyun }
3958*4882a593Smuzhiyun continue;
3959*4882a593Smuzhiyun }
3960*4882a593Smuzhiyun
3961*4882a593Smuzhiyun if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3962*4882a593Smuzhiyun continue;
3963*4882a593Smuzhiyun if (cfg->fc_protocol &&
3964*4882a593Smuzhiyun cfg->fc_protocol != rt->fib6_protocol)
3965*4882a593Smuzhiyun continue;
3966*4882a593Smuzhiyun
3967*4882a593Smuzhiyun if (rt->nh) {
3968*4882a593Smuzhiyun if (!fib6_info_hold_safe(rt))
3969*4882a593Smuzhiyun continue;
3970*4882a593Smuzhiyun rcu_read_unlock();
3971*4882a593Smuzhiyun
3972*4882a593Smuzhiyun return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3973*4882a593Smuzhiyun }
3974*4882a593Smuzhiyun if (cfg->fc_nh_id)
3975*4882a593Smuzhiyun continue;
3976*4882a593Smuzhiyun
3977*4882a593Smuzhiyun nh = rt->fib6_nh;
3978*4882a593Smuzhiyun if (cfg->fc_ifindex &&
3979*4882a593Smuzhiyun (!nh->fib_nh_dev ||
3980*4882a593Smuzhiyun nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
3981*4882a593Smuzhiyun continue;
3982*4882a593Smuzhiyun if (cfg->fc_flags & RTF_GATEWAY &&
3983*4882a593Smuzhiyun !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
3984*4882a593Smuzhiyun continue;
3985*4882a593Smuzhiyun if (!fib6_info_hold_safe(rt))
3986*4882a593Smuzhiyun continue;
3987*4882a593Smuzhiyun rcu_read_unlock();
3988*4882a593Smuzhiyun
3989*4882a593Smuzhiyun /* if gateway was specified only delete the one hop */
3990*4882a593Smuzhiyun if (cfg->fc_flags & RTF_GATEWAY)
3991*4882a593Smuzhiyun return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3992*4882a593Smuzhiyun
3993*4882a593Smuzhiyun return __ip6_del_rt_siblings(rt, cfg);
3994*4882a593Smuzhiyun }
3995*4882a593Smuzhiyun }
3996*4882a593Smuzhiyun rcu_read_unlock();
3997*4882a593Smuzhiyun
3998*4882a593Smuzhiyun return err;
3999*4882a593Smuzhiyun }
4000*4882a593Smuzhiyun
rt6_do_redirect(struct dst_entry * dst,struct sock * sk,struct sk_buff * skb)4001*4882a593Smuzhiyun static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
4002*4882a593Smuzhiyun {
4003*4882a593Smuzhiyun struct netevent_redirect netevent;
4004*4882a593Smuzhiyun struct rt6_info *rt, *nrt = NULL;
4005*4882a593Smuzhiyun struct fib6_result res = {};
4006*4882a593Smuzhiyun struct ndisc_options ndopts;
4007*4882a593Smuzhiyun struct inet6_dev *in6_dev;
4008*4882a593Smuzhiyun struct neighbour *neigh;
4009*4882a593Smuzhiyun struct rd_msg *msg;
4010*4882a593Smuzhiyun int optlen, on_link;
4011*4882a593Smuzhiyun u8 *lladdr;
4012*4882a593Smuzhiyun
4013*4882a593Smuzhiyun optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
4014*4882a593Smuzhiyun optlen -= sizeof(*msg);
4015*4882a593Smuzhiyun
4016*4882a593Smuzhiyun if (optlen < 0) {
4017*4882a593Smuzhiyun net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
4018*4882a593Smuzhiyun return;
4019*4882a593Smuzhiyun }
4020*4882a593Smuzhiyun
4021*4882a593Smuzhiyun msg = (struct rd_msg *)icmp6_hdr(skb);
4022*4882a593Smuzhiyun
4023*4882a593Smuzhiyun if (ipv6_addr_is_multicast(&msg->dest)) {
4024*4882a593Smuzhiyun net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
4025*4882a593Smuzhiyun return;
4026*4882a593Smuzhiyun }
4027*4882a593Smuzhiyun
4028*4882a593Smuzhiyun on_link = 0;
4029*4882a593Smuzhiyun if (ipv6_addr_equal(&msg->dest, &msg->target)) {
4030*4882a593Smuzhiyun on_link = 1;
4031*4882a593Smuzhiyun } else if (ipv6_addr_type(&msg->target) !=
4032*4882a593Smuzhiyun (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
4033*4882a593Smuzhiyun net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
4034*4882a593Smuzhiyun return;
4035*4882a593Smuzhiyun }
4036*4882a593Smuzhiyun
4037*4882a593Smuzhiyun in6_dev = __in6_dev_get(skb->dev);
4038*4882a593Smuzhiyun if (!in6_dev)
4039*4882a593Smuzhiyun return;
4040*4882a593Smuzhiyun if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
4041*4882a593Smuzhiyun return;
4042*4882a593Smuzhiyun
4043*4882a593Smuzhiyun /* RFC2461 8.1:
4044*4882a593Smuzhiyun * The IP source address of the Redirect MUST be the same as the current
4045*4882a593Smuzhiyun * first-hop router for the specified ICMP Destination Address.
4046*4882a593Smuzhiyun */
4047*4882a593Smuzhiyun
4048*4882a593Smuzhiyun if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
4049*4882a593Smuzhiyun net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
4050*4882a593Smuzhiyun return;
4051*4882a593Smuzhiyun }
4052*4882a593Smuzhiyun
4053*4882a593Smuzhiyun lladdr = NULL;
4054*4882a593Smuzhiyun if (ndopts.nd_opts_tgt_lladdr) {
4055*4882a593Smuzhiyun lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
4056*4882a593Smuzhiyun skb->dev);
4057*4882a593Smuzhiyun if (!lladdr) {
4058*4882a593Smuzhiyun net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
4059*4882a593Smuzhiyun return;
4060*4882a593Smuzhiyun }
4061*4882a593Smuzhiyun }
4062*4882a593Smuzhiyun
4063*4882a593Smuzhiyun rt = (struct rt6_info *) dst;
4064*4882a593Smuzhiyun if (rt->rt6i_flags & RTF_REJECT) {
4065*4882a593Smuzhiyun net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
4066*4882a593Smuzhiyun return;
4067*4882a593Smuzhiyun }
4068*4882a593Smuzhiyun
4069*4882a593Smuzhiyun /* Redirect received -> path was valid.
4070*4882a593Smuzhiyun * Look, redirects are sent only in response to data packets,
4071*4882a593Smuzhiyun * so that this nexthop apparently is reachable. --ANK
4072*4882a593Smuzhiyun */
4073*4882a593Smuzhiyun dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
4074*4882a593Smuzhiyun
4075*4882a593Smuzhiyun neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
4076*4882a593Smuzhiyun if (!neigh)
4077*4882a593Smuzhiyun return;
4078*4882a593Smuzhiyun
4079*4882a593Smuzhiyun /*
4080*4882a593Smuzhiyun * We have finally decided to accept it.
4081*4882a593Smuzhiyun */
4082*4882a593Smuzhiyun
4083*4882a593Smuzhiyun ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
4084*4882a593Smuzhiyun NEIGH_UPDATE_F_WEAK_OVERRIDE|
4085*4882a593Smuzhiyun NEIGH_UPDATE_F_OVERRIDE|
4086*4882a593Smuzhiyun (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
4087*4882a593Smuzhiyun NEIGH_UPDATE_F_ISROUTER)),
4088*4882a593Smuzhiyun NDISC_REDIRECT, &ndopts);
4089*4882a593Smuzhiyun
4090*4882a593Smuzhiyun rcu_read_lock();
4091*4882a593Smuzhiyun res.f6i = rcu_dereference(rt->from);
4092*4882a593Smuzhiyun if (!res.f6i)
4093*4882a593Smuzhiyun goto out;
4094*4882a593Smuzhiyun
4095*4882a593Smuzhiyun if (res.f6i->nh) {
4096*4882a593Smuzhiyun struct fib6_nh_match_arg arg = {
4097*4882a593Smuzhiyun .dev = dst->dev,
4098*4882a593Smuzhiyun .gw = &rt->rt6i_gateway,
4099*4882a593Smuzhiyun };
4100*4882a593Smuzhiyun
4101*4882a593Smuzhiyun nexthop_for_each_fib6_nh(res.f6i->nh,
4102*4882a593Smuzhiyun fib6_nh_find_match, &arg);
4103*4882a593Smuzhiyun
4104*4882a593Smuzhiyun /* fib6_info uses a nexthop that does not have fib6_nh
4105*4882a593Smuzhiyun * using the dst->dev. Should be impossible
4106*4882a593Smuzhiyun */
4107*4882a593Smuzhiyun if (!arg.match)
4108*4882a593Smuzhiyun goto out;
4109*4882a593Smuzhiyun res.nh = arg.match;
4110*4882a593Smuzhiyun } else {
4111*4882a593Smuzhiyun res.nh = res.f6i->fib6_nh;
4112*4882a593Smuzhiyun }
4113*4882a593Smuzhiyun
4114*4882a593Smuzhiyun res.fib6_flags = res.f6i->fib6_flags;
4115*4882a593Smuzhiyun res.fib6_type = res.f6i->fib6_type;
4116*4882a593Smuzhiyun nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
4117*4882a593Smuzhiyun if (!nrt)
4118*4882a593Smuzhiyun goto out;
4119*4882a593Smuzhiyun
4120*4882a593Smuzhiyun nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
4121*4882a593Smuzhiyun if (on_link)
4122*4882a593Smuzhiyun nrt->rt6i_flags &= ~RTF_GATEWAY;
4123*4882a593Smuzhiyun
4124*4882a593Smuzhiyun nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
4125*4882a593Smuzhiyun
4126*4882a593Smuzhiyun /* rt6_insert_exception() will take care of duplicated exceptions */
4127*4882a593Smuzhiyun if (rt6_insert_exception(nrt, &res)) {
4128*4882a593Smuzhiyun dst_release_immediate(&nrt->dst);
4129*4882a593Smuzhiyun goto out;
4130*4882a593Smuzhiyun }
4131*4882a593Smuzhiyun
4132*4882a593Smuzhiyun netevent.old = &rt->dst;
4133*4882a593Smuzhiyun netevent.new = &nrt->dst;
4134*4882a593Smuzhiyun netevent.daddr = &msg->dest;
4135*4882a593Smuzhiyun netevent.neigh = neigh;
4136*4882a593Smuzhiyun call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
4137*4882a593Smuzhiyun
4138*4882a593Smuzhiyun out:
4139*4882a593Smuzhiyun rcu_read_unlock();
4140*4882a593Smuzhiyun neigh_release(neigh);
4141*4882a593Smuzhiyun }
4142*4882a593Smuzhiyun
4143*4882a593Smuzhiyun #ifdef CONFIG_IPV6_ROUTE_INFO
rt6_get_route_info(struct net * net,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr,struct net_device * dev)4144*4882a593Smuzhiyun static struct fib6_info *rt6_get_route_info(struct net *net,
4145*4882a593Smuzhiyun const struct in6_addr *prefix, int prefixlen,
4146*4882a593Smuzhiyun const struct in6_addr *gwaddr,
4147*4882a593Smuzhiyun struct net_device *dev)
4148*4882a593Smuzhiyun {
4149*4882a593Smuzhiyun u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
4150*4882a593Smuzhiyun int ifindex = dev->ifindex;
4151*4882a593Smuzhiyun struct fib6_node *fn;
4152*4882a593Smuzhiyun struct fib6_info *rt = NULL;
4153*4882a593Smuzhiyun struct fib6_table *table;
4154*4882a593Smuzhiyun
4155*4882a593Smuzhiyun table = fib6_get_table(net, tb_id);
4156*4882a593Smuzhiyun if (!table)
4157*4882a593Smuzhiyun return NULL;
4158*4882a593Smuzhiyun
4159*4882a593Smuzhiyun rcu_read_lock();
4160*4882a593Smuzhiyun fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
4161*4882a593Smuzhiyun if (!fn)
4162*4882a593Smuzhiyun goto out;
4163*4882a593Smuzhiyun
4164*4882a593Smuzhiyun for_each_fib6_node_rt_rcu(fn) {
4165*4882a593Smuzhiyun /* these routes do not use nexthops */
4166*4882a593Smuzhiyun if (rt->nh)
4167*4882a593Smuzhiyun continue;
4168*4882a593Smuzhiyun if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
4169*4882a593Smuzhiyun continue;
4170*4882a593Smuzhiyun if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
4171*4882a593Smuzhiyun !rt->fib6_nh->fib_nh_gw_family)
4172*4882a593Smuzhiyun continue;
4173*4882a593Smuzhiyun if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
4174*4882a593Smuzhiyun continue;
4175*4882a593Smuzhiyun if (!fib6_info_hold_safe(rt))
4176*4882a593Smuzhiyun continue;
4177*4882a593Smuzhiyun break;
4178*4882a593Smuzhiyun }
4179*4882a593Smuzhiyun out:
4180*4882a593Smuzhiyun rcu_read_unlock();
4181*4882a593Smuzhiyun return rt;
4182*4882a593Smuzhiyun }
4183*4882a593Smuzhiyun
rt6_add_route_info(struct net * net,const struct in6_addr * prefix,int prefixlen,const struct in6_addr * gwaddr,struct net_device * dev,unsigned int pref)4184*4882a593Smuzhiyun static struct fib6_info *rt6_add_route_info(struct net *net,
4185*4882a593Smuzhiyun const struct in6_addr *prefix, int prefixlen,
4186*4882a593Smuzhiyun const struct in6_addr *gwaddr,
4187*4882a593Smuzhiyun struct net_device *dev,
4188*4882a593Smuzhiyun unsigned int pref)
4189*4882a593Smuzhiyun {
4190*4882a593Smuzhiyun struct fib6_config cfg = {
4191*4882a593Smuzhiyun .fc_metric = IP6_RT_PRIO_USER,
4192*4882a593Smuzhiyun .fc_ifindex = dev->ifindex,
4193*4882a593Smuzhiyun .fc_dst_len = prefixlen,
4194*4882a593Smuzhiyun .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
4195*4882a593Smuzhiyun RTF_UP | RTF_PREF(pref),
4196*4882a593Smuzhiyun .fc_protocol = RTPROT_RA,
4197*4882a593Smuzhiyun .fc_type = RTN_UNICAST,
4198*4882a593Smuzhiyun .fc_nlinfo.portid = 0,
4199*4882a593Smuzhiyun .fc_nlinfo.nlh = NULL,
4200*4882a593Smuzhiyun .fc_nlinfo.nl_net = net,
4201*4882a593Smuzhiyun };
4202*4882a593Smuzhiyun
4203*4882a593Smuzhiyun cfg.fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_INFO);
4204*4882a593Smuzhiyun cfg.fc_dst = *prefix;
4205*4882a593Smuzhiyun cfg.fc_gateway = *gwaddr;
4206*4882a593Smuzhiyun
4207*4882a593Smuzhiyun /* We should treat it as a default route if prefix length is 0. */
4208*4882a593Smuzhiyun if (!prefixlen)
4209*4882a593Smuzhiyun cfg.fc_flags |= RTF_DEFAULT;
4210*4882a593Smuzhiyun
4211*4882a593Smuzhiyun ip6_route_add(&cfg, GFP_ATOMIC, NULL);
4212*4882a593Smuzhiyun
4213*4882a593Smuzhiyun return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
4214*4882a593Smuzhiyun }
4215*4882a593Smuzhiyun #endif
4216*4882a593Smuzhiyun
rt6_get_dflt_router(struct net * net,const struct in6_addr * addr,struct net_device * dev)4217*4882a593Smuzhiyun struct fib6_info *rt6_get_dflt_router(struct net *net,
4218*4882a593Smuzhiyun const struct in6_addr *addr,
4219*4882a593Smuzhiyun struct net_device *dev)
4220*4882a593Smuzhiyun {
4221*4882a593Smuzhiyun u32 tb_id = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT);
4222*4882a593Smuzhiyun struct fib6_info *rt;
4223*4882a593Smuzhiyun struct fib6_table *table;
4224*4882a593Smuzhiyun
4225*4882a593Smuzhiyun table = fib6_get_table(net, tb_id);
4226*4882a593Smuzhiyun if (!table)
4227*4882a593Smuzhiyun return NULL;
4228*4882a593Smuzhiyun
4229*4882a593Smuzhiyun rcu_read_lock();
4230*4882a593Smuzhiyun for_each_fib6_node_rt_rcu(&table->tb6_root) {
4231*4882a593Smuzhiyun struct fib6_nh *nh;
4232*4882a593Smuzhiyun
4233*4882a593Smuzhiyun /* RA routes do not use nexthops */
4234*4882a593Smuzhiyun if (rt->nh)
4235*4882a593Smuzhiyun continue;
4236*4882a593Smuzhiyun
4237*4882a593Smuzhiyun nh = rt->fib6_nh;
4238*4882a593Smuzhiyun if (dev == nh->fib_nh_dev &&
4239*4882a593Smuzhiyun ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
4240*4882a593Smuzhiyun ipv6_addr_equal(&nh->fib_nh_gw6, addr))
4241*4882a593Smuzhiyun break;
4242*4882a593Smuzhiyun }
4243*4882a593Smuzhiyun if (rt && !fib6_info_hold_safe(rt))
4244*4882a593Smuzhiyun rt = NULL;
4245*4882a593Smuzhiyun rcu_read_unlock();
4246*4882a593Smuzhiyun return rt;
4247*4882a593Smuzhiyun }
4248*4882a593Smuzhiyun
rt6_add_dflt_router(struct net * net,const struct in6_addr * gwaddr,struct net_device * dev,unsigned int pref)4249*4882a593Smuzhiyun struct fib6_info *rt6_add_dflt_router(struct net *net,
4250*4882a593Smuzhiyun const struct in6_addr *gwaddr,
4251*4882a593Smuzhiyun struct net_device *dev,
4252*4882a593Smuzhiyun unsigned int pref)
4253*4882a593Smuzhiyun {
4254*4882a593Smuzhiyun struct fib6_config cfg = {
4255*4882a593Smuzhiyun .fc_table = l3mdev_fib_table(dev) ? : addrconf_rt_table(dev, RT6_TABLE_DFLT),
4256*4882a593Smuzhiyun .fc_metric = IP6_RT_PRIO_USER,
4257*4882a593Smuzhiyun .fc_ifindex = dev->ifindex,
4258*4882a593Smuzhiyun .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
4259*4882a593Smuzhiyun RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
4260*4882a593Smuzhiyun .fc_protocol = RTPROT_RA,
4261*4882a593Smuzhiyun .fc_type = RTN_UNICAST,
4262*4882a593Smuzhiyun .fc_nlinfo.portid = 0,
4263*4882a593Smuzhiyun .fc_nlinfo.nlh = NULL,
4264*4882a593Smuzhiyun .fc_nlinfo.nl_net = net,
4265*4882a593Smuzhiyun };
4266*4882a593Smuzhiyun
4267*4882a593Smuzhiyun cfg.fc_gateway = *gwaddr;
4268*4882a593Smuzhiyun
4269*4882a593Smuzhiyun if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
4270*4882a593Smuzhiyun struct fib6_table *table;
4271*4882a593Smuzhiyun
4272*4882a593Smuzhiyun table = fib6_get_table(dev_net(dev), cfg.fc_table);
4273*4882a593Smuzhiyun if (table)
4274*4882a593Smuzhiyun table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
4275*4882a593Smuzhiyun }
4276*4882a593Smuzhiyun
4277*4882a593Smuzhiyun return rt6_get_dflt_router(net, gwaddr, dev);
4278*4882a593Smuzhiyun }
4279*4882a593Smuzhiyun
rt6_addrconf_purge(struct fib6_info * rt,void * arg)4280*4882a593Smuzhiyun static int rt6_addrconf_purge(struct fib6_info *rt, void *arg)
4281*4882a593Smuzhiyun {
4282*4882a593Smuzhiyun struct net_device *dev = fib6_info_nh_dev(rt);
4283*4882a593Smuzhiyun struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
4284*4882a593Smuzhiyun
4285*4882a593Smuzhiyun if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
4286*4882a593Smuzhiyun (!idev || idev->cnf.accept_ra != 2)) {
4287*4882a593Smuzhiyun /* Delete this route. See fib6_clean_tree() */
4288*4882a593Smuzhiyun return -1;
4289*4882a593Smuzhiyun }
4290*4882a593Smuzhiyun
4291*4882a593Smuzhiyun /* Continue walking */
4292*4882a593Smuzhiyun return 0;
4293*4882a593Smuzhiyun }
4294*4882a593Smuzhiyun
rt6_purge_dflt_routers(struct net * net)4295*4882a593Smuzhiyun void rt6_purge_dflt_routers(struct net *net)
4296*4882a593Smuzhiyun {
4297*4882a593Smuzhiyun fib6_clean_all(net, rt6_addrconf_purge, NULL);
4298*4882a593Smuzhiyun }
4299*4882a593Smuzhiyun
rtmsg_to_fib6_config(struct net * net,struct in6_rtmsg * rtmsg,struct fib6_config * cfg)4300*4882a593Smuzhiyun static void rtmsg_to_fib6_config(struct net *net,
4301*4882a593Smuzhiyun struct in6_rtmsg *rtmsg,
4302*4882a593Smuzhiyun struct fib6_config *cfg)
4303*4882a593Smuzhiyun {
4304*4882a593Smuzhiyun *cfg = (struct fib6_config){
4305*4882a593Smuzhiyun .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
4306*4882a593Smuzhiyun : RT6_TABLE_MAIN,
4307*4882a593Smuzhiyun .fc_ifindex = rtmsg->rtmsg_ifindex,
4308*4882a593Smuzhiyun .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
4309*4882a593Smuzhiyun .fc_expires = rtmsg->rtmsg_info,
4310*4882a593Smuzhiyun .fc_dst_len = rtmsg->rtmsg_dst_len,
4311*4882a593Smuzhiyun .fc_src_len = rtmsg->rtmsg_src_len,
4312*4882a593Smuzhiyun .fc_flags = rtmsg->rtmsg_flags,
4313*4882a593Smuzhiyun .fc_type = rtmsg->rtmsg_type,
4314*4882a593Smuzhiyun
4315*4882a593Smuzhiyun .fc_nlinfo.nl_net = net,
4316*4882a593Smuzhiyun
4317*4882a593Smuzhiyun .fc_dst = rtmsg->rtmsg_dst,
4318*4882a593Smuzhiyun .fc_src = rtmsg->rtmsg_src,
4319*4882a593Smuzhiyun .fc_gateway = rtmsg->rtmsg_gateway,
4320*4882a593Smuzhiyun };
4321*4882a593Smuzhiyun }
4322*4882a593Smuzhiyun
ipv6_route_ioctl(struct net * net,unsigned int cmd,struct in6_rtmsg * rtmsg)4323*4882a593Smuzhiyun int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
4324*4882a593Smuzhiyun {
4325*4882a593Smuzhiyun struct fib6_config cfg;
4326*4882a593Smuzhiyun int err;
4327*4882a593Smuzhiyun
4328*4882a593Smuzhiyun if (cmd != SIOCADDRT && cmd != SIOCDELRT)
4329*4882a593Smuzhiyun return -EINVAL;
4330*4882a593Smuzhiyun if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
4331*4882a593Smuzhiyun return -EPERM;
4332*4882a593Smuzhiyun
4333*4882a593Smuzhiyun rtmsg_to_fib6_config(net, rtmsg, &cfg);
4334*4882a593Smuzhiyun
4335*4882a593Smuzhiyun rtnl_lock();
4336*4882a593Smuzhiyun switch (cmd) {
4337*4882a593Smuzhiyun case SIOCADDRT:
4338*4882a593Smuzhiyun err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
4339*4882a593Smuzhiyun break;
4340*4882a593Smuzhiyun case SIOCDELRT:
4341*4882a593Smuzhiyun err = ip6_route_del(&cfg, NULL);
4342*4882a593Smuzhiyun break;
4343*4882a593Smuzhiyun }
4344*4882a593Smuzhiyun rtnl_unlock();
4345*4882a593Smuzhiyun return err;
4346*4882a593Smuzhiyun }
4347*4882a593Smuzhiyun
4348*4882a593Smuzhiyun /*
4349*4882a593Smuzhiyun * Drop the packet on the floor
4350*4882a593Smuzhiyun */
4351*4882a593Smuzhiyun
ip6_pkt_drop(struct sk_buff * skb,u8 code,int ipstats_mib_noroutes)4352*4882a593Smuzhiyun static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
4353*4882a593Smuzhiyun {
4354*4882a593Smuzhiyun struct dst_entry *dst = skb_dst(skb);
4355*4882a593Smuzhiyun struct net *net = dev_net(dst->dev);
4356*4882a593Smuzhiyun struct inet6_dev *idev;
4357*4882a593Smuzhiyun int type;
4358*4882a593Smuzhiyun
4359*4882a593Smuzhiyun if (netif_is_l3_master(skb->dev) ||
4360*4882a593Smuzhiyun dst->dev == net->loopback_dev)
4361*4882a593Smuzhiyun idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
4362*4882a593Smuzhiyun else
4363*4882a593Smuzhiyun idev = ip6_dst_idev(dst);
4364*4882a593Smuzhiyun
4365*4882a593Smuzhiyun switch (ipstats_mib_noroutes) {
4366*4882a593Smuzhiyun case IPSTATS_MIB_INNOROUTES:
4367*4882a593Smuzhiyun type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
4368*4882a593Smuzhiyun if (type == IPV6_ADDR_ANY) {
4369*4882a593Smuzhiyun IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
4370*4882a593Smuzhiyun break;
4371*4882a593Smuzhiyun }
4372*4882a593Smuzhiyun fallthrough;
4373*4882a593Smuzhiyun case IPSTATS_MIB_OUTNOROUTES:
4374*4882a593Smuzhiyun IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
4375*4882a593Smuzhiyun break;
4376*4882a593Smuzhiyun }
4377*4882a593Smuzhiyun
4378*4882a593Smuzhiyun /* Start over by dropping the dst for l3mdev case */
4379*4882a593Smuzhiyun if (netif_is_l3_master(skb->dev))
4380*4882a593Smuzhiyun skb_dst_drop(skb);
4381*4882a593Smuzhiyun
4382*4882a593Smuzhiyun icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
4383*4882a593Smuzhiyun kfree_skb(skb);
4384*4882a593Smuzhiyun return 0;
4385*4882a593Smuzhiyun }
4386*4882a593Smuzhiyun
ip6_pkt_discard(struct sk_buff * skb)4387*4882a593Smuzhiyun static int ip6_pkt_discard(struct sk_buff *skb)
4388*4882a593Smuzhiyun {
4389*4882a593Smuzhiyun return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
4390*4882a593Smuzhiyun }
4391*4882a593Smuzhiyun
ip6_pkt_discard_out(struct net * net,struct sock * sk,struct sk_buff * skb)4392*4882a593Smuzhiyun static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4393*4882a593Smuzhiyun {
4394*4882a593Smuzhiyun skb->dev = skb_dst(skb)->dev;
4395*4882a593Smuzhiyun return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
4396*4882a593Smuzhiyun }
4397*4882a593Smuzhiyun
ip6_pkt_prohibit(struct sk_buff * skb)4398*4882a593Smuzhiyun static int ip6_pkt_prohibit(struct sk_buff *skb)
4399*4882a593Smuzhiyun {
4400*4882a593Smuzhiyun return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
4401*4882a593Smuzhiyun }
4402*4882a593Smuzhiyun
ip6_pkt_prohibit_out(struct net * net,struct sock * sk,struct sk_buff * skb)4403*4882a593Smuzhiyun static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
4404*4882a593Smuzhiyun {
4405*4882a593Smuzhiyun skb->dev = skb_dst(skb)->dev;
4406*4882a593Smuzhiyun return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
4407*4882a593Smuzhiyun }
4408*4882a593Smuzhiyun
4409*4882a593Smuzhiyun /*
4410*4882a593Smuzhiyun * Allocate a dst for local (unicast / anycast) address.
4411*4882a593Smuzhiyun */
4412*4882a593Smuzhiyun
addrconf_f6i_alloc(struct net * net,struct inet6_dev * idev,const struct in6_addr * addr,bool anycast,gfp_t gfp_flags)4413*4882a593Smuzhiyun struct fib6_info *addrconf_f6i_alloc(struct net *net,
4414*4882a593Smuzhiyun struct inet6_dev *idev,
4415*4882a593Smuzhiyun const struct in6_addr *addr,
4416*4882a593Smuzhiyun bool anycast, gfp_t gfp_flags)
4417*4882a593Smuzhiyun {
4418*4882a593Smuzhiyun struct fib6_config cfg = {
4419*4882a593Smuzhiyun .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
4420*4882a593Smuzhiyun .fc_ifindex = idev->dev->ifindex,
4421*4882a593Smuzhiyun .fc_flags = RTF_UP | RTF_NONEXTHOP,
4422*4882a593Smuzhiyun .fc_dst = *addr,
4423*4882a593Smuzhiyun .fc_dst_len = 128,
4424*4882a593Smuzhiyun .fc_protocol = RTPROT_KERNEL,
4425*4882a593Smuzhiyun .fc_nlinfo.nl_net = net,
4426*4882a593Smuzhiyun .fc_ignore_dev_down = true,
4427*4882a593Smuzhiyun };
4428*4882a593Smuzhiyun struct fib6_info *f6i;
4429*4882a593Smuzhiyun
4430*4882a593Smuzhiyun if (anycast) {
4431*4882a593Smuzhiyun cfg.fc_type = RTN_ANYCAST;
4432*4882a593Smuzhiyun cfg.fc_flags |= RTF_ANYCAST;
4433*4882a593Smuzhiyun } else {
4434*4882a593Smuzhiyun cfg.fc_type = RTN_LOCAL;
4435*4882a593Smuzhiyun cfg.fc_flags |= RTF_LOCAL;
4436*4882a593Smuzhiyun }
4437*4882a593Smuzhiyun
4438*4882a593Smuzhiyun f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
4439*4882a593Smuzhiyun if (!IS_ERR(f6i)) {
4440*4882a593Smuzhiyun f6i->dst_nocount = true;
4441*4882a593Smuzhiyun
4442*4882a593Smuzhiyun if (!anycast &&
4443*4882a593Smuzhiyun (net->ipv6.devconf_all->disable_policy ||
4444*4882a593Smuzhiyun idev->cnf.disable_policy))
4445*4882a593Smuzhiyun f6i->dst_nopolicy = true;
4446*4882a593Smuzhiyun }
4447*4882a593Smuzhiyun
4448*4882a593Smuzhiyun return f6i;
4449*4882a593Smuzhiyun }
4450*4882a593Smuzhiyun
/*
 * Walker argument used when removing a deleted address from the prefsrc
 * of FIB entries (see fib6_remove_prefsrc()).
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any */
	struct net *net;	/* namespace, used to skip its null entry */
	struct in6_addr *addr;	/* address compared with fib6_prefsrc.addr */
};
4457*4882a593Smuzhiyun
fib6_remove_prefsrc(struct fib6_info * rt,void * arg)4458*4882a593Smuzhiyun static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
4459*4882a593Smuzhiyun {
4460*4882a593Smuzhiyun struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
4461*4882a593Smuzhiyun struct net *net = ((struct arg_dev_net_ip *)arg)->net;
4462*4882a593Smuzhiyun struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
4463*4882a593Smuzhiyun
4464*4882a593Smuzhiyun if (!rt->nh &&
4465*4882a593Smuzhiyun ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
4466*4882a593Smuzhiyun rt != net->ipv6.fib6_null_entry &&
4467*4882a593Smuzhiyun ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
4468*4882a593Smuzhiyun spin_lock_bh(&rt6_exception_lock);
4469*4882a593Smuzhiyun /* remove prefsrc entry */
4470*4882a593Smuzhiyun rt->fib6_prefsrc.plen = 0;
4471*4882a593Smuzhiyun spin_unlock_bh(&rt6_exception_lock);
4472*4882a593Smuzhiyun }
4473*4882a593Smuzhiyun return 0;
4474*4882a593Smuzhiyun }
4475*4882a593Smuzhiyun
rt6_remove_prefsrc(struct inet6_ifaddr * ifp)4476*4882a593Smuzhiyun void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
4477*4882a593Smuzhiyun {
4478*4882a593Smuzhiyun struct net *net = dev_net(ifp->idev->dev);
4479*4882a593Smuzhiyun struct arg_dev_net_ip adni = {
4480*4882a593Smuzhiyun .dev = ifp->idev->dev,
4481*4882a593Smuzhiyun .net = net,
4482*4882a593Smuzhiyun .addr = &ifp->addr,
4483*4882a593Smuzhiyun };
4484*4882a593Smuzhiyun fib6_clean_all(net, fib6_remove_prefsrc, &adni);
4485*4882a593Smuzhiyun }
4486*4882a593Smuzhiyun
4487*4882a593Smuzhiyun #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
4488*4882a593Smuzhiyun
4489*4882a593Smuzhiyun /* Remove routers and update dst entries when gateway turn into host. */
fib6_clean_tohost(struct fib6_info * rt,void * arg)4490*4882a593Smuzhiyun static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
4491*4882a593Smuzhiyun {
4492*4882a593Smuzhiyun struct in6_addr *gateway = (struct in6_addr *)arg;
4493*4882a593Smuzhiyun struct fib6_nh *nh;
4494*4882a593Smuzhiyun
4495*4882a593Smuzhiyun /* RA routes do not use nexthops */
4496*4882a593Smuzhiyun if (rt->nh)
4497*4882a593Smuzhiyun return 0;
4498*4882a593Smuzhiyun
4499*4882a593Smuzhiyun nh = rt->fib6_nh;
4500*4882a593Smuzhiyun if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
4501*4882a593Smuzhiyun nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
4502*4882a593Smuzhiyun return -1;
4503*4882a593Smuzhiyun
4504*4882a593Smuzhiyun /* Further clean up cached routes in exception table.
4505*4882a593Smuzhiyun * This is needed because cached route may have a different
4506*4882a593Smuzhiyun * gateway than its 'parent' in the case of an ip redirect.
4507*4882a593Smuzhiyun */
4508*4882a593Smuzhiyun fib6_nh_exceptions_clean_tohost(nh, gateway);
4509*4882a593Smuzhiyun
4510*4882a593Smuzhiyun return 0;
4511*4882a593Smuzhiyun }
4512*4882a593Smuzhiyun
rt6_clean_tohost(struct net * net,struct in6_addr * gateway)4513*4882a593Smuzhiyun void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
4514*4882a593Smuzhiyun {
4515*4882a593Smuzhiyun fib6_clean_all(net, fib6_clean_tohost, gateway);
4516*4882a593Smuzhiyun }
4517*4882a593Smuzhiyun
/* Walker argument for the netdev-driven FIB walks (e.g. fib6_ifup()). */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the walk is about */
	union {
		/* RTNH_F_* bits; fib6_ifup() clears them on each nexthop */
		unsigned char nh_flags;
		/* not referenced in this part of the file; presumably a
		 * netdev notifier event code - confirm against its users
		 */
		unsigned long event;
	};
};
4525*4882a593Smuzhiyun
rt6_multipath_first_sibling(const struct fib6_info * rt)4526*4882a593Smuzhiyun static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
4527*4882a593Smuzhiyun {
4528*4882a593Smuzhiyun struct fib6_info *iter;
4529*4882a593Smuzhiyun struct fib6_node *fn;
4530*4882a593Smuzhiyun
4531*4882a593Smuzhiyun fn = rcu_dereference_protected(rt->fib6_node,
4532*4882a593Smuzhiyun lockdep_is_held(&rt->fib6_table->tb6_lock));
4533*4882a593Smuzhiyun iter = rcu_dereference_protected(fn->leaf,
4534*4882a593Smuzhiyun lockdep_is_held(&rt->fib6_table->tb6_lock));
4535*4882a593Smuzhiyun while (iter) {
4536*4882a593Smuzhiyun if (iter->fib6_metric == rt->fib6_metric &&
4537*4882a593Smuzhiyun rt6_qualify_for_ecmp(iter))
4538*4882a593Smuzhiyun return iter;
4539*4882a593Smuzhiyun iter = rcu_dereference_protected(iter->fib6_next,
4540*4882a593Smuzhiyun lockdep_is_held(&rt->fib6_table->tb6_lock));
4541*4882a593Smuzhiyun }
4542*4882a593Smuzhiyun
4543*4882a593Smuzhiyun return NULL;
4544*4882a593Smuzhiyun }
4545*4882a593Smuzhiyun
4546*4882a593Smuzhiyun /* only called for fib entries with builtin fib6_nh */
rt6_is_dead(const struct fib6_info * rt)4547*4882a593Smuzhiyun static bool rt6_is_dead(const struct fib6_info *rt)
4548*4882a593Smuzhiyun {
4549*4882a593Smuzhiyun if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
4550*4882a593Smuzhiyun (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
4551*4882a593Smuzhiyun ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
4552*4882a593Smuzhiyun return true;
4553*4882a593Smuzhiyun
4554*4882a593Smuzhiyun return false;
4555*4882a593Smuzhiyun }
4556*4882a593Smuzhiyun
rt6_multipath_total_weight(const struct fib6_info * rt)4557*4882a593Smuzhiyun static int rt6_multipath_total_weight(const struct fib6_info *rt)
4558*4882a593Smuzhiyun {
4559*4882a593Smuzhiyun struct fib6_info *iter;
4560*4882a593Smuzhiyun int total = 0;
4561*4882a593Smuzhiyun
4562*4882a593Smuzhiyun if (!rt6_is_dead(rt))
4563*4882a593Smuzhiyun total += rt->fib6_nh->fib_nh_weight;
4564*4882a593Smuzhiyun
4565*4882a593Smuzhiyun list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
4566*4882a593Smuzhiyun if (!rt6_is_dead(iter))
4567*4882a593Smuzhiyun total += iter->fib6_nh->fib_nh_weight;
4568*4882a593Smuzhiyun }
4569*4882a593Smuzhiyun
4570*4882a593Smuzhiyun return total;
4571*4882a593Smuzhiyun }
4572*4882a593Smuzhiyun
rt6_upper_bound_set(struct fib6_info * rt,int * weight,int total)4573*4882a593Smuzhiyun static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
4574*4882a593Smuzhiyun {
4575*4882a593Smuzhiyun int upper_bound = -1;
4576*4882a593Smuzhiyun
4577*4882a593Smuzhiyun if (!rt6_is_dead(rt)) {
4578*4882a593Smuzhiyun *weight += rt->fib6_nh->fib_nh_weight;
4579*4882a593Smuzhiyun upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
4580*4882a593Smuzhiyun total) - 1;
4581*4882a593Smuzhiyun }
4582*4882a593Smuzhiyun atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
4583*4882a593Smuzhiyun }
4584*4882a593Smuzhiyun
rt6_multipath_upper_bound_set(struct fib6_info * rt,int total)4585*4882a593Smuzhiyun static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
4586*4882a593Smuzhiyun {
4587*4882a593Smuzhiyun struct fib6_info *iter;
4588*4882a593Smuzhiyun int weight = 0;
4589*4882a593Smuzhiyun
4590*4882a593Smuzhiyun rt6_upper_bound_set(rt, &weight, total);
4591*4882a593Smuzhiyun
4592*4882a593Smuzhiyun list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4593*4882a593Smuzhiyun rt6_upper_bound_set(iter, &weight, total);
4594*4882a593Smuzhiyun }
4595*4882a593Smuzhiyun
rt6_multipath_rebalance(struct fib6_info * rt)4596*4882a593Smuzhiyun void rt6_multipath_rebalance(struct fib6_info *rt)
4597*4882a593Smuzhiyun {
4598*4882a593Smuzhiyun struct fib6_info *first;
4599*4882a593Smuzhiyun int total;
4600*4882a593Smuzhiyun
4601*4882a593Smuzhiyun /* In case the entire multipath route was marked for flushing,
4602*4882a593Smuzhiyun * then there is no need to rebalance upon the removal of every
4603*4882a593Smuzhiyun * sibling route.
4604*4882a593Smuzhiyun */
4605*4882a593Smuzhiyun if (!rt->fib6_nsiblings || rt->should_flush)
4606*4882a593Smuzhiyun return;
4607*4882a593Smuzhiyun
4608*4882a593Smuzhiyun /* During lookup routes are evaluated in order, so we need to
4609*4882a593Smuzhiyun * make sure upper bounds are assigned from the first sibling
4610*4882a593Smuzhiyun * onwards.
4611*4882a593Smuzhiyun */
4612*4882a593Smuzhiyun first = rt6_multipath_first_sibling(rt);
4613*4882a593Smuzhiyun if (WARN_ON_ONCE(!first))
4614*4882a593Smuzhiyun return;
4615*4882a593Smuzhiyun
4616*4882a593Smuzhiyun total = rt6_multipath_total_weight(first);
4617*4882a593Smuzhiyun rt6_multipath_upper_bound_set(first, total);
4618*4882a593Smuzhiyun }
4619*4882a593Smuzhiyun
fib6_ifup(struct fib6_info * rt,void * p_arg)4620*4882a593Smuzhiyun static int fib6_ifup(struct fib6_info *rt, void *p_arg)
4621*4882a593Smuzhiyun {
4622*4882a593Smuzhiyun const struct arg_netdev_event *arg = p_arg;
4623*4882a593Smuzhiyun struct net *net = dev_net(arg->dev);
4624*4882a593Smuzhiyun
4625*4882a593Smuzhiyun if (rt != net->ipv6.fib6_null_entry && !rt->nh &&
4626*4882a593Smuzhiyun rt->fib6_nh->fib_nh_dev == arg->dev) {
4627*4882a593Smuzhiyun rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
4628*4882a593Smuzhiyun fib6_update_sernum_upto_root(net, rt);
4629*4882a593Smuzhiyun rt6_multipath_rebalance(rt);
4630*4882a593Smuzhiyun }
4631*4882a593Smuzhiyun
4632*4882a593Smuzhiyun return 0;
4633*4882a593Smuzhiyun }
4634*4882a593Smuzhiyun
rt6_sync_up(struct net_device * dev,unsigned char nh_flags)4635*4882a593Smuzhiyun void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
4636*4882a593Smuzhiyun {
4637*4882a593Smuzhiyun struct arg_netdev_event arg = {
4638*4882a593Smuzhiyun .dev = dev,
4639*4882a593Smuzhiyun {
4640*4882a593Smuzhiyun .nh_flags = nh_flags,
4641*4882a593Smuzhiyun },
4642*4882a593Smuzhiyun };
4643*4882a593Smuzhiyun
4644*4882a593Smuzhiyun if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4645*4882a593Smuzhiyun arg.nh_flags |= RTNH_F_LINKDOWN;
4646*4882a593Smuzhiyun
4647*4882a593Smuzhiyun fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4648*4882a593Smuzhiyun }
4649*4882a593Smuzhiyun
4650*4882a593Smuzhiyun /* only called for fib entries with inline fib6_nh */
rt6_multipath_uses_dev(const struct fib6_info * rt,const struct net_device * dev)4651*4882a593Smuzhiyun static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
4652*4882a593Smuzhiyun const struct net_device *dev)
4653*4882a593Smuzhiyun {
4654*4882a593Smuzhiyun struct fib6_info *iter;
4655*4882a593Smuzhiyun
4656*4882a593Smuzhiyun if (rt->fib6_nh->fib_nh_dev == dev)
4657*4882a593Smuzhiyun return true;
4658*4882a593Smuzhiyun list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4659*4882a593Smuzhiyun if (iter->fib6_nh->fib_nh_dev == dev)
4660*4882a593Smuzhiyun return true;
4661*4882a593Smuzhiyun
4662*4882a593Smuzhiyun return false;
4663*4882a593Smuzhiyun }
4664*4882a593Smuzhiyun
rt6_multipath_flush(struct fib6_info * rt)4665*4882a593Smuzhiyun static void rt6_multipath_flush(struct fib6_info *rt)
4666*4882a593Smuzhiyun {
4667*4882a593Smuzhiyun struct fib6_info *iter;
4668*4882a593Smuzhiyun
4669*4882a593Smuzhiyun rt->should_flush = 1;
4670*4882a593Smuzhiyun list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4671*4882a593Smuzhiyun iter->should_flush = 1;
4672*4882a593Smuzhiyun }
4673*4882a593Smuzhiyun
rt6_multipath_dead_count(const struct fib6_info * rt,const struct net_device * down_dev)4674*4882a593Smuzhiyun static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
4675*4882a593Smuzhiyun const struct net_device *down_dev)
4676*4882a593Smuzhiyun {
4677*4882a593Smuzhiyun struct fib6_info *iter;
4678*4882a593Smuzhiyun unsigned int dead = 0;
4679*4882a593Smuzhiyun
4680*4882a593Smuzhiyun if (rt->fib6_nh->fib_nh_dev == down_dev ||
4681*4882a593Smuzhiyun rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4682*4882a593Smuzhiyun dead++;
4683*4882a593Smuzhiyun list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4684*4882a593Smuzhiyun if (iter->fib6_nh->fib_nh_dev == down_dev ||
4685*4882a593Smuzhiyun iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4686*4882a593Smuzhiyun dead++;
4687*4882a593Smuzhiyun
4688*4882a593Smuzhiyun return dead;
4689*4882a593Smuzhiyun }
4690*4882a593Smuzhiyun
rt6_multipath_nh_flags_set(struct fib6_info * rt,const struct net_device * dev,unsigned char nh_flags)4691*4882a593Smuzhiyun static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
4692*4882a593Smuzhiyun const struct net_device *dev,
4693*4882a593Smuzhiyun unsigned char nh_flags)
4694*4882a593Smuzhiyun {
4695*4882a593Smuzhiyun struct fib6_info *iter;
4696*4882a593Smuzhiyun
4697*4882a593Smuzhiyun if (rt->fib6_nh->fib_nh_dev == dev)
4698*4882a593Smuzhiyun rt->fib6_nh->fib_nh_flags |= nh_flags;
4699*4882a593Smuzhiyun list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4700*4882a593Smuzhiyun if (iter->fib6_nh->fib_nh_dev == dev)
4701*4882a593Smuzhiyun iter->fib6_nh->fib_nh_flags |= nh_flags;
4702*4882a593Smuzhiyun }
4703*4882a593Smuzhiyun
4704*4882a593Smuzhiyun /* called with write lock held for table with rt */
fib6_ifdown(struct fib6_info * rt,void * p_arg)4705*4882a593Smuzhiyun static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
4706*4882a593Smuzhiyun {
4707*4882a593Smuzhiyun const struct arg_netdev_event *arg = p_arg;
4708*4882a593Smuzhiyun const struct net_device *dev = arg->dev;
4709*4882a593Smuzhiyun struct net *net = dev_net(dev);
4710*4882a593Smuzhiyun
4711*4882a593Smuzhiyun if (rt == net->ipv6.fib6_null_entry || rt->nh)
4712*4882a593Smuzhiyun return 0;
4713*4882a593Smuzhiyun
4714*4882a593Smuzhiyun switch (arg->event) {
4715*4882a593Smuzhiyun case NETDEV_UNREGISTER:
4716*4882a593Smuzhiyun return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4717*4882a593Smuzhiyun case NETDEV_DOWN:
4718*4882a593Smuzhiyun if (rt->should_flush)
4719*4882a593Smuzhiyun return -1;
4720*4882a593Smuzhiyun if (!rt->fib6_nsiblings)
4721*4882a593Smuzhiyun return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
4722*4882a593Smuzhiyun if (rt6_multipath_uses_dev(rt, dev)) {
4723*4882a593Smuzhiyun unsigned int count;
4724*4882a593Smuzhiyun
4725*4882a593Smuzhiyun count = rt6_multipath_dead_count(rt, dev);
4726*4882a593Smuzhiyun if (rt->fib6_nsiblings + 1 == count) {
4727*4882a593Smuzhiyun rt6_multipath_flush(rt);
4728*4882a593Smuzhiyun return -1;
4729*4882a593Smuzhiyun }
4730*4882a593Smuzhiyun rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4731*4882a593Smuzhiyun RTNH_F_LINKDOWN);
4732*4882a593Smuzhiyun fib6_update_sernum(net, rt);
4733*4882a593Smuzhiyun rt6_multipath_rebalance(rt);
4734*4882a593Smuzhiyun }
4735*4882a593Smuzhiyun return -2;
4736*4882a593Smuzhiyun case NETDEV_CHANGE:
4737*4882a593Smuzhiyun if (rt->fib6_nh->fib_nh_dev != dev ||
4738*4882a593Smuzhiyun rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4739*4882a593Smuzhiyun break;
4740*4882a593Smuzhiyun rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
4741*4882a593Smuzhiyun rt6_multipath_rebalance(rt);
4742*4882a593Smuzhiyun break;
4743*4882a593Smuzhiyun }
4744*4882a593Smuzhiyun
4745*4882a593Smuzhiyun return 0;
4746*4882a593Smuzhiyun }
4747*4882a593Smuzhiyun
rt6_sync_down_dev(struct net_device * dev,unsigned long event)4748*4882a593Smuzhiyun void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4749*4882a593Smuzhiyun {
4750*4882a593Smuzhiyun struct arg_netdev_event arg = {
4751*4882a593Smuzhiyun .dev = dev,
4752*4882a593Smuzhiyun {
4753*4882a593Smuzhiyun .event = event,
4754*4882a593Smuzhiyun },
4755*4882a593Smuzhiyun };
4756*4882a593Smuzhiyun struct net *net = dev_net(dev);
4757*4882a593Smuzhiyun
4758*4882a593Smuzhiyun if (net->ipv6.sysctl.skip_notify_on_dev_down)
4759*4882a593Smuzhiyun fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4760*4882a593Smuzhiyun else
4761*4882a593Smuzhiyun fib6_clean_all(net, fib6_ifdown, &arg);
4762*4882a593Smuzhiyun }
4763*4882a593Smuzhiyun
rt6_disable_ip(struct net_device * dev,unsigned long event)4764*4882a593Smuzhiyun void rt6_disable_ip(struct net_device *dev, unsigned long event)
4765*4882a593Smuzhiyun {
4766*4882a593Smuzhiyun rt6_sync_down_dev(dev, event);
4767*4882a593Smuzhiyun rt6_uncached_list_flush_dev(dev_net(dev), dev);
4768*4882a593Smuzhiyun neigh_ifdown(&nd_tbl, dev);
4769*4882a593Smuzhiyun }
4770*4882a593Smuzhiyun
/* Argument block passed through fib6_clean_all() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* new MTU of @dev */
	struct fib6_info *f6i;		/* route being visited, set by
					 * rt6_mtu_change_route()
					 */
};
4776*4882a593Smuzhiyun
fib6_nh_mtu_change(struct fib6_nh * nh,void * _arg)4777*4882a593Smuzhiyun static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
4778*4882a593Smuzhiyun {
4779*4882a593Smuzhiyun struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
4780*4882a593Smuzhiyun struct fib6_info *f6i = arg->f6i;
4781*4882a593Smuzhiyun
4782*4882a593Smuzhiyun /* For administrative MTU increase, there is no way to discover
4783*4882a593Smuzhiyun * IPv6 PMTU increase, so PMTU increase should be updated here.
4784*4882a593Smuzhiyun * Since RFC 1981 doesn't include administrative MTU increase
4785*4882a593Smuzhiyun * update PMTU increase is a MUST. (i.e. jumbo frame)
4786*4882a593Smuzhiyun */
4787*4882a593Smuzhiyun if (nh->fib_nh_dev == arg->dev) {
4788*4882a593Smuzhiyun struct inet6_dev *idev = __in6_dev_get(arg->dev);
4789*4882a593Smuzhiyun u32 mtu = f6i->fib6_pmtu;
4790*4882a593Smuzhiyun
4791*4882a593Smuzhiyun if (mtu >= arg->mtu ||
4792*4882a593Smuzhiyun (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4793*4882a593Smuzhiyun fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
4794*4882a593Smuzhiyun
4795*4882a593Smuzhiyun spin_lock_bh(&rt6_exception_lock);
4796*4882a593Smuzhiyun rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
4797*4882a593Smuzhiyun spin_unlock_bh(&rt6_exception_lock);
4798*4882a593Smuzhiyun }
4799*4882a593Smuzhiyun
4800*4882a593Smuzhiyun return 0;
4801*4882a593Smuzhiyun }
4802*4882a593Smuzhiyun
rt6_mtu_change_route(struct fib6_info * f6i,void * p_arg)4803*4882a593Smuzhiyun static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
4804*4882a593Smuzhiyun {
4805*4882a593Smuzhiyun struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4806*4882a593Smuzhiyun struct inet6_dev *idev;
4807*4882a593Smuzhiyun
4808*4882a593Smuzhiyun /* In IPv6 pmtu discovery is not optional,
4809*4882a593Smuzhiyun so that RTAX_MTU lock cannot disable it.
4810*4882a593Smuzhiyun We still use this lock to block changes
4811*4882a593Smuzhiyun caused by addrconf/ndisc.
4812*4882a593Smuzhiyun */
4813*4882a593Smuzhiyun
4814*4882a593Smuzhiyun idev = __in6_dev_get(arg->dev);
4815*4882a593Smuzhiyun if (!idev)
4816*4882a593Smuzhiyun return 0;
4817*4882a593Smuzhiyun
4818*4882a593Smuzhiyun if (fib6_metric_locked(f6i, RTAX_MTU))
4819*4882a593Smuzhiyun return 0;
4820*4882a593Smuzhiyun
4821*4882a593Smuzhiyun arg->f6i = f6i;
4822*4882a593Smuzhiyun if (f6i->nh) {
4823*4882a593Smuzhiyun /* fib6_nh_mtu_change only returns 0, so this is safe */
4824*4882a593Smuzhiyun return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
4825*4882a593Smuzhiyun arg);
4826*4882a593Smuzhiyun }
4827*4882a593Smuzhiyun
4828*4882a593Smuzhiyun return fib6_nh_mtu_change(f6i->fib6_nh, arg);
4829*4882a593Smuzhiyun }
4830*4882a593Smuzhiyun
rt6_mtu_change(struct net_device * dev,unsigned int mtu)4831*4882a593Smuzhiyun void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4832*4882a593Smuzhiyun {
4833*4882a593Smuzhiyun struct rt6_mtu_change_arg arg = {
4834*4882a593Smuzhiyun .dev = dev,
4835*4882a593Smuzhiyun .mtu = mtu,
4836*4882a593Smuzhiyun };
4837*4882a593Smuzhiyun
4838*4882a593Smuzhiyun fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4839*4882a593Smuzhiyun }
4840*4882a593Smuzhiyun
4841*4882a593Smuzhiyun static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
4842*4882a593Smuzhiyun [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 },
4843*4882a593Smuzhiyun [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
4844*4882a593Smuzhiyun [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
4845*4882a593Smuzhiyun [RTA_OIF] = { .type = NLA_U32 },
4846*4882a593Smuzhiyun [RTA_IIF] = { .type = NLA_U32 },
4847*4882a593Smuzhiyun [RTA_PRIORITY] = { .type = NLA_U32 },
4848*4882a593Smuzhiyun [RTA_METRICS] = { .type = NLA_NESTED },
4849*4882a593Smuzhiyun [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
4850*4882a593Smuzhiyun [RTA_PREF] = { .type = NLA_U8 },
4851*4882a593Smuzhiyun [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4852*4882a593Smuzhiyun [RTA_ENCAP] = { .type = NLA_NESTED },
4853*4882a593Smuzhiyun [RTA_EXPIRES] = { .type = NLA_U32 },
4854*4882a593Smuzhiyun [RTA_UID] = { .type = NLA_U32 },
4855*4882a593Smuzhiyun [RTA_MARK] = { .type = NLA_U32 },
4856*4882a593Smuzhiyun [RTA_TABLE] = { .type = NLA_U32 },
4857*4882a593Smuzhiyun [RTA_IP_PROTO] = { .type = NLA_U8 },
4858*4882a593Smuzhiyun [RTA_SPORT] = { .type = NLA_U16 },
4859*4882a593Smuzhiyun [RTA_DPORT] = { .type = NLA_U16 },
4860*4882a593Smuzhiyun [RTA_NH_ID] = { .type = NLA_U32 },
4861*4882a593Smuzhiyun };
4862*4882a593Smuzhiyun
rtm_to_fib6_config(struct sk_buff * skb,struct nlmsghdr * nlh,struct fib6_config * cfg,struct netlink_ext_ack * extack)4863*4882a593Smuzhiyun static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4864*4882a593Smuzhiyun struct fib6_config *cfg,
4865*4882a593Smuzhiyun struct netlink_ext_ack *extack)
4866*4882a593Smuzhiyun {
4867*4882a593Smuzhiyun struct rtmsg *rtm;
4868*4882a593Smuzhiyun struct nlattr *tb[RTA_MAX+1];
4869*4882a593Smuzhiyun unsigned int pref;
4870*4882a593Smuzhiyun int err;
4871*4882a593Smuzhiyun
4872*4882a593Smuzhiyun err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
4873*4882a593Smuzhiyun rtm_ipv6_policy, extack);
4874*4882a593Smuzhiyun if (err < 0)
4875*4882a593Smuzhiyun goto errout;
4876*4882a593Smuzhiyun
4877*4882a593Smuzhiyun err = -EINVAL;
4878*4882a593Smuzhiyun rtm = nlmsg_data(nlh);
4879*4882a593Smuzhiyun
4880*4882a593Smuzhiyun *cfg = (struct fib6_config){
4881*4882a593Smuzhiyun .fc_table = rtm->rtm_table,
4882*4882a593Smuzhiyun .fc_dst_len = rtm->rtm_dst_len,
4883*4882a593Smuzhiyun .fc_src_len = rtm->rtm_src_len,
4884*4882a593Smuzhiyun .fc_flags = RTF_UP,
4885*4882a593Smuzhiyun .fc_protocol = rtm->rtm_protocol,
4886*4882a593Smuzhiyun .fc_type = rtm->rtm_type,
4887*4882a593Smuzhiyun
4888*4882a593Smuzhiyun .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4889*4882a593Smuzhiyun .fc_nlinfo.nlh = nlh,
4890*4882a593Smuzhiyun .fc_nlinfo.nl_net = sock_net(skb->sk),
4891*4882a593Smuzhiyun };
4892*4882a593Smuzhiyun
4893*4882a593Smuzhiyun if (rtm->rtm_type == RTN_UNREACHABLE ||
4894*4882a593Smuzhiyun rtm->rtm_type == RTN_BLACKHOLE ||
4895*4882a593Smuzhiyun rtm->rtm_type == RTN_PROHIBIT ||
4896*4882a593Smuzhiyun rtm->rtm_type == RTN_THROW)
4897*4882a593Smuzhiyun cfg->fc_flags |= RTF_REJECT;
4898*4882a593Smuzhiyun
4899*4882a593Smuzhiyun if (rtm->rtm_type == RTN_LOCAL)
4900*4882a593Smuzhiyun cfg->fc_flags |= RTF_LOCAL;
4901*4882a593Smuzhiyun
4902*4882a593Smuzhiyun if (rtm->rtm_flags & RTM_F_CLONED)
4903*4882a593Smuzhiyun cfg->fc_flags |= RTF_CACHE;
4904*4882a593Smuzhiyun
4905*4882a593Smuzhiyun cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4906*4882a593Smuzhiyun
4907*4882a593Smuzhiyun if (tb[RTA_NH_ID]) {
4908*4882a593Smuzhiyun if (tb[RTA_GATEWAY] || tb[RTA_OIF] ||
4909*4882a593Smuzhiyun tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
4910*4882a593Smuzhiyun NL_SET_ERR_MSG(extack,
4911*4882a593Smuzhiyun "Nexthop specification and nexthop id are mutually exclusive");
4912*4882a593Smuzhiyun goto errout;
4913*4882a593Smuzhiyun }
4914*4882a593Smuzhiyun cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
4915*4882a593Smuzhiyun }
4916*4882a593Smuzhiyun
4917*4882a593Smuzhiyun if (tb[RTA_GATEWAY]) {
4918*4882a593Smuzhiyun cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4919*4882a593Smuzhiyun cfg->fc_flags |= RTF_GATEWAY;
4920*4882a593Smuzhiyun }
4921*4882a593Smuzhiyun if (tb[RTA_VIA]) {
4922*4882a593Smuzhiyun NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4923*4882a593Smuzhiyun goto errout;
4924*4882a593Smuzhiyun }
4925*4882a593Smuzhiyun
4926*4882a593Smuzhiyun if (tb[RTA_DST]) {
4927*4882a593Smuzhiyun int plen = (rtm->rtm_dst_len + 7) >> 3;
4928*4882a593Smuzhiyun
4929*4882a593Smuzhiyun if (nla_len(tb[RTA_DST]) < plen)
4930*4882a593Smuzhiyun goto errout;
4931*4882a593Smuzhiyun
4932*4882a593Smuzhiyun nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4933*4882a593Smuzhiyun }
4934*4882a593Smuzhiyun
4935*4882a593Smuzhiyun if (tb[RTA_SRC]) {
4936*4882a593Smuzhiyun int plen = (rtm->rtm_src_len + 7) >> 3;
4937*4882a593Smuzhiyun
4938*4882a593Smuzhiyun if (nla_len(tb[RTA_SRC]) < plen)
4939*4882a593Smuzhiyun goto errout;
4940*4882a593Smuzhiyun
4941*4882a593Smuzhiyun nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4942*4882a593Smuzhiyun }
4943*4882a593Smuzhiyun
4944*4882a593Smuzhiyun if (tb[RTA_PREFSRC])
4945*4882a593Smuzhiyun cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4946*4882a593Smuzhiyun
4947*4882a593Smuzhiyun if (tb[RTA_OIF])
4948*4882a593Smuzhiyun cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4949*4882a593Smuzhiyun
4950*4882a593Smuzhiyun if (tb[RTA_PRIORITY])
4951*4882a593Smuzhiyun cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4952*4882a593Smuzhiyun
4953*4882a593Smuzhiyun if (tb[RTA_METRICS]) {
4954*4882a593Smuzhiyun cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4955*4882a593Smuzhiyun cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4956*4882a593Smuzhiyun }
4957*4882a593Smuzhiyun
4958*4882a593Smuzhiyun if (tb[RTA_TABLE])
4959*4882a593Smuzhiyun cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4960*4882a593Smuzhiyun
4961*4882a593Smuzhiyun if (tb[RTA_MULTIPATH]) {
4962*4882a593Smuzhiyun cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4963*4882a593Smuzhiyun cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4964*4882a593Smuzhiyun
4965*4882a593Smuzhiyun err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4966*4882a593Smuzhiyun cfg->fc_mp_len, extack);
4967*4882a593Smuzhiyun if (err < 0)
4968*4882a593Smuzhiyun goto errout;
4969*4882a593Smuzhiyun }
4970*4882a593Smuzhiyun
4971*4882a593Smuzhiyun if (tb[RTA_PREF]) {
4972*4882a593Smuzhiyun pref = nla_get_u8(tb[RTA_PREF]);
4973*4882a593Smuzhiyun if (pref != ICMPV6_ROUTER_PREF_LOW &&
4974*4882a593Smuzhiyun pref != ICMPV6_ROUTER_PREF_HIGH)
4975*4882a593Smuzhiyun pref = ICMPV6_ROUTER_PREF_MEDIUM;
4976*4882a593Smuzhiyun cfg->fc_flags |= RTF_PREF(pref);
4977*4882a593Smuzhiyun }
4978*4882a593Smuzhiyun
4979*4882a593Smuzhiyun if (tb[RTA_ENCAP])
4980*4882a593Smuzhiyun cfg->fc_encap = tb[RTA_ENCAP];
4981*4882a593Smuzhiyun
4982*4882a593Smuzhiyun if (tb[RTA_ENCAP_TYPE]) {
4983*4882a593Smuzhiyun cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4984*4882a593Smuzhiyun
4985*4882a593Smuzhiyun err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
4986*4882a593Smuzhiyun if (err < 0)
4987*4882a593Smuzhiyun goto errout;
4988*4882a593Smuzhiyun }
4989*4882a593Smuzhiyun
4990*4882a593Smuzhiyun if (tb[RTA_EXPIRES]) {
4991*4882a593Smuzhiyun unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4992*4882a593Smuzhiyun
4993*4882a593Smuzhiyun if (addrconf_finite_timeout(timeout)) {
4994*4882a593Smuzhiyun cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4995*4882a593Smuzhiyun cfg->fc_flags |= RTF_EXPIRES;
4996*4882a593Smuzhiyun }
4997*4882a593Smuzhiyun }
4998*4882a593Smuzhiyun
4999*4882a593Smuzhiyun err = 0;
5000*4882a593Smuzhiyun errout:
5001*4882a593Smuzhiyun return err;
5002*4882a593Smuzhiyun }
5003*4882a593Smuzhiyun
5004*4882a593Smuzhiyun struct rt6_nh {
5005*4882a593Smuzhiyun struct fib6_info *fib6_info;
5006*4882a593Smuzhiyun struct fib6_config r_cfg;
5007*4882a593Smuzhiyun struct list_head next;
5008*4882a593Smuzhiyun };
5009*4882a593Smuzhiyun
ip6_route_info_append(struct net * net,struct list_head * rt6_nh_list,struct fib6_info * rt,struct fib6_config * r_cfg)5010*4882a593Smuzhiyun static int ip6_route_info_append(struct net *net,
5011*4882a593Smuzhiyun struct list_head *rt6_nh_list,
5012*4882a593Smuzhiyun struct fib6_info *rt,
5013*4882a593Smuzhiyun struct fib6_config *r_cfg)
5014*4882a593Smuzhiyun {
5015*4882a593Smuzhiyun struct rt6_nh *nh;
5016*4882a593Smuzhiyun int err = -EEXIST;
5017*4882a593Smuzhiyun
5018*4882a593Smuzhiyun list_for_each_entry(nh, rt6_nh_list, next) {
5019*4882a593Smuzhiyun /* check if fib6_info already exists */
5020*4882a593Smuzhiyun if (rt6_duplicate_nexthop(nh->fib6_info, rt))
5021*4882a593Smuzhiyun return err;
5022*4882a593Smuzhiyun }
5023*4882a593Smuzhiyun
5024*4882a593Smuzhiyun nh = kzalloc(sizeof(*nh), GFP_KERNEL);
5025*4882a593Smuzhiyun if (!nh)
5026*4882a593Smuzhiyun return -ENOMEM;
5027*4882a593Smuzhiyun nh->fib6_info = rt;
5028*4882a593Smuzhiyun memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
5029*4882a593Smuzhiyun list_add_tail(&nh->next, rt6_nh_list);
5030*4882a593Smuzhiyun
5031*4882a593Smuzhiyun return 0;
5032*4882a593Smuzhiyun }
5033*4882a593Smuzhiyun
/* Send the RTM_NEWROUTE notification for a multipath add/append.
 *
 * @rt:      first route inserted (may be NULL, then nothing is sent
 *           unless the append case below picks another route)
 * @rt_last: last route inserted
 * @info:    netlink info passed on to inet6_rt_notify()
 * @nlflags: netlink flags reported in the notification
 */
static void ip6_route_mpath_notify(struct fib6_info *rt,
				   struct fib6_info *rt_last,
				   struct nl_info *info,
				   __u16 nlflags)
{
	struct fib6_info *notify_rt = rt;
	bool appended;

	/* For an APPEND, rt is the first route inserted and rt_last the
	 * last one. Userspace wants a consistent dump of the route that
	 * starts at the first nexthop. As sibling routes are always added
	 * at the end of the list, look up the first sibling of the last
	 * route appended.
	 */
	appended = (nlflags & NLM_F_APPEND) && rt_last &&
		   rt_last->fib6_nsiblings;
	if (appended)
		notify_rt = list_first_entry(&rt_last->fib6_siblings,
					     struct fib6_info,
					     fib6_siblings);

	if (!notify_rt)
		return;

	inet6_rt_notify(RTM_NEWROUTE, notify_rt, info, nlflags);
}
5054*4882a593Smuzhiyun
ip6_route_mpath_should_notify(const struct fib6_info * rt)5055*4882a593Smuzhiyun static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
5056*4882a593Smuzhiyun {
5057*4882a593Smuzhiyun bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
5058*4882a593Smuzhiyun bool should_notify = false;
5059*4882a593Smuzhiyun struct fib6_info *leaf;
5060*4882a593Smuzhiyun struct fib6_node *fn;
5061*4882a593Smuzhiyun
5062*4882a593Smuzhiyun rcu_read_lock();
5063*4882a593Smuzhiyun fn = rcu_dereference(rt->fib6_node);
5064*4882a593Smuzhiyun if (!fn)
5065*4882a593Smuzhiyun goto out;
5066*4882a593Smuzhiyun
5067*4882a593Smuzhiyun leaf = rcu_dereference(fn->leaf);
5068*4882a593Smuzhiyun if (!leaf)
5069*4882a593Smuzhiyun goto out;
5070*4882a593Smuzhiyun
5071*4882a593Smuzhiyun if (rt == leaf ||
5072*4882a593Smuzhiyun (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric &&
5073*4882a593Smuzhiyun rt6_qualify_for_ecmp(leaf)))
5074*4882a593Smuzhiyun should_notify = true;
5075*4882a593Smuzhiyun out:
5076*4882a593Smuzhiyun rcu_read_unlock();
5077*4882a593Smuzhiyun
5078*4882a593Smuzhiyun return should_notify;
5079*4882a593Smuzhiyun }
5080*4882a593Smuzhiyun
fib6_gw_from_attr(struct in6_addr * gw,struct nlattr * nla,struct netlink_ext_ack * extack)5081*4882a593Smuzhiyun static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla,
5082*4882a593Smuzhiyun struct netlink_ext_ack *extack)
5083*4882a593Smuzhiyun {
5084*4882a593Smuzhiyun if (nla_len(nla) < sizeof(*gw)) {
5085*4882a593Smuzhiyun NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY");
5086*4882a593Smuzhiyun return -EINVAL;
5087*4882a593Smuzhiyun }
5088*4882a593Smuzhiyun
5089*4882a593Smuzhiyun *gw = nla_get_in6_addr(nla);
5090*4882a593Smuzhiyun
5091*4882a593Smuzhiyun return 0;
5092*4882a593Smuzhiyun }
5093*4882a593Smuzhiyun
ip6_route_multipath_add(struct fib6_config * cfg,struct netlink_ext_ack * extack)5094*4882a593Smuzhiyun static int ip6_route_multipath_add(struct fib6_config *cfg,
5095*4882a593Smuzhiyun struct netlink_ext_ack *extack)
5096*4882a593Smuzhiyun {
5097*4882a593Smuzhiyun struct fib6_info *rt_notif = NULL, *rt_last = NULL;
5098*4882a593Smuzhiyun struct nl_info *info = &cfg->fc_nlinfo;
5099*4882a593Smuzhiyun struct fib6_config r_cfg;
5100*4882a593Smuzhiyun struct rtnexthop *rtnh;
5101*4882a593Smuzhiyun struct fib6_info *rt;
5102*4882a593Smuzhiyun struct rt6_nh *err_nh;
5103*4882a593Smuzhiyun struct rt6_nh *nh, *nh_safe;
5104*4882a593Smuzhiyun __u16 nlflags;
5105*4882a593Smuzhiyun int remaining;
5106*4882a593Smuzhiyun int attrlen;
5107*4882a593Smuzhiyun int err = 1;
5108*4882a593Smuzhiyun int nhn = 0;
5109*4882a593Smuzhiyun int replace = (cfg->fc_nlinfo.nlh &&
5110*4882a593Smuzhiyun (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
5111*4882a593Smuzhiyun LIST_HEAD(rt6_nh_list);
5112*4882a593Smuzhiyun
5113*4882a593Smuzhiyun nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
5114*4882a593Smuzhiyun if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
5115*4882a593Smuzhiyun nlflags |= NLM_F_APPEND;
5116*4882a593Smuzhiyun
5117*4882a593Smuzhiyun remaining = cfg->fc_mp_len;
5118*4882a593Smuzhiyun rtnh = (struct rtnexthop *)cfg->fc_mp;
5119*4882a593Smuzhiyun
5120*4882a593Smuzhiyun /* Parse a Multipath Entry and build a list (rt6_nh_list) of
5121*4882a593Smuzhiyun * fib6_info structs per nexthop
5122*4882a593Smuzhiyun */
5123*4882a593Smuzhiyun while (rtnh_ok(rtnh, remaining)) {
5124*4882a593Smuzhiyun memcpy(&r_cfg, cfg, sizeof(*cfg));
5125*4882a593Smuzhiyun if (rtnh->rtnh_ifindex)
5126*4882a593Smuzhiyun r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5127*4882a593Smuzhiyun
5128*4882a593Smuzhiyun attrlen = rtnh_attrlen(rtnh);
5129*4882a593Smuzhiyun if (attrlen > 0) {
5130*4882a593Smuzhiyun struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5131*4882a593Smuzhiyun
5132*4882a593Smuzhiyun nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5133*4882a593Smuzhiyun if (nla) {
5134*4882a593Smuzhiyun err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
5135*4882a593Smuzhiyun extack);
5136*4882a593Smuzhiyun if (err)
5137*4882a593Smuzhiyun goto cleanup;
5138*4882a593Smuzhiyun
5139*4882a593Smuzhiyun r_cfg.fc_flags |= RTF_GATEWAY;
5140*4882a593Smuzhiyun }
5141*4882a593Smuzhiyun r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
5142*4882a593Smuzhiyun
5143*4882a593Smuzhiyun /* RTA_ENCAP_TYPE length checked in
5144*4882a593Smuzhiyun * lwtunnel_valid_encap_type_attr
5145*4882a593Smuzhiyun */
5146*4882a593Smuzhiyun nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
5147*4882a593Smuzhiyun if (nla)
5148*4882a593Smuzhiyun r_cfg.fc_encap_type = nla_get_u16(nla);
5149*4882a593Smuzhiyun }
5150*4882a593Smuzhiyun
5151*4882a593Smuzhiyun r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
5152*4882a593Smuzhiyun rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
5153*4882a593Smuzhiyun if (IS_ERR(rt)) {
5154*4882a593Smuzhiyun err = PTR_ERR(rt);
5155*4882a593Smuzhiyun rt = NULL;
5156*4882a593Smuzhiyun goto cleanup;
5157*4882a593Smuzhiyun }
5158*4882a593Smuzhiyun if (!rt6_qualify_for_ecmp(rt)) {
5159*4882a593Smuzhiyun err = -EINVAL;
5160*4882a593Smuzhiyun NL_SET_ERR_MSG(extack,
5161*4882a593Smuzhiyun "Device only routes can not be added for IPv6 using the multipath API.");
5162*4882a593Smuzhiyun fib6_info_release(rt);
5163*4882a593Smuzhiyun goto cleanup;
5164*4882a593Smuzhiyun }
5165*4882a593Smuzhiyun
5166*4882a593Smuzhiyun rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
5167*4882a593Smuzhiyun
5168*4882a593Smuzhiyun err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
5169*4882a593Smuzhiyun rt, &r_cfg);
5170*4882a593Smuzhiyun if (err) {
5171*4882a593Smuzhiyun fib6_info_release(rt);
5172*4882a593Smuzhiyun goto cleanup;
5173*4882a593Smuzhiyun }
5174*4882a593Smuzhiyun
5175*4882a593Smuzhiyun rtnh = rtnh_next(rtnh, &remaining);
5176*4882a593Smuzhiyun }
5177*4882a593Smuzhiyun
5178*4882a593Smuzhiyun if (list_empty(&rt6_nh_list)) {
5179*4882a593Smuzhiyun NL_SET_ERR_MSG(extack,
5180*4882a593Smuzhiyun "Invalid nexthop configuration - no valid nexthops");
5181*4882a593Smuzhiyun return -EINVAL;
5182*4882a593Smuzhiyun }
5183*4882a593Smuzhiyun
5184*4882a593Smuzhiyun /* for add and replace send one notification with all nexthops.
5185*4882a593Smuzhiyun * Skip the notification in fib6_add_rt2node and send one with
5186*4882a593Smuzhiyun * the full route when done
5187*4882a593Smuzhiyun */
5188*4882a593Smuzhiyun info->skip_notify = 1;
5189*4882a593Smuzhiyun
5190*4882a593Smuzhiyun /* For add and replace, send one notification with all nexthops. For
5191*4882a593Smuzhiyun * append, send one notification with all appended nexthops.
5192*4882a593Smuzhiyun */
5193*4882a593Smuzhiyun info->skip_notify_kernel = 1;
5194*4882a593Smuzhiyun
5195*4882a593Smuzhiyun err_nh = NULL;
5196*4882a593Smuzhiyun list_for_each_entry(nh, &rt6_nh_list, next) {
5197*4882a593Smuzhiyun err = __ip6_ins_rt(nh->fib6_info, info, extack);
5198*4882a593Smuzhiyun fib6_info_release(nh->fib6_info);
5199*4882a593Smuzhiyun
5200*4882a593Smuzhiyun if (!err) {
5201*4882a593Smuzhiyun /* save reference to last route successfully inserted */
5202*4882a593Smuzhiyun rt_last = nh->fib6_info;
5203*4882a593Smuzhiyun
5204*4882a593Smuzhiyun /* save reference to first route for notification */
5205*4882a593Smuzhiyun if (!rt_notif)
5206*4882a593Smuzhiyun rt_notif = nh->fib6_info;
5207*4882a593Smuzhiyun }
5208*4882a593Smuzhiyun
5209*4882a593Smuzhiyun /* nh->fib6_info is used or freed at this point, reset to NULL*/
5210*4882a593Smuzhiyun nh->fib6_info = NULL;
5211*4882a593Smuzhiyun if (err) {
5212*4882a593Smuzhiyun if (replace && nhn)
5213*4882a593Smuzhiyun NL_SET_ERR_MSG_MOD(extack,
5214*4882a593Smuzhiyun "multipath route replace failed (check consistency of installed routes)");
5215*4882a593Smuzhiyun err_nh = nh;
5216*4882a593Smuzhiyun goto add_errout;
5217*4882a593Smuzhiyun }
5218*4882a593Smuzhiyun
5219*4882a593Smuzhiyun /* Because each route is added like a single route we remove
5220*4882a593Smuzhiyun * these flags after the first nexthop: if there is a collision,
5221*4882a593Smuzhiyun * we have already failed to add the first nexthop:
5222*4882a593Smuzhiyun * fib6_add_rt2node() has rejected it; when replacing, old
5223*4882a593Smuzhiyun * nexthops have been replaced by first new, the rest should
5224*4882a593Smuzhiyun * be added to it.
5225*4882a593Smuzhiyun */
5226*4882a593Smuzhiyun if (cfg->fc_nlinfo.nlh) {
5227*4882a593Smuzhiyun cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
5228*4882a593Smuzhiyun NLM_F_REPLACE);
5229*4882a593Smuzhiyun cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
5230*4882a593Smuzhiyun }
5231*4882a593Smuzhiyun nhn++;
5232*4882a593Smuzhiyun }
5233*4882a593Smuzhiyun
5234*4882a593Smuzhiyun /* An in-kernel notification should only be sent in case the new
5235*4882a593Smuzhiyun * multipath route is added as the first route in the node, or if
5236*4882a593Smuzhiyun * it was appended to it. We pass 'rt_notif' since it is the first
5237*4882a593Smuzhiyun * sibling and might allow us to skip some checks in the replace case.
5238*4882a593Smuzhiyun */
5239*4882a593Smuzhiyun if (ip6_route_mpath_should_notify(rt_notif)) {
5240*4882a593Smuzhiyun enum fib_event_type fib_event;
5241*4882a593Smuzhiyun
5242*4882a593Smuzhiyun if (rt_notif->fib6_nsiblings != nhn - 1)
5243*4882a593Smuzhiyun fib_event = FIB_EVENT_ENTRY_APPEND;
5244*4882a593Smuzhiyun else
5245*4882a593Smuzhiyun fib_event = FIB_EVENT_ENTRY_REPLACE;
5246*4882a593Smuzhiyun
5247*4882a593Smuzhiyun err = call_fib6_multipath_entry_notifiers(info->nl_net,
5248*4882a593Smuzhiyun fib_event, rt_notif,
5249*4882a593Smuzhiyun nhn - 1, extack);
5250*4882a593Smuzhiyun if (err) {
5251*4882a593Smuzhiyun /* Delete all the siblings that were just added */
5252*4882a593Smuzhiyun err_nh = NULL;
5253*4882a593Smuzhiyun goto add_errout;
5254*4882a593Smuzhiyun }
5255*4882a593Smuzhiyun }
5256*4882a593Smuzhiyun
5257*4882a593Smuzhiyun /* success ... tell user about new route */
5258*4882a593Smuzhiyun ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5259*4882a593Smuzhiyun goto cleanup;
5260*4882a593Smuzhiyun
5261*4882a593Smuzhiyun add_errout:
5262*4882a593Smuzhiyun /* send notification for routes that were added so that
5263*4882a593Smuzhiyun * the delete notifications sent by ip6_route_del are
5264*4882a593Smuzhiyun * coherent
5265*4882a593Smuzhiyun */
5266*4882a593Smuzhiyun if (rt_notif)
5267*4882a593Smuzhiyun ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
5268*4882a593Smuzhiyun
5269*4882a593Smuzhiyun /* Delete routes that were already added */
5270*4882a593Smuzhiyun list_for_each_entry(nh, &rt6_nh_list, next) {
5271*4882a593Smuzhiyun if (err_nh == nh)
5272*4882a593Smuzhiyun break;
5273*4882a593Smuzhiyun ip6_route_del(&nh->r_cfg, extack);
5274*4882a593Smuzhiyun }
5275*4882a593Smuzhiyun
5276*4882a593Smuzhiyun cleanup:
5277*4882a593Smuzhiyun list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
5278*4882a593Smuzhiyun if (nh->fib6_info)
5279*4882a593Smuzhiyun fib6_info_release(nh->fib6_info);
5280*4882a593Smuzhiyun list_del(&nh->next);
5281*4882a593Smuzhiyun kfree(nh);
5282*4882a593Smuzhiyun }
5283*4882a593Smuzhiyun
5284*4882a593Smuzhiyun return err;
5285*4882a593Smuzhiyun }
5286*4882a593Smuzhiyun
ip6_route_multipath_del(struct fib6_config * cfg,struct netlink_ext_ack * extack)5287*4882a593Smuzhiyun static int ip6_route_multipath_del(struct fib6_config *cfg,
5288*4882a593Smuzhiyun struct netlink_ext_ack *extack)
5289*4882a593Smuzhiyun {
5290*4882a593Smuzhiyun struct fib6_config r_cfg;
5291*4882a593Smuzhiyun struct rtnexthop *rtnh;
5292*4882a593Smuzhiyun int last_err = 0;
5293*4882a593Smuzhiyun int remaining;
5294*4882a593Smuzhiyun int attrlen;
5295*4882a593Smuzhiyun int err;
5296*4882a593Smuzhiyun
5297*4882a593Smuzhiyun remaining = cfg->fc_mp_len;
5298*4882a593Smuzhiyun rtnh = (struct rtnexthop *)cfg->fc_mp;
5299*4882a593Smuzhiyun
5300*4882a593Smuzhiyun /* Parse a Multipath Entry */
5301*4882a593Smuzhiyun while (rtnh_ok(rtnh, remaining)) {
5302*4882a593Smuzhiyun memcpy(&r_cfg, cfg, sizeof(*cfg));
5303*4882a593Smuzhiyun if (rtnh->rtnh_ifindex)
5304*4882a593Smuzhiyun r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
5305*4882a593Smuzhiyun
5306*4882a593Smuzhiyun attrlen = rtnh_attrlen(rtnh);
5307*4882a593Smuzhiyun if (attrlen > 0) {
5308*4882a593Smuzhiyun struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
5309*4882a593Smuzhiyun
5310*4882a593Smuzhiyun nla = nla_find(attrs, attrlen, RTA_GATEWAY);
5311*4882a593Smuzhiyun if (nla) {
5312*4882a593Smuzhiyun err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
5313*4882a593Smuzhiyun extack);
5314*4882a593Smuzhiyun if (err) {
5315*4882a593Smuzhiyun last_err = err;
5316*4882a593Smuzhiyun goto next_rtnh;
5317*4882a593Smuzhiyun }
5318*4882a593Smuzhiyun
5319*4882a593Smuzhiyun r_cfg.fc_flags |= RTF_GATEWAY;
5320*4882a593Smuzhiyun }
5321*4882a593Smuzhiyun }
5322*4882a593Smuzhiyun err = ip6_route_del(&r_cfg, extack);
5323*4882a593Smuzhiyun if (err)
5324*4882a593Smuzhiyun last_err = err;
5325*4882a593Smuzhiyun
5326*4882a593Smuzhiyun next_rtnh:
5327*4882a593Smuzhiyun rtnh = rtnh_next(rtnh, &remaining);
5328*4882a593Smuzhiyun }
5329*4882a593Smuzhiyun
5330*4882a593Smuzhiyun return last_err;
5331*4882a593Smuzhiyun }
5332*4882a593Smuzhiyun
inet6_rtm_delroute(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)5333*4882a593Smuzhiyun static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5334*4882a593Smuzhiyun struct netlink_ext_ack *extack)
5335*4882a593Smuzhiyun {
5336*4882a593Smuzhiyun struct fib6_config cfg;
5337*4882a593Smuzhiyun int err;
5338*4882a593Smuzhiyun
5339*4882a593Smuzhiyun err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5340*4882a593Smuzhiyun if (err < 0)
5341*4882a593Smuzhiyun return err;
5342*4882a593Smuzhiyun
5343*4882a593Smuzhiyun if (cfg.fc_nh_id &&
5344*4882a593Smuzhiyun !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
5345*4882a593Smuzhiyun NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
5346*4882a593Smuzhiyun return -EINVAL;
5347*4882a593Smuzhiyun }
5348*4882a593Smuzhiyun
5349*4882a593Smuzhiyun if (cfg.fc_mp)
5350*4882a593Smuzhiyun return ip6_route_multipath_del(&cfg, extack);
5351*4882a593Smuzhiyun else {
5352*4882a593Smuzhiyun cfg.fc_delete_all_nh = 1;
5353*4882a593Smuzhiyun return ip6_route_del(&cfg, extack);
5354*4882a593Smuzhiyun }
5355*4882a593Smuzhiyun }
5356*4882a593Smuzhiyun
inet6_rtm_newroute(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)5357*4882a593Smuzhiyun static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
5358*4882a593Smuzhiyun struct netlink_ext_ack *extack)
5359*4882a593Smuzhiyun {
5360*4882a593Smuzhiyun struct fib6_config cfg;
5361*4882a593Smuzhiyun int err;
5362*4882a593Smuzhiyun
5363*4882a593Smuzhiyun err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
5364*4882a593Smuzhiyun if (err < 0)
5365*4882a593Smuzhiyun return err;
5366*4882a593Smuzhiyun
5367*4882a593Smuzhiyun if (cfg.fc_metric == 0)
5368*4882a593Smuzhiyun cfg.fc_metric = IP6_RT_PRIO_USER;
5369*4882a593Smuzhiyun
5370*4882a593Smuzhiyun if (cfg.fc_mp)
5371*4882a593Smuzhiyun return ip6_route_multipath_add(&cfg, extack);
5372*4882a593Smuzhiyun else
5373*4882a593Smuzhiyun return ip6_route_add(&cfg, GFP_KERNEL, extack);
5374*4882a593Smuzhiyun }
5375*4882a593Smuzhiyun
5376*4882a593Smuzhiyun /* add the overhead of this fib6_nh to nexthop_len */
rt6_nh_nlmsg_size(struct fib6_nh * nh,void * arg)5377*4882a593Smuzhiyun static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
5378*4882a593Smuzhiyun {
5379*4882a593Smuzhiyun int *nexthop_len = arg;
5380*4882a593Smuzhiyun
5381*4882a593Smuzhiyun *nexthop_len += nla_total_size(0) /* RTA_MULTIPATH */
5382*4882a593Smuzhiyun + NLA_ALIGN(sizeof(struct rtnexthop))
5383*4882a593Smuzhiyun + nla_total_size(16); /* RTA_GATEWAY */
5384*4882a593Smuzhiyun
5385*4882a593Smuzhiyun if (nh->fib_nh_lws) {
5386*4882a593Smuzhiyun /* RTA_ENCAP_TYPE */
5387*4882a593Smuzhiyun *nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5388*4882a593Smuzhiyun /* RTA_ENCAP */
5389*4882a593Smuzhiyun *nexthop_len += nla_total_size(2);
5390*4882a593Smuzhiyun }
5391*4882a593Smuzhiyun
5392*4882a593Smuzhiyun return 0;
5393*4882a593Smuzhiyun }
5394*4882a593Smuzhiyun
rt6_nlmsg_size(struct fib6_info * f6i)5395*4882a593Smuzhiyun static size_t rt6_nlmsg_size(struct fib6_info *f6i)
5396*4882a593Smuzhiyun {
5397*4882a593Smuzhiyun int nexthop_len;
5398*4882a593Smuzhiyun
5399*4882a593Smuzhiyun if (f6i->nh) {
5400*4882a593Smuzhiyun nexthop_len = nla_total_size(4); /* RTA_NH_ID */
5401*4882a593Smuzhiyun nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
5402*4882a593Smuzhiyun &nexthop_len);
5403*4882a593Smuzhiyun } else {
5404*4882a593Smuzhiyun struct fib6_nh *nh = f6i->fib6_nh;
5405*4882a593Smuzhiyun
5406*4882a593Smuzhiyun nexthop_len = 0;
5407*4882a593Smuzhiyun if (f6i->fib6_nsiblings) {
5408*4882a593Smuzhiyun nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
5409*4882a593Smuzhiyun + NLA_ALIGN(sizeof(struct rtnexthop))
5410*4882a593Smuzhiyun + nla_total_size(16) /* RTA_GATEWAY */
5411*4882a593Smuzhiyun + lwtunnel_get_encap_size(nh->fib_nh_lws);
5412*4882a593Smuzhiyun
5413*4882a593Smuzhiyun nexthop_len *= f6i->fib6_nsiblings;
5414*4882a593Smuzhiyun }
5415*4882a593Smuzhiyun nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
5416*4882a593Smuzhiyun }
5417*4882a593Smuzhiyun
5418*4882a593Smuzhiyun return NLMSG_ALIGN(sizeof(struct rtmsg))
5419*4882a593Smuzhiyun + nla_total_size(16) /* RTA_SRC */
5420*4882a593Smuzhiyun + nla_total_size(16) /* RTA_DST */
5421*4882a593Smuzhiyun + nla_total_size(16) /* RTA_GATEWAY */
5422*4882a593Smuzhiyun + nla_total_size(16) /* RTA_PREFSRC */
5423*4882a593Smuzhiyun + nla_total_size(4) /* RTA_TABLE */
5424*4882a593Smuzhiyun + nla_total_size(4) /* RTA_IIF */
5425*4882a593Smuzhiyun + nla_total_size(4) /* RTA_OIF */
5426*4882a593Smuzhiyun + nla_total_size(4) /* RTA_PRIORITY */
5427*4882a593Smuzhiyun + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
5428*4882a593Smuzhiyun + nla_total_size(sizeof(struct rta_cacheinfo))
5429*4882a593Smuzhiyun + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
5430*4882a593Smuzhiyun + nla_total_size(1) /* RTA_PREF */
5431*4882a593Smuzhiyun + nexthop_len;
5432*4882a593Smuzhiyun }
5433*4882a593Smuzhiyun
rt6_fill_node_nexthop(struct sk_buff * skb,struct nexthop * nh,unsigned char * flags)5434*4882a593Smuzhiyun static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
5435*4882a593Smuzhiyun unsigned char *flags)
5436*4882a593Smuzhiyun {
5437*4882a593Smuzhiyun if (nexthop_is_multipath(nh)) {
5438*4882a593Smuzhiyun struct nlattr *mp;
5439*4882a593Smuzhiyun
5440*4882a593Smuzhiyun mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
5441*4882a593Smuzhiyun if (!mp)
5442*4882a593Smuzhiyun goto nla_put_failure;
5443*4882a593Smuzhiyun
5444*4882a593Smuzhiyun if (nexthop_mpath_fill_node(skb, nh, AF_INET6))
5445*4882a593Smuzhiyun goto nla_put_failure;
5446*4882a593Smuzhiyun
5447*4882a593Smuzhiyun nla_nest_end(skb, mp);
5448*4882a593Smuzhiyun } else {
5449*4882a593Smuzhiyun struct fib6_nh *fib6_nh;
5450*4882a593Smuzhiyun
5451*4882a593Smuzhiyun fib6_nh = nexthop_fib6_nh(nh);
5452*4882a593Smuzhiyun if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6,
5453*4882a593Smuzhiyun flags, false) < 0)
5454*4882a593Smuzhiyun goto nla_put_failure;
5455*4882a593Smuzhiyun }
5456*4882a593Smuzhiyun
5457*4882a593Smuzhiyun return 0;
5458*4882a593Smuzhiyun
5459*4882a593Smuzhiyun nla_put_failure:
5460*4882a593Smuzhiyun return -EMSGSIZE;
5461*4882a593Smuzhiyun }
5462*4882a593Smuzhiyun
/*
 * Append one route message describing @rt to @skb.
 *
 * @net:    namespace, used for the multicast lookup, source address
 *          selection and the nexthop compat sysctl
 * @skb:    message buffer being filled
 * @rt:     FIB entry to describe
 * @dst:    optional dst entry; when set, keys, flags, metrics, gateway,
 *          output device, expiry and error are taken from it instead of
 *          from @rt
 * @dest:   optional destination address; reported as a /128 RTA_DST
 * @src:    optional source address; reported as a /128 RTA_SRC
 *          (CONFIG_IPV6_SUBTREES only)
 * @iif:    input interface index, 0 if none
 * @type:   netlink message type
 * @portid: netlink port id for the message header
 * @seq:    netlink sequence number for the message header
 * @flags:  netlink message flags
 *
 * Returns 0 on success.  Returns -EMSGSIZE if the header or any
 * attribute does not fit; the partially built message is cancelled when
 * an attribute fails.
 */
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			 struct fib6_info *rt, struct dst_entry *dst,
			 struct in6_addr *dest, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;
	struct rt6key *dst_key, *src_key;
	u32 *metrics, tb_id, rt_flags;
	unsigned char nh_flags = 0;
	struct in6_addr saddr_buf;
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	long expires = 0;

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* with a dst entry its own keys and flags are reported, otherwise
	 * those of the FIB entry
	 */
	dst_key = rt6 ? &rt6->rt6i_dst : &rt->fib6_dst;
	src_key = rt6 ? &rt6->rt6i_src : &rt->fib6_src;
	rt_flags = rt6 ? rt6->rt6i_flags : rt->fib6_flags;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = dst_key->plen;
	rtm->rtm_src_len = src_key->plen;
	rtm->rtm_tos = 0;

	tb_id = rt->fib6_table ? rt->fib6_table->tb6_id : RT6_TABLE_UNSPEC;
	/* ids that do not fit the header field go out as RT_TABLE_COMPAT;
	 * the full id is always carried in RTA_TABLE
	 */
	rtm->rtm_table = tb_id < 256 ? tb_id : RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, tb_id))
		goto nla_put_failure;

	rtm->rtm_type = rt->fib6_type;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->fib6_protocol;

	if (rt_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dest) {
		if (nla_put_in6_addr(skb, RTA_DST, dest))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len &&
		   nla_put_in6_addr(skb, RTA_DST, &dst_key->addr)) {
		goto nla_put_failure;
	}
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &src_key->addr)) {
		goto nla_put_failure;
	}
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&dst_key->addr)) {
			int err = ip6mr_get_route(net, skb, rtm, portid);

			/* a zero return ends the function right here,
			 * without reaching nlmsg_end() below
			 */
			if (!err)
				return 0;
			if (err < 0)
				goto nla_put_failure;
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dest) {
		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->fib6_prefsrc.plen) {
		saddr_buf = rt->fib6_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	metrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
		goto nla_put_failure;

	/* Nexthop information: a dst entry reports its own gateway and
	 * device; a route with siblings is written as RTA_MULTIPATH with
	 * one entry for the route and one per sibling; a route bound to a
	 * nexthop object reports the object id; anything else reports its
	 * single nexthop.
	 */
	if (rt6) {
		if (rt_flags & RTF_GATEWAY &&
		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
			goto nla_put_failure;

		if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
			goto nla_put_failure;
	} else if (rt->fib6_nsiblings) {
		struct fib6_info *sibling, *next_sibling;
		struct nlattr *mp;

		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
				    rt->fib6_nh->fib_nh_weight, AF_INET6,
				    0) < 0)
			goto nla_put_failure;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->fib6_siblings, fib6_siblings) {
			if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
					    sibling->fib6_nh->fib_nh_weight,
					    AF_INET6, 0) < 0)
				goto nla_put_failure;
		}

		nla_nest_end(skb, mp);
	} else if (rt->nh) {
		if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
			goto nla_put_failure;

		if (nexthop_is_blackhole(rt->nh))
			rtm->rtm_type = RTN_BLACKHOLE;

		/* the expanded nexthop attributes are only added when the
		 * nexthop compat sysctl is enabled
		 */
		if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&
		    rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
			goto nla_put_failure;

		rtm->rtm_flags |= nh_flags;
	} else {
		if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6,
				     &nh_flags, false) < 0)
			goto nla_put_failure;

		rtm->rtm_flags |= nh_flags;
	}

	if (rt_flags & RTF_EXPIRES) {
		/* reported relative to the current jiffies value */
		expires = dst ? dst->expires : rt->expires;
		expires -= jiffies;
	}

	if (!dst) {
		if (rt->offload)
			rtm->rtm_flags |= RTM_F_OFFLOAD;
		if (rt->trap)
			rtm->rtm_flags |= RTM_F_TRAP;
	}

	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt_flags)))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
5640*4882a593Smuzhiyun
fib6_info_nh_uses_dev(struct fib6_nh * nh,void * arg)5641*4882a593Smuzhiyun static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
5642*4882a593Smuzhiyun {
5643*4882a593Smuzhiyun const struct net_device *dev = arg;
5644*4882a593Smuzhiyun
5645*4882a593Smuzhiyun if (nh->fib_nh_dev == dev)
5646*4882a593Smuzhiyun return 1;
5647*4882a593Smuzhiyun
5648*4882a593Smuzhiyun return 0;
5649*4882a593Smuzhiyun }
5650*4882a593Smuzhiyun
fib6_info_uses_dev(const struct fib6_info * f6i,const struct net_device * dev)5651*4882a593Smuzhiyun static bool fib6_info_uses_dev(const struct fib6_info *f6i,
5652*4882a593Smuzhiyun const struct net_device *dev)
5653*4882a593Smuzhiyun {
5654*4882a593Smuzhiyun if (f6i->nh) {
5655*4882a593Smuzhiyun struct net_device *_dev = (struct net_device *)dev;
5656*4882a593Smuzhiyun
5657*4882a593Smuzhiyun return !!nexthop_for_each_fib6_nh(f6i->nh,
5658*4882a593Smuzhiyun fib6_info_nh_uses_dev,
5659*4882a593Smuzhiyun _dev);
5660*4882a593Smuzhiyun }
5661*4882a593Smuzhiyun
5662*4882a593Smuzhiyun if (f6i->fib6_nh->fib_nh_dev == dev)
5663*4882a593Smuzhiyun return true;
5664*4882a593Smuzhiyun
5665*4882a593Smuzhiyun if (f6i->fib6_nsiblings) {
5666*4882a593Smuzhiyun struct fib6_info *sibling, *next_sibling;
5667*4882a593Smuzhiyun
5668*4882a593Smuzhiyun list_for_each_entry_safe(sibling, next_sibling,
5669*4882a593Smuzhiyun &f6i->fib6_siblings, fib6_siblings) {
5670*4882a593Smuzhiyun if (sibling->fib6_nh->fib_nh_dev == dev)
5671*4882a593Smuzhiyun return true;
5672*4882a593Smuzhiyun }
5673*4882a593Smuzhiyun }
5674*4882a593Smuzhiyun
5675*4882a593Smuzhiyun return false;
5676*4882a593Smuzhiyun }
5677*4882a593Smuzhiyun
/* State shared between rt6_dump_route() and rt6_nh_dump_exceptions() */
struct fib6_nh_exception_dump_walker {
	/* dump context: supplies net, skb and the netlink callback */
	struct rt6_rtnl_dump_arg *dump;
	/* FIB entry handed to rt6_fill_node() for every exception */
	struct fib6_info *rt;
	/* netlink message flags handed to rt6_fill_node() */
	unsigned int flags;
	/* entries still to be skipped; decremented as they are passed */
	unsigned int skip;
	/* entries handled so far, expired ones included */
	unsigned int count;
};
5685*4882a593Smuzhiyun
rt6_nh_dump_exceptions(struct fib6_nh * nh,void * arg)5686*4882a593Smuzhiyun static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
5687*4882a593Smuzhiyun {
5688*4882a593Smuzhiyun struct fib6_nh_exception_dump_walker *w = arg;
5689*4882a593Smuzhiyun struct rt6_rtnl_dump_arg *dump = w->dump;
5690*4882a593Smuzhiyun struct rt6_exception_bucket *bucket;
5691*4882a593Smuzhiyun struct rt6_exception *rt6_ex;
5692*4882a593Smuzhiyun int i, err;
5693*4882a593Smuzhiyun
5694*4882a593Smuzhiyun bucket = fib6_nh_get_excptn_bucket(nh, NULL);
5695*4882a593Smuzhiyun if (!bucket)
5696*4882a593Smuzhiyun return 0;
5697*4882a593Smuzhiyun
5698*4882a593Smuzhiyun for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
5699*4882a593Smuzhiyun hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
5700*4882a593Smuzhiyun if (w->skip) {
5701*4882a593Smuzhiyun w->skip--;
5702*4882a593Smuzhiyun continue;
5703*4882a593Smuzhiyun }
5704*4882a593Smuzhiyun
5705*4882a593Smuzhiyun /* Expiration of entries doesn't bump sernum, insertion
5706*4882a593Smuzhiyun * does. Removal is triggered by insertion, so we can
5707*4882a593Smuzhiyun * rely on the fact that if entries change between two
5708*4882a593Smuzhiyun * partial dumps, this node is scanned again completely,
5709*4882a593Smuzhiyun * see rt6_insert_exception() and fib6_dump_table().
5710*4882a593Smuzhiyun *
5711*4882a593Smuzhiyun * Count expired entries we go through as handled
5712*4882a593Smuzhiyun * entries that we'll skip next time, in case of partial
5713*4882a593Smuzhiyun * node dump. Otherwise, if entries expire meanwhile,
5714*4882a593Smuzhiyun * we'll skip the wrong amount.
5715*4882a593Smuzhiyun */
5716*4882a593Smuzhiyun if (rt6_check_expired(rt6_ex->rt6i)) {
5717*4882a593Smuzhiyun w->count++;
5718*4882a593Smuzhiyun continue;
5719*4882a593Smuzhiyun }
5720*4882a593Smuzhiyun
5721*4882a593Smuzhiyun err = rt6_fill_node(dump->net, dump->skb, w->rt,
5722*4882a593Smuzhiyun &rt6_ex->rt6i->dst, NULL, NULL, 0,
5723*4882a593Smuzhiyun RTM_NEWROUTE,
5724*4882a593Smuzhiyun NETLINK_CB(dump->cb->skb).portid,
5725*4882a593Smuzhiyun dump->cb->nlh->nlmsg_seq, w->flags);
5726*4882a593Smuzhiyun if (err)
5727*4882a593Smuzhiyun return err;
5728*4882a593Smuzhiyun
5729*4882a593Smuzhiyun w->count++;
5730*4882a593Smuzhiyun }
5731*4882a593Smuzhiyun bucket++;
5732*4882a593Smuzhiyun }
5733*4882a593Smuzhiyun
5734*4882a593Smuzhiyun return 0;
5735*4882a593Smuzhiyun }
5736*4882a593Smuzhiyun
5737*4882a593Smuzhiyun /* Return -1 if done with node, number of handled routes on partial dump */
rt6_dump_route(struct fib6_info * rt,void * p_arg,unsigned int skip)5738*4882a593Smuzhiyun int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
5739*4882a593Smuzhiyun {
5740*4882a593Smuzhiyun struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
5741*4882a593Smuzhiyun struct fib_dump_filter *filter = &arg->filter;
5742*4882a593Smuzhiyun unsigned int flags = NLM_F_MULTI;
5743*4882a593Smuzhiyun struct net *net = arg->net;
5744*4882a593Smuzhiyun int count = 0;
5745*4882a593Smuzhiyun
5746*4882a593Smuzhiyun if (rt == net->ipv6.fib6_null_entry)
5747*4882a593Smuzhiyun return -1;
5748*4882a593Smuzhiyun
5749*4882a593Smuzhiyun if ((filter->flags & RTM_F_PREFIX) &&
5750*4882a593Smuzhiyun !(rt->fib6_flags & RTF_PREFIX_RT)) {
5751*4882a593Smuzhiyun /* success since this is not a prefix route */
5752*4882a593Smuzhiyun return -1;
5753*4882a593Smuzhiyun }
5754*4882a593Smuzhiyun if (filter->filter_set &&
5755*4882a593Smuzhiyun ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
5756*4882a593Smuzhiyun (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
5757*4882a593Smuzhiyun (filter->protocol && rt->fib6_protocol != filter->protocol))) {
5758*4882a593Smuzhiyun return -1;
5759*4882a593Smuzhiyun }
5760*4882a593Smuzhiyun
5761*4882a593Smuzhiyun if (filter->filter_set ||
5762*4882a593Smuzhiyun !filter->dump_routes || !filter->dump_exceptions) {
5763*4882a593Smuzhiyun flags |= NLM_F_DUMP_FILTERED;
5764*4882a593Smuzhiyun }
5765*4882a593Smuzhiyun
5766*4882a593Smuzhiyun if (filter->dump_routes) {
5767*4882a593Smuzhiyun if (skip) {
5768*4882a593Smuzhiyun skip--;
5769*4882a593Smuzhiyun } else {
5770*4882a593Smuzhiyun if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
5771*4882a593Smuzhiyun 0, RTM_NEWROUTE,
5772*4882a593Smuzhiyun NETLINK_CB(arg->cb->skb).portid,
5773*4882a593Smuzhiyun arg->cb->nlh->nlmsg_seq, flags)) {
5774*4882a593Smuzhiyun return 0;
5775*4882a593Smuzhiyun }
5776*4882a593Smuzhiyun count++;
5777*4882a593Smuzhiyun }
5778*4882a593Smuzhiyun }
5779*4882a593Smuzhiyun
5780*4882a593Smuzhiyun if (filter->dump_exceptions) {
5781*4882a593Smuzhiyun struct fib6_nh_exception_dump_walker w = { .dump = arg,
5782*4882a593Smuzhiyun .rt = rt,
5783*4882a593Smuzhiyun .flags = flags,
5784*4882a593Smuzhiyun .skip = skip,
5785*4882a593Smuzhiyun .count = 0 };
5786*4882a593Smuzhiyun int err;
5787*4882a593Smuzhiyun
5788*4882a593Smuzhiyun rcu_read_lock();
5789*4882a593Smuzhiyun if (rt->nh) {
5790*4882a593Smuzhiyun err = nexthop_for_each_fib6_nh(rt->nh,
5791*4882a593Smuzhiyun rt6_nh_dump_exceptions,
5792*4882a593Smuzhiyun &w);
5793*4882a593Smuzhiyun } else {
5794*4882a593Smuzhiyun err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
5795*4882a593Smuzhiyun }
5796*4882a593Smuzhiyun rcu_read_unlock();
5797*4882a593Smuzhiyun
5798*4882a593Smuzhiyun if (err)
5799*4882a593Smuzhiyun return count += w.count;
5800*4882a593Smuzhiyun }
5801*4882a593Smuzhiyun
5802*4882a593Smuzhiyun return -1;
5803*4882a593Smuzhiyun }
5804*4882a593Smuzhiyun
inet6_rtm_valid_getroute_req(struct sk_buff * skb,const struct nlmsghdr * nlh,struct nlattr ** tb,struct netlink_ext_ack * extack)5805*4882a593Smuzhiyun static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
5806*4882a593Smuzhiyun const struct nlmsghdr *nlh,
5807*4882a593Smuzhiyun struct nlattr **tb,
5808*4882a593Smuzhiyun struct netlink_ext_ack *extack)
5809*4882a593Smuzhiyun {
5810*4882a593Smuzhiyun struct rtmsg *rtm;
5811*4882a593Smuzhiyun int i, err;
5812*4882a593Smuzhiyun
5813*4882a593Smuzhiyun if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
5814*4882a593Smuzhiyun NL_SET_ERR_MSG_MOD(extack,
5815*4882a593Smuzhiyun "Invalid header for get route request");
5816*4882a593Smuzhiyun return -EINVAL;
5817*4882a593Smuzhiyun }
5818*4882a593Smuzhiyun
5819*4882a593Smuzhiyun if (!netlink_strict_get_check(skb))
5820*4882a593Smuzhiyun return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
5821*4882a593Smuzhiyun rtm_ipv6_policy, extack);
5822*4882a593Smuzhiyun
5823*4882a593Smuzhiyun rtm = nlmsg_data(nlh);
5824*4882a593Smuzhiyun if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
5825*4882a593Smuzhiyun (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
5826*4882a593Smuzhiyun rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
5827*4882a593Smuzhiyun rtm->rtm_type) {
5828*4882a593Smuzhiyun NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
5829*4882a593Smuzhiyun return -EINVAL;
5830*4882a593Smuzhiyun }
5831*4882a593Smuzhiyun if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
5832*4882a593Smuzhiyun NL_SET_ERR_MSG_MOD(extack,
5833*4882a593Smuzhiyun "Invalid flags for get route request");
5834*4882a593Smuzhiyun return -EINVAL;
5835*4882a593Smuzhiyun }
5836*4882a593Smuzhiyun
5837*4882a593Smuzhiyun err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
5838*4882a593Smuzhiyun rtm_ipv6_policy, extack);
5839*4882a593Smuzhiyun if (err)
5840*4882a593Smuzhiyun return err;
5841*4882a593Smuzhiyun
5842*4882a593Smuzhiyun if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
5843*4882a593Smuzhiyun (tb[RTA_DST] && !rtm->rtm_dst_len)) {
5844*4882a593Smuzhiyun NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
5845*4882a593Smuzhiyun return -EINVAL;
5846*4882a593Smuzhiyun }
5847*4882a593Smuzhiyun
5848*4882a593Smuzhiyun for (i = 0; i <= RTA_MAX; i++) {
5849*4882a593Smuzhiyun if (!tb[i])
5850*4882a593Smuzhiyun continue;
5851*4882a593Smuzhiyun
5852*4882a593Smuzhiyun switch (i) {
5853*4882a593Smuzhiyun case RTA_SRC:
5854*4882a593Smuzhiyun case RTA_DST:
5855*4882a593Smuzhiyun case RTA_IIF:
5856*4882a593Smuzhiyun case RTA_OIF:
5857*4882a593Smuzhiyun case RTA_MARK:
5858*4882a593Smuzhiyun case RTA_UID:
5859*4882a593Smuzhiyun case RTA_SPORT:
5860*4882a593Smuzhiyun case RTA_DPORT:
5861*4882a593Smuzhiyun case RTA_IP_PROTO:
5862*4882a593Smuzhiyun break;
5863*4882a593Smuzhiyun default:
5864*4882a593Smuzhiyun NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
5865*4882a593Smuzhiyun return -EINVAL;
5866*4882a593Smuzhiyun }
5867*4882a593Smuzhiyun }
5868*4882a593Smuzhiyun
5869*4882a593Smuzhiyun return 0;
5870*4882a593Smuzhiyun }
5871*4882a593Smuzhiyun
inet6_rtm_getroute(struct sk_buff * in_skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)5872*4882a593Smuzhiyun static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
5873*4882a593Smuzhiyun struct netlink_ext_ack *extack)
5874*4882a593Smuzhiyun {
5875*4882a593Smuzhiyun struct net *net = sock_net(in_skb->sk);
5876*4882a593Smuzhiyun struct nlattr *tb[RTA_MAX+1];
5877*4882a593Smuzhiyun int err, iif = 0, oif = 0;
5878*4882a593Smuzhiyun struct fib6_info *from;
5879*4882a593Smuzhiyun struct dst_entry *dst;
5880*4882a593Smuzhiyun struct rt6_info *rt;
5881*4882a593Smuzhiyun struct sk_buff *skb;
5882*4882a593Smuzhiyun struct rtmsg *rtm;
5883*4882a593Smuzhiyun struct flowi6 fl6 = {};
5884*4882a593Smuzhiyun bool fibmatch;
5885*4882a593Smuzhiyun
5886*4882a593Smuzhiyun err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
5887*4882a593Smuzhiyun if (err < 0)
5888*4882a593Smuzhiyun goto errout;
5889*4882a593Smuzhiyun
5890*4882a593Smuzhiyun err = -EINVAL;
5891*4882a593Smuzhiyun rtm = nlmsg_data(nlh);
5892*4882a593Smuzhiyun fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
5893*4882a593Smuzhiyun fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
5894*4882a593Smuzhiyun
5895*4882a593Smuzhiyun if (tb[RTA_SRC]) {
5896*4882a593Smuzhiyun if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
5897*4882a593Smuzhiyun goto errout;
5898*4882a593Smuzhiyun
5899*4882a593Smuzhiyun fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
5900*4882a593Smuzhiyun }
5901*4882a593Smuzhiyun
5902*4882a593Smuzhiyun if (tb[RTA_DST]) {
5903*4882a593Smuzhiyun if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
5904*4882a593Smuzhiyun goto errout;
5905*4882a593Smuzhiyun
5906*4882a593Smuzhiyun fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
5907*4882a593Smuzhiyun }
5908*4882a593Smuzhiyun
5909*4882a593Smuzhiyun if (tb[RTA_IIF])
5910*4882a593Smuzhiyun iif = nla_get_u32(tb[RTA_IIF]);
5911*4882a593Smuzhiyun
5912*4882a593Smuzhiyun if (tb[RTA_OIF])
5913*4882a593Smuzhiyun oif = nla_get_u32(tb[RTA_OIF]);
5914*4882a593Smuzhiyun
5915*4882a593Smuzhiyun if (tb[RTA_MARK])
5916*4882a593Smuzhiyun fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
5917*4882a593Smuzhiyun
5918*4882a593Smuzhiyun if (tb[RTA_UID])
5919*4882a593Smuzhiyun fl6.flowi6_uid = make_kuid(current_user_ns(),
5920*4882a593Smuzhiyun nla_get_u32(tb[RTA_UID]));
5921*4882a593Smuzhiyun else
5922*4882a593Smuzhiyun fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
5923*4882a593Smuzhiyun
5924*4882a593Smuzhiyun if (tb[RTA_SPORT])
5925*4882a593Smuzhiyun fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
5926*4882a593Smuzhiyun
5927*4882a593Smuzhiyun if (tb[RTA_DPORT])
5928*4882a593Smuzhiyun fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
5929*4882a593Smuzhiyun
5930*4882a593Smuzhiyun if (tb[RTA_IP_PROTO]) {
5931*4882a593Smuzhiyun err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5932*4882a593Smuzhiyun &fl6.flowi6_proto, AF_INET6,
5933*4882a593Smuzhiyun extack);
5934*4882a593Smuzhiyun if (err)
5935*4882a593Smuzhiyun goto errout;
5936*4882a593Smuzhiyun }
5937*4882a593Smuzhiyun
5938*4882a593Smuzhiyun if (iif) {
5939*4882a593Smuzhiyun struct net_device *dev;
5940*4882a593Smuzhiyun int flags = 0;
5941*4882a593Smuzhiyun
5942*4882a593Smuzhiyun rcu_read_lock();
5943*4882a593Smuzhiyun
5944*4882a593Smuzhiyun dev = dev_get_by_index_rcu(net, iif);
5945*4882a593Smuzhiyun if (!dev) {
5946*4882a593Smuzhiyun rcu_read_unlock();
5947*4882a593Smuzhiyun err = -ENODEV;
5948*4882a593Smuzhiyun goto errout;
5949*4882a593Smuzhiyun }
5950*4882a593Smuzhiyun
5951*4882a593Smuzhiyun fl6.flowi6_iif = iif;
5952*4882a593Smuzhiyun
5953*4882a593Smuzhiyun if (!ipv6_addr_any(&fl6.saddr))
5954*4882a593Smuzhiyun flags |= RT6_LOOKUP_F_HAS_SADDR;
5955*4882a593Smuzhiyun
5956*4882a593Smuzhiyun dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
5957*4882a593Smuzhiyun
5958*4882a593Smuzhiyun rcu_read_unlock();
5959*4882a593Smuzhiyun } else {
5960*4882a593Smuzhiyun fl6.flowi6_oif = oif;
5961*4882a593Smuzhiyun
5962*4882a593Smuzhiyun dst = ip6_route_output(net, NULL, &fl6);
5963*4882a593Smuzhiyun }
5964*4882a593Smuzhiyun
5965*4882a593Smuzhiyun
5966*4882a593Smuzhiyun rt = container_of(dst, struct rt6_info, dst);
5967*4882a593Smuzhiyun if (rt->dst.error) {
5968*4882a593Smuzhiyun err = rt->dst.error;
5969*4882a593Smuzhiyun ip6_rt_put(rt);
5970*4882a593Smuzhiyun goto errout;
5971*4882a593Smuzhiyun }
5972*4882a593Smuzhiyun
5973*4882a593Smuzhiyun if (rt == net->ipv6.ip6_null_entry) {
5974*4882a593Smuzhiyun err = rt->dst.error;
5975*4882a593Smuzhiyun ip6_rt_put(rt);
5976*4882a593Smuzhiyun goto errout;
5977*4882a593Smuzhiyun }
5978*4882a593Smuzhiyun
5979*4882a593Smuzhiyun skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
5980*4882a593Smuzhiyun if (!skb) {
5981*4882a593Smuzhiyun ip6_rt_put(rt);
5982*4882a593Smuzhiyun err = -ENOBUFS;
5983*4882a593Smuzhiyun goto errout;
5984*4882a593Smuzhiyun }
5985*4882a593Smuzhiyun
5986*4882a593Smuzhiyun skb_dst_set(skb, &rt->dst);
5987*4882a593Smuzhiyun
5988*4882a593Smuzhiyun rcu_read_lock();
5989*4882a593Smuzhiyun from = rcu_dereference(rt->from);
5990*4882a593Smuzhiyun if (from) {
5991*4882a593Smuzhiyun if (fibmatch)
5992*4882a593Smuzhiyun err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
5993*4882a593Smuzhiyun iif, RTM_NEWROUTE,
5994*4882a593Smuzhiyun NETLINK_CB(in_skb).portid,
5995*4882a593Smuzhiyun nlh->nlmsg_seq, 0);
5996*4882a593Smuzhiyun else
5997*4882a593Smuzhiyun err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5998*4882a593Smuzhiyun &fl6.saddr, iif, RTM_NEWROUTE,
5999*4882a593Smuzhiyun NETLINK_CB(in_skb).portid,
6000*4882a593Smuzhiyun nlh->nlmsg_seq, 0);
6001*4882a593Smuzhiyun } else {
6002*4882a593Smuzhiyun err = -ENETUNREACH;
6003*4882a593Smuzhiyun }
6004*4882a593Smuzhiyun rcu_read_unlock();
6005*4882a593Smuzhiyun
6006*4882a593Smuzhiyun if (err < 0) {
6007*4882a593Smuzhiyun kfree_skb(skb);
6008*4882a593Smuzhiyun goto errout;
6009*4882a593Smuzhiyun }
6010*4882a593Smuzhiyun
6011*4882a593Smuzhiyun err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
6012*4882a593Smuzhiyun errout:
6013*4882a593Smuzhiyun return err;
6014*4882a593Smuzhiyun }
6015*4882a593Smuzhiyun
inet6_rt_notify(int event,struct fib6_info * rt,struct nl_info * info,unsigned int nlm_flags)6016*4882a593Smuzhiyun void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
6017*4882a593Smuzhiyun unsigned int nlm_flags)
6018*4882a593Smuzhiyun {
6019*4882a593Smuzhiyun struct sk_buff *skb;
6020*4882a593Smuzhiyun struct net *net = info->nl_net;
6021*4882a593Smuzhiyun u32 seq;
6022*4882a593Smuzhiyun int err;
6023*4882a593Smuzhiyun
6024*4882a593Smuzhiyun err = -ENOBUFS;
6025*4882a593Smuzhiyun seq = info->nlh ? info->nlh->nlmsg_seq : 0;
6026*4882a593Smuzhiyun
6027*4882a593Smuzhiyun skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
6028*4882a593Smuzhiyun if (!skb)
6029*4882a593Smuzhiyun goto errout;
6030*4882a593Smuzhiyun
6031*4882a593Smuzhiyun err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
6032*4882a593Smuzhiyun event, info->portid, seq, nlm_flags);
6033*4882a593Smuzhiyun if (err < 0) {
6034*4882a593Smuzhiyun /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6035*4882a593Smuzhiyun WARN_ON(err == -EMSGSIZE);
6036*4882a593Smuzhiyun kfree_skb(skb);
6037*4882a593Smuzhiyun goto errout;
6038*4882a593Smuzhiyun }
6039*4882a593Smuzhiyun rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
6040*4882a593Smuzhiyun info->nlh, gfp_any());
6041*4882a593Smuzhiyun return;
6042*4882a593Smuzhiyun errout:
6043*4882a593Smuzhiyun if (err < 0)
6044*4882a593Smuzhiyun rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6045*4882a593Smuzhiyun }
6046*4882a593Smuzhiyun
fib6_rt_update(struct net * net,struct fib6_info * rt,struct nl_info * info)6047*4882a593Smuzhiyun void fib6_rt_update(struct net *net, struct fib6_info *rt,
6048*4882a593Smuzhiyun struct nl_info *info)
6049*4882a593Smuzhiyun {
6050*4882a593Smuzhiyun u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
6051*4882a593Smuzhiyun struct sk_buff *skb;
6052*4882a593Smuzhiyun int err = -ENOBUFS;
6053*4882a593Smuzhiyun
6054*4882a593Smuzhiyun /* call_fib6_entry_notifiers will be removed when in-kernel notifier
6055*4882a593Smuzhiyun * is implemented and supported for nexthop objects
6056*4882a593Smuzhiyun */
6057*4882a593Smuzhiyun call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
6058*4882a593Smuzhiyun
6059*4882a593Smuzhiyun skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
6060*4882a593Smuzhiyun if (!skb)
6061*4882a593Smuzhiyun goto errout;
6062*4882a593Smuzhiyun
6063*4882a593Smuzhiyun err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
6064*4882a593Smuzhiyun RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
6065*4882a593Smuzhiyun if (err < 0) {
6066*4882a593Smuzhiyun /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
6067*4882a593Smuzhiyun WARN_ON(err == -EMSGSIZE);
6068*4882a593Smuzhiyun kfree_skb(skb);
6069*4882a593Smuzhiyun goto errout;
6070*4882a593Smuzhiyun }
6071*4882a593Smuzhiyun rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
6072*4882a593Smuzhiyun info->nlh, gfp_any());
6073*4882a593Smuzhiyun return;
6074*4882a593Smuzhiyun errout:
6075*4882a593Smuzhiyun if (err < 0)
6076*4882a593Smuzhiyun rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
6077*4882a593Smuzhiyun }
6078*4882a593Smuzhiyun
ip6_route_dev_notify(struct notifier_block * this,unsigned long event,void * ptr)6079*4882a593Smuzhiyun static int ip6_route_dev_notify(struct notifier_block *this,
6080*4882a593Smuzhiyun unsigned long event, void *ptr)
6081*4882a593Smuzhiyun {
6082*4882a593Smuzhiyun struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6083*4882a593Smuzhiyun struct net *net = dev_net(dev);
6084*4882a593Smuzhiyun
6085*4882a593Smuzhiyun if (!(dev->flags & IFF_LOOPBACK))
6086*4882a593Smuzhiyun return NOTIFY_OK;
6087*4882a593Smuzhiyun
6088*4882a593Smuzhiyun if (event == NETDEV_REGISTER) {
6089*4882a593Smuzhiyun net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
6090*4882a593Smuzhiyun net->ipv6.ip6_null_entry->dst.dev = dev;
6091*4882a593Smuzhiyun net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
6092*4882a593Smuzhiyun #ifdef CONFIG_IPV6_MULTIPLE_TABLES
6093*4882a593Smuzhiyun net->ipv6.ip6_prohibit_entry->dst.dev = dev;
6094*4882a593Smuzhiyun net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
6095*4882a593Smuzhiyun net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
6096*4882a593Smuzhiyun net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
6097*4882a593Smuzhiyun #endif
6098*4882a593Smuzhiyun } else if (event == NETDEV_UNREGISTER &&
6099*4882a593Smuzhiyun dev->reg_state != NETREG_UNREGISTERED) {
6100*4882a593Smuzhiyun /* NETDEV_UNREGISTER could be fired for multiple times by
6101*4882a593Smuzhiyun * netdev_wait_allrefs(). Make sure we only call this once.
6102*4882a593Smuzhiyun */
6103*4882a593Smuzhiyun in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
6104*4882a593Smuzhiyun #ifdef CONFIG_IPV6_MULTIPLE_TABLES
6105*4882a593Smuzhiyun in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
6106*4882a593Smuzhiyun in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
6107*4882a593Smuzhiyun #endif
6108*4882a593Smuzhiyun }
6109*4882a593Smuzhiyun
6110*4882a593Smuzhiyun return NOTIFY_OK;
6111*4882a593Smuzhiyun }
6112*4882a593Smuzhiyun
6113*4882a593Smuzhiyun /*
6114*4882a593Smuzhiyun * /proc
6115*4882a593Smuzhiyun */
6116*4882a593Smuzhiyun
6117*4882a593Smuzhiyun #ifdef CONFIG_PROC_FS
rt6_stats_seq_show(struct seq_file * seq,void * v)6118*4882a593Smuzhiyun static int rt6_stats_seq_show(struct seq_file *seq, void *v)
6119*4882a593Smuzhiyun {
6120*4882a593Smuzhiyun struct net *net = (struct net *)seq->private;
6121*4882a593Smuzhiyun seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
6122*4882a593Smuzhiyun net->ipv6.rt6_stats->fib_nodes,
6123*4882a593Smuzhiyun net->ipv6.rt6_stats->fib_route_nodes,
6124*4882a593Smuzhiyun atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
6125*4882a593Smuzhiyun net->ipv6.rt6_stats->fib_rt_entries,
6126*4882a593Smuzhiyun net->ipv6.rt6_stats->fib_rt_cache,
6127*4882a593Smuzhiyun dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
6128*4882a593Smuzhiyun net->ipv6.rt6_stats->fib_discarded_routes);
6129*4882a593Smuzhiyun
6130*4882a593Smuzhiyun return 0;
6131*4882a593Smuzhiyun }
6132*4882a593Smuzhiyun #endif /* CONFIG_PROC_FS */
6133*4882a593Smuzhiyun
6134*4882a593Smuzhiyun #ifdef CONFIG_SYSCTL
6135*4882a593Smuzhiyun
/*
 * Handler for the write-only "flush" sysctl.
 *
 * @ctl:    table entry; ->data points at the namespace's flush_delay and
 *          ->extra1 carries the owning struct net
 * @write:  must be non-zero; reads are rejected with -EINVAL
 * @buffer, @lenp, @ppos: forwarded to proc_dointvec()
 *
 * Stores the written value and then runs the FIB garbage collector with
 * it: a non-positive delay requests expiry 0 with the third argument
 * false, a positive delay is used as the expiry with the third argument
 * true.
 *
 * Returns 0 on success or the error from proc_dointvec().
 */
static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net;
	int delay;
	int ret;

	if (!write)
		return -EINVAL;

	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	if (ret)
		return ret;

	/* Read the delay only after proc_dointvec() has stored it, so the
	 * flush uses the value just written rather than the previous one.
	 */
	net = (struct net *)ctl->extra1;
	delay = net->ipv6.sysctl.flush_delay;
	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
	return 0;
}
6154*4882a593Smuzhiyun
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and rewrites each
 * entry's .data pointer by numeric index, so the order of the entries
 * below must stay in sync with the indices used there.
 */
static struct ctl_table ipv6_route_table_template[] = {
	{
		/* [0] write-only trigger; the handler rejects reads */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		/* [1] */
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [2] */
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [3] same variable as [9], handled by proc_dointvec_jiffies */
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [4] */
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [5] */
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [6] */
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [7] */
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		/* [8] */
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* [9] same variable as [3], handled by proc_dointvec_ms_jiffies */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{
		/* [10] boolean, clamped to 0..1 */
		.procname	=	"skip_notify_on_dev_down",
		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_minmax,
		.extra1		=	SYSCTL_ZERO,
		.extra2		=	SYSCTL_ONE,
	},
	{ }	/* empty terminating entry */
};
6237*4882a593Smuzhiyun
ipv6_route_sysctl_init(struct net * net)6238*4882a593Smuzhiyun struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
6239*4882a593Smuzhiyun {
6240*4882a593Smuzhiyun struct ctl_table *table;
6241*4882a593Smuzhiyun
6242*4882a593Smuzhiyun table = kmemdup(ipv6_route_table_template,
6243*4882a593Smuzhiyun sizeof(ipv6_route_table_template),
6244*4882a593Smuzhiyun GFP_KERNEL);
6245*4882a593Smuzhiyun
6246*4882a593Smuzhiyun if (table) {
6247*4882a593Smuzhiyun table[0].data = &net->ipv6.sysctl.flush_delay;
6248*4882a593Smuzhiyun table[0].extra1 = net;
6249*4882a593Smuzhiyun table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
6250*4882a593Smuzhiyun table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
6251*4882a593Smuzhiyun table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6252*4882a593Smuzhiyun table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
6253*4882a593Smuzhiyun table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
6254*4882a593Smuzhiyun table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
6255*4882a593Smuzhiyun table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
6256*4882a593Smuzhiyun table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
6257*4882a593Smuzhiyun table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
6258*4882a593Smuzhiyun table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
6259*4882a593Smuzhiyun
6260*4882a593Smuzhiyun /* Don't export sysctls to unprivileged users */
6261*4882a593Smuzhiyun if (net->user_ns != &init_user_ns)
6262*4882a593Smuzhiyun table[0].procname = NULL;
6263*4882a593Smuzhiyun }
6264*4882a593Smuzhiyun
6265*4882a593Smuzhiyun return table;
6266*4882a593Smuzhiyun }
6267*4882a593Smuzhiyun #endif
6268*4882a593Smuzhiyun
ip6_route_net_init(struct net * net)6269*4882a593Smuzhiyun static int __net_init ip6_route_net_init(struct net *net)
6270*4882a593Smuzhiyun {
6271*4882a593Smuzhiyun int ret = -ENOMEM;
6272*4882a593Smuzhiyun
6273*4882a593Smuzhiyun memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
6274*4882a593Smuzhiyun sizeof(net->ipv6.ip6_dst_ops));
6275*4882a593Smuzhiyun
6276*4882a593Smuzhiyun if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
6277*4882a593Smuzhiyun goto out_ip6_dst_ops;
6278*4882a593Smuzhiyun
6279*4882a593Smuzhiyun net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
6280*4882a593Smuzhiyun if (!net->ipv6.fib6_null_entry)
6281*4882a593Smuzhiyun goto out_ip6_dst_entries;
6282*4882a593Smuzhiyun memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
6283*4882a593Smuzhiyun sizeof(*net->ipv6.fib6_null_entry));
6284*4882a593Smuzhiyun
6285*4882a593Smuzhiyun net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
6286*4882a593Smuzhiyun sizeof(*net->ipv6.ip6_null_entry),
6287*4882a593Smuzhiyun GFP_KERNEL);
6288*4882a593Smuzhiyun if (!net->ipv6.ip6_null_entry)
6289*4882a593Smuzhiyun goto out_fib6_null_entry;
6290*4882a593Smuzhiyun net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6291*4882a593Smuzhiyun dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
6292*4882a593Smuzhiyun ip6_template_metrics, true);
6293*4882a593Smuzhiyun INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached);
6294*4882a593Smuzhiyun
6295*4882a593Smuzhiyun #ifdef CONFIG_IPV6_MULTIPLE_TABLES
6296*4882a593Smuzhiyun net->ipv6.fib6_has_custom_rules = false;
6297*4882a593Smuzhiyun net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
6298*4882a593Smuzhiyun sizeof(*net->ipv6.ip6_prohibit_entry),
6299*4882a593Smuzhiyun GFP_KERNEL);
6300*4882a593Smuzhiyun if (!net->ipv6.ip6_prohibit_entry)
6301*4882a593Smuzhiyun goto out_ip6_null_entry;
6302*4882a593Smuzhiyun net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6303*4882a593Smuzhiyun dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
6304*4882a593Smuzhiyun ip6_template_metrics, true);
6305*4882a593Smuzhiyun INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached);
6306*4882a593Smuzhiyun
6307*4882a593Smuzhiyun net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
6308*4882a593Smuzhiyun sizeof(*net->ipv6.ip6_blk_hole_entry),
6309*4882a593Smuzhiyun GFP_KERNEL);
6310*4882a593Smuzhiyun if (!net->ipv6.ip6_blk_hole_entry)
6311*4882a593Smuzhiyun goto out_ip6_prohibit_entry;
6312*4882a593Smuzhiyun net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
6313*4882a593Smuzhiyun dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
6314*4882a593Smuzhiyun ip6_template_metrics, true);
6315*4882a593Smuzhiyun INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
6316*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
6317*4882a593Smuzhiyun net->ipv6.fib6_routes_require_src = 0;
6318*4882a593Smuzhiyun #endif
6319*4882a593Smuzhiyun #endif
6320*4882a593Smuzhiyun
6321*4882a593Smuzhiyun net->ipv6.sysctl.flush_delay = 0;
6322*4882a593Smuzhiyun net->ipv6.sysctl.ip6_rt_max_size = 4096;
6323*4882a593Smuzhiyun net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
6324*4882a593Smuzhiyun net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
6325*4882a593Smuzhiyun net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
6326*4882a593Smuzhiyun net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
6327*4882a593Smuzhiyun net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
6328*4882a593Smuzhiyun net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
6329*4882a593Smuzhiyun net->ipv6.sysctl.skip_notify_on_dev_down = 0;
6330*4882a593Smuzhiyun
6331*4882a593Smuzhiyun net->ipv6.ip6_rt_gc_expire = 30*HZ;
6332*4882a593Smuzhiyun
6333*4882a593Smuzhiyun ret = 0;
6334*4882a593Smuzhiyun out:
6335*4882a593Smuzhiyun return ret;
6336*4882a593Smuzhiyun
6337*4882a593Smuzhiyun #ifdef CONFIG_IPV6_MULTIPLE_TABLES
6338*4882a593Smuzhiyun out_ip6_prohibit_entry:
6339*4882a593Smuzhiyun kfree(net->ipv6.ip6_prohibit_entry);
6340*4882a593Smuzhiyun out_ip6_null_entry:
6341*4882a593Smuzhiyun kfree(net->ipv6.ip6_null_entry);
6342*4882a593Smuzhiyun #endif
6343*4882a593Smuzhiyun out_fib6_null_entry:
6344*4882a593Smuzhiyun kfree(net->ipv6.fib6_null_entry);
6345*4882a593Smuzhiyun out_ip6_dst_entries:
6346*4882a593Smuzhiyun dst_entries_destroy(&net->ipv6.ip6_dst_ops);
6347*4882a593Smuzhiyun out_ip6_dst_ops:
6348*4882a593Smuzhiyun goto out;
6349*4882a593Smuzhiyun }
6350*4882a593Smuzhiyun
ip6_route_net_exit(struct net * net)6351*4882a593Smuzhiyun static void __net_exit ip6_route_net_exit(struct net *net)
6352*4882a593Smuzhiyun {
6353*4882a593Smuzhiyun kfree(net->ipv6.fib6_null_entry);
6354*4882a593Smuzhiyun kfree(net->ipv6.ip6_null_entry);
6355*4882a593Smuzhiyun #ifdef CONFIG_IPV6_MULTIPLE_TABLES
6356*4882a593Smuzhiyun kfree(net->ipv6.ip6_prohibit_entry);
6357*4882a593Smuzhiyun kfree(net->ipv6.ip6_blk_hole_entry);
6358*4882a593Smuzhiyun #endif
6359*4882a593Smuzhiyun dst_entries_destroy(&net->ipv6.ip6_dst_ops);
6360*4882a593Smuzhiyun }
6361*4882a593Smuzhiyun
ip6_route_net_init_late(struct net * net)6362*4882a593Smuzhiyun static int __net_init ip6_route_net_init_late(struct net *net)
6363*4882a593Smuzhiyun {
6364*4882a593Smuzhiyun #ifdef CONFIG_PROC_FS
6365*4882a593Smuzhiyun if (!proc_create_net("ipv6_route", 0, net->proc_net,
6366*4882a593Smuzhiyun &ipv6_route_seq_ops,
6367*4882a593Smuzhiyun sizeof(struct ipv6_route_iter)))
6368*4882a593Smuzhiyun return -ENOMEM;
6369*4882a593Smuzhiyun
6370*4882a593Smuzhiyun if (!proc_create_net_single("rt6_stats", 0444, net->proc_net,
6371*4882a593Smuzhiyun rt6_stats_seq_show, NULL)) {
6372*4882a593Smuzhiyun remove_proc_entry("ipv6_route", net->proc_net);
6373*4882a593Smuzhiyun return -ENOMEM;
6374*4882a593Smuzhiyun }
6375*4882a593Smuzhiyun #endif
6376*4882a593Smuzhiyun return 0;
6377*4882a593Smuzhiyun }
6378*4882a593Smuzhiyun
ip6_route_net_exit_late(struct net * net)6379*4882a593Smuzhiyun static void __net_exit ip6_route_net_exit_late(struct net *net)
6380*4882a593Smuzhiyun {
6381*4882a593Smuzhiyun #ifdef CONFIG_PROC_FS
6382*4882a593Smuzhiyun remove_proc_entry("ipv6_route", net->proc_net);
6383*4882a593Smuzhiyun remove_proc_entry("rt6_stats", net->proc_net);
6384*4882a593Smuzhiyun #endif
6385*4882a593Smuzhiyun }
6386*4882a593Smuzhiyun
/* Per-namespace hooks for the core routing state; registered from
 * ip6_route_init() with register_pernet_subsys().
 */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
6391*4882a593Smuzhiyun
ipv6_inetpeer_init(struct net * net)6392*4882a593Smuzhiyun static int __net_init ipv6_inetpeer_init(struct net *net)
6393*4882a593Smuzhiyun {
6394*4882a593Smuzhiyun struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
6395*4882a593Smuzhiyun
6396*4882a593Smuzhiyun if (!bp)
6397*4882a593Smuzhiyun return -ENOMEM;
6398*4882a593Smuzhiyun inet_peer_base_init(bp);
6399*4882a593Smuzhiyun net->ipv6.peers = bp;
6400*4882a593Smuzhiyun return 0;
6401*4882a593Smuzhiyun }
6402*4882a593Smuzhiyun
ipv6_inetpeer_exit(struct net * net)6403*4882a593Smuzhiyun static void __net_exit ipv6_inetpeer_exit(struct net *net)
6404*4882a593Smuzhiyun {
6405*4882a593Smuzhiyun struct inet_peer_base *bp = net->ipv6.peers;
6406*4882a593Smuzhiyun
6407*4882a593Smuzhiyun net->ipv6.peers = NULL;
6408*4882a593Smuzhiyun inetpeer_invalidate_tree(bp);
6409*4882a593Smuzhiyun kfree(bp);
6410*4882a593Smuzhiyun }
6411*4882a593Smuzhiyun
/* Per-namespace hooks managing net->ipv6.peers; registered first in
 * ip6_route_init().
 */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
6416*4882a593Smuzhiyun
/* Per-namespace hooks that create/remove the proc entries; registered in
 * ip6_route_init() after fib6_init(), xfrm6_init() and fib6_rules_init().
 */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
6421*4882a593Smuzhiyun
/* Netdevice notifier that keeps the special route entries bound to the
 * loopback device; its priority is set relative to
 * ADDRCONF_NOTIFY_PRIORITY.
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
6426*4882a593Smuzhiyun
ip6_route_init_special_entries(void)6427*4882a593Smuzhiyun void __init ip6_route_init_special_entries(void)
6428*4882a593Smuzhiyun {
6429*4882a593Smuzhiyun /* Registering of the loopback is done before this portion of code,
6430*4882a593Smuzhiyun * the loopback reference in rt6_info will not be taken, do it
6431*4882a593Smuzhiyun * manually for init_net */
6432*4882a593Smuzhiyun init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
6433*4882a593Smuzhiyun init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
6434*4882a593Smuzhiyun init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6435*4882a593Smuzhiyun #ifdef CONFIG_IPV6_MULTIPLE_TABLES
6436*4882a593Smuzhiyun init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
6437*4882a593Smuzhiyun init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6438*4882a593Smuzhiyun init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
6439*4882a593Smuzhiyun init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
6440*4882a593Smuzhiyun #endif
6441*4882a593Smuzhiyun }
6442*4882a593Smuzhiyun
6443*4882a593Smuzhiyun #if IS_BUILTIN(CONFIG_IPV6)
6444*4882a593Smuzhiyun #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
/* Declare the "ipv6_route" BPF iterator; its context carries the iterator
 * metadata and the current struct fib6_info.
 */
DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)

/* BTF id of struct fib6_info, copied into ipv6_route_reg_info by
 * bpf_iter_register().
 */
BTF_ID_LIST(btf_fib6_info_id)
BTF_ID(struct, fib6_info)
6449*4882a593Smuzhiyun
/* seq_file description for the BPF iterator: it uses the same seq
 * operations and private-state size as the "ipv6_route" proc entry.
 */
static const struct bpf_iter_seq_info ipv6_route_seq_info = {
	.seq_ops		= &ipv6_route_seq_ops,
	.init_seq_private	= bpf_iter_init_seq_net,
	.fini_seq_private	= bpf_iter_fini_seq_net,
	.seq_priv_size		= sizeof(struct ipv6_route_iter),
};
6456*4882a593Smuzhiyun
/* Registration record for the "ipv6_route" iterator target. The btf_id of
 * the single context argument is filled in by bpf_iter_register().
 */
static struct bpf_iter_reg ipv6_route_reg_info = {
	.target			= "ipv6_route",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		/* the rt argument may be NULL */
		{ offsetof(struct bpf_iter__ipv6_route, rt),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info		= &ipv6_route_seq_info,
};
6466*4882a593Smuzhiyun
bpf_iter_register(void)6467*4882a593Smuzhiyun static int __init bpf_iter_register(void)
6468*4882a593Smuzhiyun {
6469*4882a593Smuzhiyun ipv6_route_reg_info.ctx_arg_info[0].btf_id = *btf_fib6_info_id;
6470*4882a593Smuzhiyun return bpf_iter_reg_target(&ipv6_route_reg_info);
6471*4882a593Smuzhiyun }
6472*4882a593Smuzhiyun
/* Unregister the "ipv6_route" iterator target; called from
 * ip6_route_cleanup().
 */
static void bpf_iter_unregister(void)
{
	bpf_iter_unreg_target(&ipv6_route_reg_info);
}
6477*4882a593Smuzhiyun #endif
6478*4882a593Smuzhiyun #endif
6479*4882a593Smuzhiyun
ip6_route_init(void)6480*4882a593Smuzhiyun int __init ip6_route_init(void)
6481*4882a593Smuzhiyun {
6482*4882a593Smuzhiyun int ret;
6483*4882a593Smuzhiyun int cpu;
6484*4882a593Smuzhiyun
6485*4882a593Smuzhiyun ret = -ENOMEM;
6486*4882a593Smuzhiyun ip6_dst_ops_template.kmem_cachep =
6487*4882a593Smuzhiyun kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
6488*4882a593Smuzhiyun SLAB_HWCACHE_ALIGN, NULL);
6489*4882a593Smuzhiyun if (!ip6_dst_ops_template.kmem_cachep)
6490*4882a593Smuzhiyun goto out;
6491*4882a593Smuzhiyun
6492*4882a593Smuzhiyun ret = dst_entries_init(&ip6_dst_blackhole_ops);
6493*4882a593Smuzhiyun if (ret)
6494*4882a593Smuzhiyun goto out_kmem_cache;
6495*4882a593Smuzhiyun
6496*4882a593Smuzhiyun ret = register_pernet_subsys(&ipv6_inetpeer_ops);
6497*4882a593Smuzhiyun if (ret)
6498*4882a593Smuzhiyun goto out_dst_entries;
6499*4882a593Smuzhiyun
6500*4882a593Smuzhiyun ret = register_pernet_subsys(&ip6_route_net_ops);
6501*4882a593Smuzhiyun if (ret)
6502*4882a593Smuzhiyun goto out_register_inetpeer;
6503*4882a593Smuzhiyun
6504*4882a593Smuzhiyun ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
6505*4882a593Smuzhiyun
6506*4882a593Smuzhiyun ret = fib6_init();
6507*4882a593Smuzhiyun if (ret)
6508*4882a593Smuzhiyun goto out_register_subsys;
6509*4882a593Smuzhiyun
6510*4882a593Smuzhiyun ret = xfrm6_init();
6511*4882a593Smuzhiyun if (ret)
6512*4882a593Smuzhiyun goto out_fib6_init;
6513*4882a593Smuzhiyun
6514*4882a593Smuzhiyun ret = fib6_rules_init();
6515*4882a593Smuzhiyun if (ret)
6516*4882a593Smuzhiyun goto xfrm6_init;
6517*4882a593Smuzhiyun
6518*4882a593Smuzhiyun ret = register_pernet_subsys(&ip6_route_net_late_ops);
6519*4882a593Smuzhiyun if (ret)
6520*4882a593Smuzhiyun goto fib6_rules_init;
6521*4882a593Smuzhiyun
6522*4882a593Smuzhiyun ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
6523*4882a593Smuzhiyun inet6_rtm_newroute, NULL, 0);
6524*4882a593Smuzhiyun if (ret < 0)
6525*4882a593Smuzhiyun goto out_register_late_subsys;
6526*4882a593Smuzhiyun
6527*4882a593Smuzhiyun ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
6528*4882a593Smuzhiyun inet6_rtm_delroute, NULL, 0);
6529*4882a593Smuzhiyun if (ret < 0)
6530*4882a593Smuzhiyun goto out_register_late_subsys;
6531*4882a593Smuzhiyun
6532*4882a593Smuzhiyun ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
6533*4882a593Smuzhiyun inet6_rtm_getroute, NULL,
6534*4882a593Smuzhiyun RTNL_FLAG_DOIT_UNLOCKED);
6535*4882a593Smuzhiyun if (ret < 0)
6536*4882a593Smuzhiyun goto out_register_late_subsys;
6537*4882a593Smuzhiyun
6538*4882a593Smuzhiyun ret = register_netdevice_notifier(&ip6_route_dev_notifier);
6539*4882a593Smuzhiyun if (ret)
6540*4882a593Smuzhiyun goto out_register_late_subsys;
6541*4882a593Smuzhiyun
6542*4882a593Smuzhiyun #if IS_BUILTIN(CONFIG_IPV6)
6543*4882a593Smuzhiyun #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
6544*4882a593Smuzhiyun ret = bpf_iter_register();
6545*4882a593Smuzhiyun if (ret)
6546*4882a593Smuzhiyun goto out_register_late_subsys;
6547*4882a593Smuzhiyun #endif
6548*4882a593Smuzhiyun #endif
6549*4882a593Smuzhiyun
6550*4882a593Smuzhiyun for_each_possible_cpu(cpu) {
6551*4882a593Smuzhiyun struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
6552*4882a593Smuzhiyun
6553*4882a593Smuzhiyun INIT_LIST_HEAD(&ul->head);
6554*4882a593Smuzhiyun spin_lock_init(&ul->lock);
6555*4882a593Smuzhiyun }
6556*4882a593Smuzhiyun
6557*4882a593Smuzhiyun out:
6558*4882a593Smuzhiyun return ret;
6559*4882a593Smuzhiyun
6560*4882a593Smuzhiyun out_register_late_subsys:
6561*4882a593Smuzhiyun rtnl_unregister_all(PF_INET6);
6562*4882a593Smuzhiyun unregister_pernet_subsys(&ip6_route_net_late_ops);
6563*4882a593Smuzhiyun fib6_rules_init:
6564*4882a593Smuzhiyun fib6_rules_cleanup();
6565*4882a593Smuzhiyun xfrm6_init:
6566*4882a593Smuzhiyun xfrm6_fini();
6567*4882a593Smuzhiyun out_fib6_init:
6568*4882a593Smuzhiyun fib6_gc_cleanup();
6569*4882a593Smuzhiyun out_register_subsys:
6570*4882a593Smuzhiyun unregister_pernet_subsys(&ip6_route_net_ops);
6571*4882a593Smuzhiyun out_register_inetpeer:
6572*4882a593Smuzhiyun unregister_pernet_subsys(&ipv6_inetpeer_ops);
6573*4882a593Smuzhiyun out_dst_entries:
6574*4882a593Smuzhiyun dst_entries_destroy(&ip6_dst_blackhole_ops);
6575*4882a593Smuzhiyun out_kmem_cache:
6576*4882a593Smuzhiyun kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
6577*4882a593Smuzhiyun goto out;
6578*4882a593Smuzhiyun }
6579*4882a593Smuzhiyun
/*
 * Tear down what ip6_route_init() set up: the BPF iterator (built-in
 * IPv6 only), the netdevice notifier, the pernet operations, policy
 * rules, xfrm6, the FIB garbage collector, the blackhole dst counter and
 * finally the dst slab cache.
 */
void ip6_route_cleanup(void)
{
#if IS_BUILTIN(CONFIG_IPV6)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
	bpf_iter_unregister();
#endif
#endif
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	/* The slab cache goes last, after everything registered above */
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
6597