xref: /OK3568_Linux_fs/kernel/net/ipv6/ip6_fib.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  *	Linux INET6 implementation
4*4882a593Smuzhiyun  *	Forwarding Information Database
5*4882a593Smuzhiyun  *
6*4882a593Smuzhiyun  *	Authors:
7*4882a593Smuzhiyun  *	Pedro Roque		<roque@di.fc.ul.pt>
8*4882a593Smuzhiyun  *
9*4882a593Smuzhiyun  *	Changes:
10*4882a593Smuzhiyun  *	Yuji SEKIYA @USAGI:	Support default route on router node;
11*4882a593Smuzhiyun  *				remove ip6_null_entry from the top of
12*4882a593Smuzhiyun  *				routing table.
13*4882a593Smuzhiyun  *	Ville Nuorvala:		Fixed routing subtrees.
14*4882a593Smuzhiyun  */
15*4882a593Smuzhiyun 
16*4882a593Smuzhiyun #define pr_fmt(fmt) "IPv6: " fmt
17*4882a593Smuzhiyun 
18*4882a593Smuzhiyun #include <linux/errno.h>
19*4882a593Smuzhiyun #include <linux/types.h>
20*4882a593Smuzhiyun #include <linux/net.h>
21*4882a593Smuzhiyun #include <linux/route.h>
22*4882a593Smuzhiyun #include <linux/netdevice.h>
23*4882a593Smuzhiyun #include <linux/in6.h>
24*4882a593Smuzhiyun #include <linux/init.h>
25*4882a593Smuzhiyun #include <linux/list.h>
26*4882a593Smuzhiyun #include <linux/slab.h>
27*4882a593Smuzhiyun 
28*4882a593Smuzhiyun #include <net/ip.h>
29*4882a593Smuzhiyun #include <net/ipv6.h>
30*4882a593Smuzhiyun #include <net/ndisc.h>
31*4882a593Smuzhiyun #include <net/addrconf.h>
32*4882a593Smuzhiyun #include <net/lwtunnel.h>
33*4882a593Smuzhiyun #include <net/fib_notifier.h>
34*4882a593Smuzhiyun 
35*4882a593Smuzhiyun #include <net/ip6_fib.h>
36*4882a593Smuzhiyun #include <net/ip6_route.h>
37*4882a593Smuzhiyun 
38*4882a593Smuzhiyun static struct kmem_cache *fib6_node_kmem __read_mostly;
39*4882a593Smuzhiyun 
40*4882a593Smuzhiyun struct fib6_cleaner {
41*4882a593Smuzhiyun 	struct fib6_walker w;
42*4882a593Smuzhiyun 	struct net *net;
43*4882a593Smuzhiyun 	int (*func)(struct fib6_info *, void *arg);
44*4882a593Smuzhiyun 	int sernum;
45*4882a593Smuzhiyun 	void *arg;
46*4882a593Smuzhiyun 	bool skip_notify;
47*4882a593Smuzhiyun };
48*4882a593Smuzhiyun 
49*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
50*4882a593Smuzhiyun #define FWS_INIT FWS_S
51*4882a593Smuzhiyun #else
52*4882a593Smuzhiyun #define FWS_INIT FWS_L
53*4882a593Smuzhiyun #endif
54*4882a593Smuzhiyun 
55*4882a593Smuzhiyun static struct fib6_info *fib6_find_prefix(struct net *net,
56*4882a593Smuzhiyun 					 struct fib6_table *table,
57*4882a593Smuzhiyun 					 struct fib6_node *fn);
58*4882a593Smuzhiyun static struct fib6_node *fib6_repair_tree(struct net *net,
59*4882a593Smuzhiyun 					  struct fib6_table *table,
60*4882a593Smuzhiyun 					  struct fib6_node *fn);
61*4882a593Smuzhiyun static int fib6_walk(struct net *net, struct fib6_walker *w);
62*4882a593Smuzhiyun static int fib6_walk_continue(struct fib6_walker *w);
63*4882a593Smuzhiyun 
64*4882a593Smuzhiyun /*
65*4882a593Smuzhiyun  *	A routing update causes an increase of the serial number on the
66*4882a593Smuzhiyun  *	affected subtree. This allows for cached routes to be asynchronously
67*4882a593Smuzhiyun  *	tested when modifications are made to the destination cache as a
68*4882a593Smuzhiyun  *	result of redirects, path MTU changes, etc.
69*4882a593Smuzhiyun  */
70*4882a593Smuzhiyun 
static void fib6_gc_timer_cb(struct timer_list *t);

/* Iterate @w over every walker currently linked on @net's walker list. */
#define FOR_WALKERS(net, w) \
	list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh)
75*4882a593Smuzhiyun 
fib6_walker_link(struct net * net,struct fib6_walker * w)76*4882a593Smuzhiyun static void fib6_walker_link(struct net *net, struct fib6_walker *w)
77*4882a593Smuzhiyun {
78*4882a593Smuzhiyun 	write_lock_bh(&net->ipv6.fib6_walker_lock);
79*4882a593Smuzhiyun 	list_add(&w->lh, &net->ipv6.fib6_walkers);
80*4882a593Smuzhiyun 	write_unlock_bh(&net->ipv6.fib6_walker_lock);
81*4882a593Smuzhiyun }
82*4882a593Smuzhiyun 
fib6_walker_unlink(struct net * net,struct fib6_walker * w)83*4882a593Smuzhiyun static void fib6_walker_unlink(struct net *net, struct fib6_walker *w)
84*4882a593Smuzhiyun {
85*4882a593Smuzhiyun 	write_lock_bh(&net->ipv6.fib6_walker_lock);
86*4882a593Smuzhiyun 	list_del(&w->lh);
87*4882a593Smuzhiyun 	write_unlock_bh(&net->ipv6.fib6_walker_lock);
88*4882a593Smuzhiyun }
89*4882a593Smuzhiyun 
fib6_new_sernum(struct net * net)90*4882a593Smuzhiyun static int fib6_new_sernum(struct net *net)
91*4882a593Smuzhiyun {
92*4882a593Smuzhiyun 	int new, old;
93*4882a593Smuzhiyun 
94*4882a593Smuzhiyun 	do {
95*4882a593Smuzhiyun 		old = atomic_read(&net->ipv6.fib6_sernum);
96*4882a593Smuzhiyun 		new = old < INT_MAX ? old + 1 : 1;
97*4882a593Smuzhiyun 	} while (atomic_cmpxchg(&net->ipv6.fib6_sernum,
98*4882a593Smuzhiyun 				old, new) != old);
99*4882a593Smuzhiyun 	return new;
100*4882a593Smuzhiyun }
101*4882a593Smuzhiyun 
/*
 * Reserved serial number.  fib6_new_sernum() never returns 0, so this
 * value cannot collide with a real one.
 * NOTE(review): presumably means "leave the node's sernum untouched";
 * confirm against the users later in the file.
 */
enum {
	FIB6_NO_SERNUM_CHANGE = 0,
};
105*4882a593Smuzhiyun 
fib6_update_sernum(struct net * net,struct fib6_info * f6i)106*4882a593Smuzhiyun void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
107*4882a593Smuzhiyun {
108*4882a593Smuzhiyun 	struct fib6_node *fn;
109*4882a593Smuzhiyun 
110*4882a593Smuzhiyun 	fn = rcu_dereference_protected(f6i->fib6_node,
111*4882a593Smuzhiyun 			lockdep_is_held(&f6i->fib6_table->tb6_lock));
112*4882a593Smuzhiyun 	if (fn)
113*4882a593Smuzhiyun 		WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net));
114*4882a593Smuzhiyun }
115*4882a593Smuzhiyun 
116*4882a593Smuzhiyun /*
117*4882a593Smuzhiyun  *	Auxiliary address test functions for the radix tree.
118*4882a593Smuzhiyun  *
119*4882a593Smuzhiyun  *	These assume a 32bit processor (although it will work on
120*4882a593Smuzhiyun  *	64bit processors)
121*4882a593Smuzhiyun  */
122*4882a593Smuzhiyun 
123*4882a593Smuzhiyun /*
124*4882a593Smuzhiyun  *	test bit
125*4882a593Smuzhiyun  */
126*4882a593Smuzhiyun #if defined(__LITTLE_ENDIAN)
127*4882a593Smuzhiyun # define BITOP_BE32_SWIZZLE	(0x1F & ~7)
128*4882a593Smuzhiyun #else
129*4882a593Smuzhiyun # define BITOP_BE32_SWIZZLE	0
130*4882a593Smuzhiyun #endif
131*4882a593Smuzhiyun 
addr_bit_set(const void * token,int fn_bit)132*4882a593Smuzhiyun static __be32 addr_bit_set(const void *token, int fn_bit)
133*4882a593Smuzhiyun {
134*4882a593Smuzhiyun 	const __be32 *addr = token;
135*4882a593Smuzhiyun 	/*
136*4882a593Smuzhiyun 	 * Here,
137*4882a593Smuzhiyun 	 *	1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
138*4882a593Smuzhiyun 	 * is optimized version of
139*4882a593Smuzhiyun 	 *	htonl(1 << ((~fn_bit)&0x1F))
140*4882a593Smuzhiyun 	 * See include/asm-generic/bitops/le.h.
141*4882a593Smuzhiyun 	 */
142*4882a593Smuzhiyun 	return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) &
143*4882a593Smuzhiyun 	       addr[fn_bit >> 5];
144*4882a593Smuzhiyun }
145*4882a593Smuzhiyun 
/*
 * fib6_info_alloc - allocate a zeroed fib6_info.
 * @gfp_flags:    allocation flags passed to kzalloc()
 * @with_fib6_nh: also reserve room for one struct fib6_nh after the
 *                fib6_info
 *
 * The returned entry has an empty sibling list and a reference count of
 * one.  Returns NULL when the allocation fails.
 */
struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
{
	size_t size = sizeof(struct fib6_info);
	struct fib6_info *info;

	if (with_fib6_nh)
		size += sizeof(struct fib6_nh);

	info = kzalloc(size, gfp_flags);
	if (info) {
		/* fib6_siblings is a union with nh_list, so this
		 * initializes both
		 */
		INIT_LIST_HEAD(&info->fib6_siblings);
		refcount_set(&info->fib6_ref, 1);
	}

	return info;
}
164*4882a593Smuzhiyun 
fib6_info_destroy_rcu(struct rcu_head * head)165*4882a593Smuzhiyun void fib6_info_destroy_rcu(struct rcu_head *head)
166*4882a593Smuzhiyun {
167*4882a593Smuzhiyun 	struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun 	WARN_ON(f6i->fib6_node);
170*4882a593Smuzhiyun 
171*4882a593Smuzhiyun 	if (f6i->nh)
172*4882a593Smuzhiyun 		nexthop_put(f6i->nh);
173*4882a593Smuzhiyun 	else
174*4882a593Smuzhiyun 		fib6_nh_release(f6i->fib6_nh);
175*4882a593Smuzhiyun 
176*4882a593Smuzhiyun 	ip_fib_metrics_put(f6i->fib6_metrics);
177*4882a593Smuzhiyun 	kfree(f6i);
178*4882a593Smuzhiyun }
179*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
180*4882a593Smuzhiyun 
node_alloc(struct net * net)181*4882a593Smuzhiyun static struct fib6_node *node_alloc(struct net *net)
182*4882a593Smuzhiyun {
183*4882a593Smuzhiyun 	struct fib6_node *fn;
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun 	fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
186*4882a593Smuzhiyun 	if (fn)
187*4882a593Smuzhiyun 		net->ipv6.rt6_stats->fib_nodes++;
188*4882a593Smuzhiyun 
189*4882a593Smuzhiyun 	return fn;
190*4882a593Smuzhiyun }
191*4882a593Smuzhiyun 
node_free_immediate(struct net * net,struct fib6_node * fn)192*4882a593Smuzhiyun static void node_free_immediate(struct net *net, struct fib6_node *fn)
193*4882a593Smuzhiyun {
194*4882a593Smuzhiyun 	kmem_cache_free(fib6_node_kmem, fn);
195*4882a593Smuzhiyun 	net->ipv6.rt6_stats->fib_nodes--;
196*4882a593Smuzhiyun }
197*4882a593Smuzhiyun 
node_free_rcu(struct rcu_head * head)198*4882a593Smuzhiyun static void node_free_rcu(struct rcu_head *head)
199*4882a593Smuzhiyun {
200*4882a593Smuzhiyun 	struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
201*4882a593Smuzhiyun 
202*4882a593Smuzhiyun 	kmem_cache_free(fib6_node_kmem, fn);
203*4882a593Smuzhiyun }
204*4882a593Smuzhiyun 
node_free(struct net * net,struct fib6_node * fn)205*4882a593Smuzhiyun static void node_free(struct net *net, struct fib6_node *fn)
206*4882a593Smuzhiyun {
207*4882a593Smuzhiyun 	call_rcu(&fn->rcu, node_free_rcu);
208*4882a593Smuzhiyun 	net->ipv6.rt6_stats->fib_nodes--;
209*4882a593Smuzhiyun }
210*4882a593Smuzhiyun 
fib6_free_table(struct fib6_table * table)211*4882a593Smuzhiyun static void fib6_free_table(struct fib6_table *table)
212*4882a593Smuzhiyun {
213*4882a593Smuzhiyun 	inetpeer_invalidate_tree(&table->tb6_peers);
214*4882a593Smuzhiyun 	kfree(table);
215*4882a593Smuzhiyun }
216*4882a593Smuzhiyun 
fib6_link_table(struct net * net,struct fib6_table * tb)217*4882a593Smuzhiyun static void fib6_link_table(struct net *net, struct fib6_table *tb)
218*4882a593Smuzhiyun {
219*4882a593Smuzhiyun 	unsigned int h;
220*4882a593Smuzhiyun 
221*4882a593Smuzhiyun 	/*
222*4882a593Smuzhiyun 	 * Initialize table lock at a single place to give lockdep a key,
223*4882a593Smuzhiyun 	 * tables aren't visible prior to being linked to the list.
224*4882a593Smuzhiyun 	 */
225*4882a593Smuzhiyun 	spin_lock_init(&tb->tb6_lock);
226*4882a593Smuzhiyun 	h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
227*4882a593Smuzhiyun 
228*4882a593Smuzhiyun 	/*
229*4882a593Smuzhiyun 	 * No protection necessary, this is the only list mutatation
230*4882a593Smuzhiyun 	 * operation, tables never disappear once they exist.
231*4882a593Smuzhiyun 	 */
232*4882a593Smuzhiyun 	hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
233*4882a593Smuzhiyun }
234*4882a593Smuzhiyun 
235*4882a593Smuzhiyun #ifdef CONFIG_IPV6_MULTIPLE_TABLES
236*4882a593Smuzhiyun 
/*
 * Allocate (atomically) and initialize a table with id @id.  The root
 * node's leaf starts out as the netns null entry and the root carries the
 * RTN_ROOT, RTN_TL_ROOT and RTN_RTINFO flags.  Returns NULL when out of
 * memory.
 */
static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
{
	struct fib6_table *tbl = kzalloc(sizeof(*tbl), GFP_ATOMIC);

	if (!tbl)
		return NULL;

	tbl->tb6_id = id;
	rcu_assign_pointer(tbl->tb6_root.leaf, net->ipv6.fib6_null_entry);
	tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
	inet_peer_base_init(&tbl->tb6_peers);

	return tbl;
}
252*4882a593Smuzhiyun 
/*
 * Look up table @id, creating and linking it when it does not exist yet.
 * An @id of 0 selects RT6_TABLE_MAIN.  Returns NULL only if a new table
 * had to be allocated and the allocation failed.
 */
struct fib6_table *fib6_new_table(struct net *net, u32 id)
{
	struct fib6_table *tb;

	if (!id)
		id = RT6_TABLE_MAIN;

	tb = fib6_get_table(net, id);
	if (!tb) {
		tb = fib6_alloc_table(net, id);
		if (tb)
			fib6_link_table(net, tb);
	}

	return tb;
}
EXPORT_SYMBOL_GPL(fib6_new_table);
270*4882a593Smuzhiyun 
/*
 * Find the table with id @id in @net's table hash.  An @id of 0 selects
 * RT6_TABLE_MAIN.  The hash chain is walked under rcu_read_lock();
 * returns NULL when no such table exists.
 */
struct fib6_table *fib6_get_table(struct net *net, u32 id)
{
	struct fib6_table *found = NULL;
	struct fib6_table *tb;
	struct hlist_head *head;
	unsigned int h;

	if (!id)
		id = RT6_TABLE_MAIN;
	h = id & (FIB6_TABLE_HASHSZ - 1);

	rcu_read_lock();
	head = &net->ipv6.fib_table_hash[h];
	hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
		if (tb->tb6_id == id) {
			found = tb;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}
EXPORT_SYMBOL_GPL(fib6_get_table);
293*4882a593Smuzhiyun 
fib6_tables_init(struct net * net)294*4882a593Smuzhiyun static void __net_init fib6_tables_init(struct net *net)
295*4882a593Smuzhiyun {
296*4882a593Smuzhiyun 	fib6_link_table(net, net->ipv6.fib6_main_tbl);
297*4882a593Smuzhiyun 	fib6_link_table(net, net->ipv6.fib6_local_tbl);
298*4882a593Smuzhiyun }
299*4882a593Smuzhiyun #else
300*4882a593Smuzhiyun 
/*
 * Without multiple-table support nothing is ever created: this simply
 * forwards to fib6_get_table().
 */
struct fib6_table *fib6_new_table(struct net *net, u32 id)
{
	struct fib6_table *tb = fib6_get_table(net, id);

	return tb;
}
305*4882a593Smuzhiyun 
/* Without multiple-table support every id maps to the main table. */
struct fib6_table *fib6_get_table(struct net *net, u32 id)
{
	struct fib6_table *main_tbl = net->ipv6.fib6_main_tbl;

	return main_tbl;
}
310*4882a593Smuzhiyun 
/*
 * Run @lookup against the main table.  If the result carries -EAGAIN it
 * is released and replaced by the netns null entry; a reference is taken
 * on that entry unless the caller passed RT6_LOOKUP_F_DST_NOREF.
 */
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
				   const struct sk_buff *skb,
				   int flags, pol_lookup_t lookup)
{
	struct rt6_info *rt;

	rt = pol_lookup_func(lookup,
			net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
	if (rt->dst.error != -EAGAIN)
		return &rt->dst;

	ip6_rt_put_flags(rt, flags);
	rt = net->ipv6.ip6_null_entry;
	if (!(flags & RT6_LOOKUP_F_DST_NOREF))
		dst_hold(&rt->dst);

	return &rt->dst;
}
328*4882a593Smuzhiyun 
329*4882a593Smuzhiyun /* called with rcu lock held; no reference taken on fib6_info */
fib6_lookup(struct net * net,int oif,struct flowi6 * fl6,struct fib6_result * res,int flags)330*4882a593Smuzhiyun int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
331*4882a593Smuzhiyun 		struct fib6_result *res, int flags)
332*4882a593Smuzhiyun {
333*4882a593Smuzhiyun 	return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6,
334*4882a593Smuzhiyun 				 res, flags);
335*4882a593Smuzhiyun }
336*4882a593Smuzhiyun 
fib6_tables_init(struct net * net)337*4882a593Smuzhiyun static void __net_init fib6_tables_init(struct net *net)
338*4882a593Smuzhiyun {
339*4882a593Smuzhiyun 	fib6_link_table(net, net->ipv6.fib6_main_tbl);
340*4882a593Smuzhiyun }
341*4882a593Smuzhiyun 
342*4882a593Smuzhiyun #endif
343*4882a593Smuzhiyun 
fib6_tables_seq_read(struct net * net)344*4882a593Smuzhiyun unsigned int fib6_tables_seq_read(struct net *net)
345*4882a593Smuzhiyun {
346*4882a593Smuzhiyun 	unsigned int h, fib_seq = 0;
347*4882a593Smuzhiyun 
348*4882a593Smuzhiyun 	rcu_read_lock();
349*4882a593Smuzhiyun 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
350*4882a593Smuzhiyun 		struct hlist_head *head = &net->ipv6.fib_table_hash[h];
351*4882a593Smuzhiyun 		struct fib6_table *tb;
352*4882a593Smuzhiyun 
353*4882a593Smuzhiyun 		hlist_for_each_entry_rcu(tb, head, tb6_hlist)
354*4882a593Smuzhiyun 			fib_seq += tb->fib_seq;
355*4882a593Smuzhiyun 	}
356*4882a593Smuzhiyun 	rcu_read_unlock();
357*4882a593Smuzhiyun 
358*4882a593Smuzhiyun 	return fib_seq;
359*4882a593Smuzhiyun }
360*4882a593Smuzhiyun 
call_fib6_entry_notifier(struct notifier_block * nb,enum fib_event_type event_type,struct fib6_info * rt,struct netlink_ext_ack * extack)361*4882a593Smuzhiyun static int call_fib6_entry_notifier(struct notifier_block *nb,
362*4882a593Smuzhiyun 				    enum fib_event_type event_type,
363*4882a593Smuzhiyun 				    struct fib6_info *rt,
364*4882a593Smuzhiyun 				    struct netlink_ext_ack *extack)
365*4882a593Smuzhiyun {
366*4882a593Smuzhiyun 	struct fib6_entry_notifier_info info = {
367*4882a593Smuzhiyun 		.info.extack = extack,
368*4882a593Smuzhiyun 		.rt = rt,
369*4882a593Smuzhiyun 	};
370*4882a593Smuzhiyun 
371*4882a593Smuzhiyun 	return call_fib6_notifier(nb, event_type, &info.info);
372*4882a593Smuzhiyun }
373*4882a593Smuzhiyun 
call_fib6_multipath_entry_notifier(struct notifier_block * nb,enum fib_event_type event_type,struct fib6_info * rt,unsigned int nsiblings,struct netlink_ext_ack * extack)374*4882a593Smuzhiyun static int call_fib6_multipath_entry_notifier(struct notifier_block *nb,
375*4882a593Smuzhiyun 					      enum fib_event_type event_type,
376*4882a593Smuzhiyun 					      struct fib6_info *rt,
377*4882a593Smuzhiyun 					      unsigned int nsiblings,
378*4882a593Smuzhiyun 					      struct netlink_ext_ack *extack)
379*4882a593Smuzhiyun {
380*4882a593Smuzhiyun 	struct fib6_entry_notifier_info info = {
381*4882a593Smuzhiyun 		.info.extack = extack,
382*4882a593Smuzhiyun 		.rt = rt,
383*4882a593Smuzhiyun 		.nsiblings = nsiblings,
384*4882a593Smuzhiyun 	};
385*4882a593Smuzhiyun 
386*4882a593Smuzhiyun 	return call_fib6_notifier(nb, event_type, &info.info);
387*4882a593Smuzhiyun }
388*4882a593Smuzhiyun 
call_fib6_entry_notifiers(struct net * net,enum fib_event_type event_type,struct fib6_info * rt,struct netlink_ext_ack * extack)389*4882a593Smuzhiyun int call_fib6_entry_notifiers(struct net *net,
390*4882a593Smuzhiyun 			      enum fib_event_type event_type,
391*4882a593Smuzhiyun 			      struct fib6_info *rt,
392*4882a593Smuzhiyun 			      struct netlink_ext_ack *extack)
393*4882a593Smuzhiyun {
394*4882a593Smuzhiyun 	struct fib6_entry_notifier_info info = {
395*4882a593Smuzhiyun 		.info.extack = extack,
396*4882a593Smuzhiyun 		.rt = rt,
397*4882a593Smuzhiyun 	};
398*4882a593Smuzhiyun 
399*4882a593Smuzhiyun 	rt->fib6_table->fib_seq++;
400*4882a593Smuzhiyun 	return call_fib6_notifiers(net, event_type, &info.info);
401*4882a593Smuzhiyun }
402*4882a593Smuzhiyun 
call_fib6_multipath_entry_notifiers(struct net * net,enum fib_event_type event_type,struct fib6_info * rt,unsigned int nsiblings,struct netlink_ext_ack * extack)403*4882a593Smuzhiyun int call_fib6_multipath_entry_notifiers(struct net *net,
404*4882a593Smuzhiyun 					enum fib_event_type event_type,
405*4882a593Smuzhiyun 					struct fib6_info *rt,
406*4882a593Smuzhiyun 					unsigned int nsiblings,
407*4882a593Smuzhiyun 					struct netlink_ext_ack *extack)
408*4882a593Smuzhiyun {
409*4882a593Smuzhiyun 	struct fib6_entry_notifier_info info = {
410*4882a593Smuzhiyun 		.info.extack = extack,
411*4882a593Smuzhiyun 		.rt = rt,
412*4882a593Smuzhiyun 		.nsiblings = nsiblings,
413*4882a593Smuzhiyun 	};
414*4882a593Smuzhiyun 
415*4882a593Smuzhiyun 	rt->fib6_table->fib_seq++;
416*4882a593Smuzhiyun 	return call_fib6_notifiers(net, event_type, &info.info);
417*4882a593Smuzhiyun }
418*4882a593Smuzhiyun 
call_fib6_entry_notifiers_replace(struct net * net,struct fib6_info * rt)419*4882a593Smuzhiyun int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
420*4882a593Smuzhiyun {
421*4882a593Smuzhiyun 	struct fib6_entry_notifier_info info = {
422*4882a593Smuzhiyun 		.rt = rt,
423*4882a593Smuzhiyun 		.nsiblings = rt->fib6_nsiblings,
424*4882a593Smuzhiyun 	};
425*4882a593Smuzhiyun 
426*4882a593Smuzhiyun 	rt->fib6_table->fib_seq++;
427*4882a593Smuzhiyun 	return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
428*4882a593Smuzhiyun }
429*4882a593Smuzhiyun 
/* Context handed to the walker callback while replaying routes to @nb. */
struct fib6_dump_arg {
	struct net *net;		/* namespace being dumped */
	struct notifier_block *nb;	/* notifier receiving the replay */
	struct netlink_ext_ack *extack;	/* forwarded to the notifier */
};
435*4882a593Smuzhiyun 
fib6_rt_dump(struct fib6_info * rt,struct fib6_dump_arg * arg)436*4882a593Smuzhiyun static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
437*4882a593Smuzhiyun {
438*4882a593Smuzhiyun 	enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
439*4882a593Smuzhiyun 	int err;
440*4882a593Smuzhiyun 
441*4882a593Smuzhiyun 	if (!rt || rt == arg->net->ipv6.fib6_null_entry)
442*4882a593Smuzhiyun 		return 0;
443*4882a593Smuzhiyun 
444*4882a593Smuzhiyun 	if (rt->fib6_nsiblings)
445*4882a593Smuzhiyun 		err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
446*4882a593Smuzhiyun 							 rt,
447*4882a593Smuzhiyun 							 rt->fib6_nsiblings,
448*4882a593Smuzhiyun 							 arg->extack);
449*4882a593Smuzhiyun 	else
450*4882a593Smuzhiyun 		err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
451*4882a593Smuzhiyun 					       arg->extack);
452*4882a593Smuzhiyun 
453*4882a593Smuzhiyun 	return err;
454*4882a593Smuzhiyun }
455*4882a593Smuzhiyun 
fib6_node_dump(struct fib6_walker * w)456*4882a593Smuzhiyun static int fib6_node_dump(struct fib6_walker *w)
457*4882a593Smuzhiyun {
458*4882a593Smuzhiyun 	int err;
459*4882a593Smuzhiyun 
460*4882a593Smuzhiyun 	err = fib6_rt_dump(w->leaf, w->args);
461*4882a593Smuzhiyun 	w->leaf = NULL;
462*4882a593Smuzhiyun 	return err;
463*4882a593Smuzhiyun }
464*4882a593Smuzhiyun 
fib6_table_dump(struct net * net,struct fib6_table * tb,struct fib6_walker * w)465*4882a593Smuzhiyun static int fib6_table_dump(struct net *net, struct fib6_table *tb,
466*4882a593Smuzhiyun 			   struct fib6_walker *w)
467*4882a593Smuzhiyun {
468*4882a593Smuzhiyun 	int err;
469*4882a593Smuzhiyun 
470*4882a593Smuzhiyun 	w->root = &tb->tb6_root;
471*4882a593Smuzhiyun 	spin_lock_bh(&tb->tb6_lock);
472*4882a593Smuzhiyun 	err = fib6_walk(net, w);
473*4882a593Smuzhiyun 	spin_unlock_bh(&tb->tb6_lock);
474*4882a593Smuzhiyun 	return err;
475*4882a593Smuzhiyun }
476*4882a593Smuzhiyun 
477*4882a593Smuzhiyun /* Called with rcu_read_lock() */
fib6_tables_dump(struct net * net,struct notifier_block * nb,struct netlink_ext_ack * extack)478*4882a593Smuzhiyun int fib6_tables_dump(struct net *net, struct notifier_block *nb,
479*4882a593Smuzhiyun 		     struct netlink_ext_ack *extack)
480*4882a593Smuzhiyun {
481*4882a593Smuzhiyun 	struct fib6_dump_arg arg;
482*4882a593Smuzhiyun 	struct fib6_walker *w;
483*4882a593Smuzhiyun 	unsigned int h;
484*4882a593Smuzhiyun 	int err = 0;
485*4882a593Smuzhiyun 
486*4882a593Smuzhiyun 	w = kzalloc(sizeof(*w), GFP_ATOMIC);
487*4882a593Smuzhiyun 	if (!w)
488*4882a593Smuzhiyun 		return -ENOMEM;
489*4882a593Smuzhiyun 
490*4882a593Smuzhiyun 	w->func = fib6_node_dump;
491*4882a593Smuzhiyun 	arg.net = net;
492*4882a593Smuzhiyun 	arg.nb = nb;
493*4882a593Smuzhiyun 	arg.extack = extack;
494*4882a593Smuzhiyun 	w->args = &arg;
495*4882a593Smuzhiyun 
496*4882a593Smuzhiyun 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
497*4882a593Smuzhiyun 		struct hlist_head *head = &net->ipv6.fib_table_hash[h];
498*4882a593Smuzhiyun 		struct fib6_table *tb;
499*4882a593Smuzhiyun 
500*4882a593Smuzhiyun 		hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
501*4882a593Smuzhiyun 			err = fib6_table_dump(net, tb, w);
502*4882a593Smuzhiyun 			if (err < 0)
503*4882a593Smuzhiyun 				goto out;
504*4882a593Smuzhiyun 		}
505*4882a593Smuzhiyun 	}
506*4882a593Smuzhiyun 
507*4882a593Smuzhiyun out:
508*4882a593Smuzhiyun 	kfree(w);
509*4882a593Smuzhiyun 
510*4882a593Smuzhiyun 	return err;
511*4882a593Smuzhiyun }
512*4882a593Smuzhiyun 
fib6_dump_node(struct fib6_walker * w)513*4882a593Smuzhiyun static int fib6_dump_node(struct fib6_walker *w)
514*4882a593Smuzhiyun {
515*4882a593Smuzhiyun 	int res;
516*4882a593Smuzhiyun 	struct fib6_info *rt;
517*4882a593Smuzhiyun 
518*4882a593Smuzhiyun 	for_each_fib6_walker_rt(w) {
519*4882a593Smuzhiyun 		res = rt6_dump_route(rt, w->args, w->skip_in_node);
520*4882a593Smuzhiyun 		if (res >= 0) {
521*4882a593Smuzhiyun 			/* Frame is full, suspend walking */
522*4882a593Smuzhiyun 			w->leaf = rt;
523*4882a593Smuzhiyun 
524*4882a593Smuzhiyun 			/* We'll restart from this node, so if some routes were
525*4882a593Smuzhiyun 			 * already dumped, skip them next time.
526*4882a593Smuzhiyun 			 */
527*4882a593Smuzhiyun 			w->skip_in_node += res;
528*4882a593Smuzhiyun 
529*4882a593Smuzhiyun 			return 1;
530*4882a593Smuzhiyun 		}
531*4882a593Smuzhiyun 		w->skip_in_node = 0;
532*4882a593Smuzhiyun 
533*4882a593Smuzhiyun 		/* Multipath routes are dumped in one route with the
534*4882a593Smuzhiyun 		 * RTA_MULTIPATH attribute. Jump 'rt' to point to the
535*4882a593Smuzhiyun 		 * last sibling of this route (no need to dump the
536*4882a593Smuzhiyun 		 * sibling routes again)
537*4882a593Smuzhiyun 		 */
538*4882a593Smuzhiyun 		if (rt->fib6_nsiblings)
539*4882a593Smuzhiyun 			rt = list_last_entry(&rt->fib6_siblings,
540*4882a593Smuzhiyun 					     struct fib6_info,
541*4882a593Smuzhiyun 					     fib6_siblings);
542*4882a593Smuzhiyun 	}
543*4882a593Smuzhiyun 	w->leaf = NULL;
544*4882a593Smuzhiyun 	return 0;
545*4882a593Smuzhiyun }
546*4882a593Smuzhiyun 
fib6_dump_end(struct netlink_callback * cb)547*4882a593Smuzhiyun static void fib6_dump_end(struct netlink_callback *cb)
548*4882a593Smuzhiyun {
549*4882a593Smuzhiyun 	struct net *net = sock_net(cb->skb->sk);
550*4882a593Smuzhiyun 	struct fib6_walker *w = (void *)cb->args[2];
551*4882a593Smuzhiyun 
552*4882a593Smuzhiyun 	if (w) {
553*4882a593Smuzhiyun 		if (cb->args[4]) {
554*4882a593Smuzhiyun 			cb->args[4] = 0;
555*4882a593Smuzhiyun 			fib6_walker_unlink(net, w);
556*4882a593Smuzhiyun 		}
557*4882a593Smuzhiyun 		cb->args[2] = 0;
558*4882a593Smuzhiyun 		kfree(w);
559*4882a593Smuzhiyun 	}
560*4882a593Smuzhiyun 	cb->done = (void *)cb->args[3];
561*4882a593Smuzhiyun 	cb->args[1] = 3;
562*4882a593Smuzhiyun }
563*4882a593Smuzhiyun 
fib6_dump_done(struct netlink_callback * cb)564*4882a593Smuzhiyun static int fib6_dump_done(struct netlink_callback *cb)
565*4882a593Smuzhiyun {
566*4882a593Smuzhiyun 	fib6_dump_end(cb);
567*4882a593Smuzhiyun 	return cb->done ? cb->done(cb) : 0;
568*4882a593Smuzhiyun }
569*4882a593Smuzhiyun 
fib6_dump_table(struct fib6_table * table,struct sk_buff * skb,struct netlink_callback * cb)570*4882a593Smuzhiyun static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
571*4882a593Smuzhiyun 			   struct netlink_callback *cb)
572*4882a593Smuzhiyun {
573*4882a593Smuzhiyun 	struct net *net = sock_net(skb->sk);
574*4882a593Smuzhiyun 	struct fib6_walker *w;
575*4882a593Smuzhiyun 	int res;
576*4882a593Smuzhiyun 
577*4882a593Smuzhiyun 	w = (void *)cb->args[2];
578*4882a593Smuzhiyun 	w->root = &table->tb6_root;
579*4882a593Smuzhiyun 
580*4882a593Smuzhiyun 	if (cb->args[4] == 0) {
581*4882a593Smuzhiyun 		w->count = 0;
582*4882a593Smuzhiyun 		w->skip = 0;
583*4882a593Smuzhiyun 		w->skip_in_node = 0;
584*4882a593Smuzhiyun 
585*4882a593Smuzhiyun 		spin_lock_bh(&table->tb6_lock);
586*4882a593Smuzhiyun 		res = fib6_walk(net, w);
587*4882a593Smuzhiyun 		spin_unlock_bh(&table->tb6_lock);
588*4882a593Smuzhiyun 		if (res > 0) {
589*4882a593Smuzhiyun 			cb->args[4] = 1;
590*4882a593Smuzhiyun 			cb->args[5] = READ_ONCE(w->root->fn_sernum);
591*4882a593Smuzhiyun 		}
592*4882a593Smuzhiyun 	} else {
593*4882a593Smuzhiyun 		int sernum = READ_ONCE(w->root->fn_sernum);
594*4882a593Smuzhiyun 		if (cb->args[5] != sernum) {
595*4882a593Smuzhiyun 			/* Begin at the root if the tree changed */
596*4882a593Smuzhiyun 			cb->args[5] = sernum;
597*4882a593Smuzhiyun 			w->state = FWS_INIT;
598*4882a593Smuzhiyun 			w->node = w->root;
599*4882a593Smuzhiyun 			w->skip = w->count;
600*4882a593Smuzhiyun 			w->skip_in_node = 0;
601*4882a593Smuzhiyun 		} else
602*4882a593Smuzhiyun 			w->skip = 0;
603*4882a593Smuzhiyun 
604*4882a593Smuzhiyun 		spin_lock_bh(&table->tb6_lock);
605*4882a593Smuzhiyun 		res = fib6_walk_continue(w);
606*4882a593Smuzhiyun 		spin_unlock_bh(&table->tb6_lock);
607*4882a593Smuzhiyun 		if (res <= 0) {
608*4882a593Smuzhiyun 			fib6_walker_unlink(net, w);
609*4882a593Smuzhiyun 			cb->args[4] = 0;
610*4882a593Smuzhiyun 		}
611*4882a593Smuzhiyun 	}
612*4882a593Smuzhiyun 
613*4882a593Smuzhiyun 	return res;
614*4882a593Smuzhiyun }
615*4882a593Smuzhiyun 
inet6_dump_fib(struct sk_buff * skb,struct netlink_callback * cb)616*4882a593Smuzhiyun static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
617*4882a593Smuzhiyun {
618*4882a593Smuzhiyun 	struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
619*4882a593Smuzhiyun 					 .filter.dump_routes = true };
620*4882a593Smuzhiyun 	const struct nlmsghdr *nlh = cb->nlh;
621*4882a593Smuzhiyun 	struct net *net = sock_net(skb->sk);
622*4882a593Smuzhiyun 	unsigned int h, s_h;
623*4882a593Smuzhiyun 	unsigned int e = 0, s_e;
624*4882a593Smuzhiyun 	struct fib6_walker *w;
625*4882a593Smuzhiyun 	struct fib6_table *tb;
626*4882a593Smuzhiyun 	struct hlist_head *head;
627*4882a593Smuzhiyun 	int res = 0;
628*4882a593Smuzhiyun 
629*4882a593Smuzhiyun 	if (cb->strict_check) {
630*4882a593Smuzhiyun 		int err;
631*4882a593Smuzhiyun 
632*4882a593Smuzhiyun 		err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb);
633*4882a593Smuzhiyun 		if (err < 0)
634*4882a593Smuzhiyun 			return err;
635*4882a593Smuzhiyun 	} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
636*4882a593Smuzhiyun 		struct rtmsg *rtm = nlmsg_data(nlh);
637*4882a593Smuzhiyun 
638*4882a593Smuzhiyun 		if (rtm->rtm_flags & RTM_F_PREFIX)
639*4882a593Smuzhiyun 			arg.filter.flags = RTM_F_PREFIX;
640*4882a593Smuzhiyun 	}
641*4882a593Smuzhiyun 
642*4882a593Smuzhiyun 	w = (void *)cb->args[2];
643*4882a593Smuzhiyun 	if (!w) {
644*4882a593Smuzhiyun 		/* New dump:
645*4882a593Smuzhiyun 		 *
646*4882a593Smuzhiyun 		 * 1. hook callback destructor.
647*4882a593Smuzhiyun 		 */
648*4882a593Smuzhiyun 		cb->args[3] = (long)cb->done;
649*4882a593Smuzhiyun 		cb->done = fib6_dump_done;
650*4882a593Smuzhiyun 
651*4882a593Smuzhiyun 		/*
652*4882a593Smuzhiyun 		 * 2. allocate and initialize walker.
653*4882a593Smuzhiyun 		 */
654*4882a593Smuzhiyun 		w = kzalloc(sizeof(*w), GFP_ATOMIC);
655*4882a593Smuzhiyun 		if (!w)
656*4882a593Smuzhiyun 			return -ENOMEM;
657*4882a593Smuzhiyun 		w->func = fib6_dump_node;
658*4882a593Smuzhiyun 		cb->args[2] = (long)w;
659*4882a593Smuzhiyun 	}
660*4882a593Smuzhiyun 
661*4882a593Smuzhiyun 	arg.skb = skb;
662*4882a593Smuzhiyun 	arg.cb = cb;
663*4882a593Smuzhiyun 	arg.net = net;
664*4882a593Smuzhiyun 	w->args = &arg;
665*4882a593Smuzhiyun 
666*4882a593Smuzhiyun 	if (arg.filter.table_id) {
667*4882a593Smuzhiyun 		tb = fib6_get_table(net, arg.filter.table_id);
668*4882a593Smuzhiyun 		if (!tb) {
669*4882a593Smuzhiyun 			if (rtnl_msg_family(cb->nlh) != PF_INET6)
670*4882a593Smuzhiyun 				goto out;
671*4882a593Smuzhiyun 
672*4882a593Smuzhiyun 			NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
673*4882a593Smuzhiyun 			return -ENOENT;
674*4882a593Smuzhiyun 		}
675*4882a593Smuzhiyun 
676*4882a593Smuzhiyun 		if (!cb->args[0]) {
677*4882a593Smuzhiyun 			res = fib6_dump_table(tb, skb, cb);
678*4882a593Smuzhiyun 			if (!res)
679*4882a593Smuzhiyun 				cb->args[0] = 1;
680*4882a593Smuzhiyun 		}
681*4882a593Smuzhiyun 		goto out;
682*4882a593Smuzhiyun 	}
683*4882a593Smuzhiyun 
684*4882a593Smuzhiyun 	s_h = cb->args[0];
685*4882a593Smuzhiyun 	s_e = cb->args[1];
686*4882a593Smuzhiyun 
687*4882a593Smuzhiyun 	rcu_read_lock();
688*4882a593Smuzhiyun 	for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
689*4882a593Smuzhiyun 		e = 0;
690*4882a593Smuzhiyun 		head = &net->ipv6.fib_table_hash[h];
691*4882a593Smuzhiyun 		hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
692*4882a593Smuzhiyun 			if (e < s_e)
693*4882a593Smuzhiyun 				goto next;
694*4882a593Smuzhiyun 			res = fib6_dump_table(tb, skb, cb);
695*4882a593Smuzhiyun 			if (res != 0)
696*4882a593Smuzhiyun 				goto out_unlock;
697*4882a593Smuzhiyun next:
698*4882a593Smuzhiyun 			e++;
699*4882a593Smuzhiyun 		}
700*4882a593Smuzhiyun 	}
701*4882a593Smuzhiyun out_unlock:
702*4882a593Smuzhiyun 	rcu_read_unlock();
703*4882a593Smuzhiyun 	cb->args[1] = e;
704*4882a593Smuzhiyun 	cb->args[0] = h;
705*4882a593Smuzhiyun out:
706*4882a593Smuzhiyun 	res = res < 0 ? res : skb->len;
707*4882a593Smuzhiyun 	if (res <= 0)
708*4882a593Smuzhiyun 		fib6_dump_end(cb);
709*4882a593Smuzhiyun 	return res;
710*4882a593Smuzhiyun }
711*4882a593Smuzhiyun 
/*
 *	Store @val as metric number @metric of @f6i (@metric is 1-based: it
 *	is written to slot @metric - 1 of the metrics array).
 *
 *	While the entry still points at the global dst_default_metrics, a
 *	zeroed private dst_metrics block is allocated with GFP_ATOMIC, given
 *	a reference count of one and installed first.  A NULL @f6i or a
 *	failed allocation turns the call into a silent no-op.
 */
void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
{
	struct dst_metrics *m;

	if (!f6i)
		return;

	m = f6i->fib6_metrics;
	if (m == &dst_default_metrics) {
		m = kzalloc(sizeof(*m), GFP_ATOMIC);
		if (!m)
			return;

		refcount_set(&m->refcnt, 1);
		f6i->fib6_metrics = m;
	}

	m->metrics[metric - 1] = val;
}
729*4882a593Smuzhiyun 
730*4882a593Smuzhiyun /*
731*4882a593Smuzhiyun  *	Routing Table
732*4882a593Smuzhiyun  *
733*4882a593Smuzhiyun  *	return the appropriate node for a routing tree "add" operation
734*4882a593Smuzhiyun  *	by either creating and inserting or by returning an existing
735*4882a593Smuzhiyun  *	node.
736*4882a593Smuzhiyun  */
737*4882a593Smuzhiyun 
/*
 * fib6_add_1 - find or create the tree node for the prefix @addr/@plen.
 *
 * Walks down from @root.  At every node the key is read from the leaf
 * entry of that node, @offset bytes into it.  A node whose fn_bit equals
 * @plen and whose key matches is returned as is; otherwise a new node is
 * allocated and linked in, together with an intermediate node when the
 * two prefixes diverge before @plen bits.
 *
 * @allow_create == 0 with @replace_required != 0: a missing node is an
 * error, reported through @extack and as ERR_PTR(-ENOENT).
 * @allow_create == 0 with @replace_required == 0: only a warning is
 * logged and the node is created anyway.
 *
 * Returns the node, ERR_PTR(-ENOENT) as described above, or
 * ERR_PTR(-ENOMEM) when node_alloc() fails.
 *
 * Every rcu_dereference_protected() below names table->tb6_lock as the
 * lock expected to be held.
 */
fib6_add_1(struct net * net,struct fib6_table * table,struct fib6_node * root,struct in6_addr * addr,int plen,int offset,int allow_create,int replace_required,struct netlink_ext_ack * extack)738*4882a593Smuzhiyun static struct fib6_node *fib6_add_1(struct net *net,
739*4882a593Smuzhiyun 				    struct fib6_table *table,
740*4882a593Smuzhiyun 				    struct fib6_node *root,
741*4882a593Smuzhiyun 				    struct in6_addr *addr, int plen,
742*4882a593Smuzhiyun 				    int offset, int allow_create,
743*4882a593Smuzhiyun 				    int replace_required,
744*4882a593Smuzhiyun 				    struct netlink_ext_ack *extack)
745*4882a593Smuzhiyun {
746*4882a593Smuzhiyun 	struct fib6_node *fn, *in, *ln;
747*4882a593Smuzhiyun 	struct fib6_node *pn = NULL;
748*4882a593Smuzhiyun 	struct rt6key *key;
749*4882a593Smuzhiyun 	int	bit;
750*4882a593Smuzhiyun 	__be32	dir = 0;
751*4882a593Smuzhiyun 
752*4882a593Smuzhiyun 	RT6_TRACE("fib6_add_1\n");
753*4882a593Smuzhiyun 
754*4882a593Smuzhiyun 	/* insert node in tree */
755*4882a593Smuzhiyun 
756*4882a593Smuzhiyun 	fn = root;
757*4882a593Smuzhiyun 
758*4882a593Smuzhiyun 	do {
759*4882a593Smuzhiyun 		struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
760*4882a593Smuzhiyun 					    lockdep_is_held(&table->tb6_lock));
		/* the key of this node is found @offset bytes into its leaf */
761*4882a593Smuzhiyun 		key = (struct rt6key *)((u8 *)leaf + offset);
762*4882a593Smuzhiyun 
763*4882a593Smuzhiyun 		/*
764*4882a593Smuzhiyun 		 *	Prefix match
765*4882a593Smuzhiyun 		 */
766*4882a593Smuzhiyun 		if (plen < fn->fn_bit ||
767*4882a593Smuzhiyun 		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
768*4882a593Smuzhiyun 			if (!allow_create) {
769*4882a593Smuzhiyun 				if (replace_required) {
770*4882a593Smuzhiyun 					NL_SET_ERR_MSG(extack,
771*4882a593Smuzhiyun 						       "Can not replace route - no match found");
772*4882a593Smuzhiyun 					pr_warn("Can't replace route, no match found\n");
773*4882a593Smuzhiyun 					return ERR_PTR(-ENOENT);
774*4882a593Smuzhiyun 				}
775*4882a593Smuzhiyun 				pr_warn("NLM_F_CREATE should be set when creating new route\n");
776*4882a593Smuzhiyun 			}
777*4882a593Smuzhiyun 			goto insert_above;
778*4882a593Smuzhiyun 		}
779*4882a593Smuzhiyun 
780*4882a593Smuzhiyun 		/*
781*4882a593Smuzhiyun 		 *	Exact match ?
782*4882a593Smuzhiyun 		 */
783*4882a593Smuzhiyun 
784*4882a593Smuzhiyun 		if (plen == fn->fn_bit) {
785*4882a593Smuzhiyun 			/* clean up an intermediate node */
786*4882a593Smuzhiyun 			if (!(fn->fn_flags & RTN_RTINFO)) {
787*4882a593Smuzhiyun 				RCU_INIT_POINTER(fn->leaf, NULL);
788*4882a593Smuzhiyun 				fib6_info_release(leaf);
789*4882a593Smuzhiyun 			/* remove null_entry in the root node */
790*4882a593Smuzhiyun 			} else if (fn->fn_flags & RTN_TL_ROOT &&
791*4882a593Smuzhiyun 				   rcu_access_pointer(fn->leaf) ==
792*4882a593Smuzhiyun 				   net->ipv6.fib6_null_entry) {
793*4882a593Smuzhiyun 				RCU_INIT_POINTER(fn->leaf, NULL);
794*4882a593Smuzhiyun 			}
795*4882a593Smuzhiyun 
796*4882a593Smuzhiyun 			return fn;
797*4882a593Smuzhiyun 		}
798*4882a593Smuzhiyun 
799*4882a593Smuzhiyun 		/*
800*4882a593Smuzhiyun 		 *	We have more bits to go
801*4882a593Smuzhiyun 		 */
802*4882a593Smuzhiyun 
803*4882a593Smuzhiyun 		/* Try to walk down on tree. */
804*4882a593Smuzhiyun 		dir = addr_bit_set(addr, fn->fn_bit);
805*4882a593Smuzhiyun 		pn = fn;
806*4882a593Smuzhiyun 		fn = dir ?
807*4882a593Smuzhiyun 		     rcu_dereference_protected(fn->right,
808*4882a593Smuzhiyun 					lockdep_is_held(&table->tb6_lock)) :
809*4882a593Smuzhiyun 		     rcu_dereference_protected(fn->left,
810*4882a593Smuzhiyun 					lockdep_is_held(&table->tb6_lock));
811*4882a593Smuzhiyun 	} while (fn);
812*4882a593Smuzhiyun 
813*4882a593Smuzhiyun 	if (!allow_create) {
814*4882a593Smuzhiyun 		/* We should not create new node because
815*4882a593Smuzhiyun 		 * NLM_F_REPLACE was specified without NLM_F_CREATE
816*4882a593Smuzhiyun 		 * I assume it is safe to require NLM_F_CREATE when
817*4882a593Smuzhiyun 		 * REPLACE flag is used! Later we may want to remove the
818*4882a593Smuzhiyun 		 * check for replace_required, because according
819*4882a593Smuzhiyun 		 * to netlink specification, NLM_F_CREATE
820*4882a593Smuzhiyun 		 * MUST be specified if new route is created.
821*4882a593Smuzhiyun 		 * That would keep IPv6 consistent with IPv4
822*4882a593Smuzhiyun 		 */
823*4882a593Smuzhiyun 		if (replace_required) {
824*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack,
825*4882a593Smuzhiyun 				       "Can not replace route - no match found");
826*4882a593Smuzhiyun 			pr_warn("Can't replace route, no match found\n");
827*4882a593Smuzhiyun 			return ERR_PTR(-ENOENT);
828*4882a593Smuzhiyun 		}
829*4882a593Smuzhiyun 		pr_warn("NLM_F_CREATE should be set when creating new route\n");
830*4882a593Smuzhiyun 	}
831*4882a593Smuzhiyun 	/*
832*4882a593Smuzhiyun 	 *	We walked to the bottom of tree.
833*4882a593Smuzhiyun 	 *	Create new leaf node without children.
834*4882a593Smuzhiyun 	 */
835*4882a593Smuzhiyun 
836*4882a593Smuzhiyun 	ln = node_alloc(net);
837*4882a593Smuzhiyun 
838*4882a593Smuzhiyun 	if (!ln)
839*4882a593Smuzhiyun 		return ERR_PTR(-ENOMEM);
840*4882a593Smuzhiyun 	ln->fn_bit = plen;
841*4882a593Smuzhiyun 	RCU_INIT_POINTER(ln->parent, pn);
842*4882a593Smuzhiyun 
	/* pn is the last node visited, dir the (empty) branch the walk chose */
843*4882a593Smuzhiyun 	if (dir)
844*4882a593Smuzhiyun 		rcu_assign_pointer(pn->right, ln);
845*4882a593Smuzhiyun 	else
846*4882a593Smuzhiyun 		rcu_assign_pointer(pn->left, ln);
847*4882a593Smuzhiyun 
848*4882a593Smuzhiyun 	return ln;
849*4882a593Smuzhiyun 
850*4882a593Smuzhiyun 
851*4882a593Smuzhiyun insert_above:
852*4882a593Smuzhiyun 	/*
853*4882a593Smuzhiyun 	 * split since we don't have a common prefix anymore or
854*4882a593Smuzhiyun 	 * we have a less significant route.
855*4882a593Smuzhiyun 	 * we've to insert an intermediate node on the list
856*4882a593Smuzhiyun 	 * this new node will point to the one we need to create
857*4882a593Smuzhiyun 	 * and the current
858*4882a593Smuzhiyun 	 */
859*4882a593Smuzhiyun 
860*4882a593Smuzhiyun 	pn = rcu_dereference_protected(fn->parent,
861*4882a593Smuzhiyun 				       lockdep_is_held(&table->tb6_lock));
862*4882a593Smuzhiyun 
863*4882a593Smuzhiyun 	/* find 1st bit in difference between the 2 addrs.
864*4882a593Smuzhiyun 
865*4882a593Smuzhiyun 	   See comment in __ipv6_addr_diff: bit may be an invalid value,
866*4882a593Smuzhiyun 	   but if it is >= plen, the value is ignored in any case.
867*4882a593Smuzhiyun 	 */
868*4882a593Smuzhiyun 
869*4882a593Smuzhiyun 	bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr));
870*4882a593Smuzhiyun 
871*4882a593Smuzhiyun 	/*
872*4882a593Smuzhiyun 	 *		(intermediate)[in]
873*4882a593Smuzhiyun 	 *	          /	   \
874*4882a593Smuzhiyun 	 *	(new leaf node)[ln] (old node)[fn]
875*4882a593Smuzhiyun 	 */
876*4882a593Smuzhiyun 	if (plen > bit) {
877*4882a593Smuzhiyun 		in = node_alloc(net);
878*4882a593Smuzhiyun 		ln = node_alloc(net);
879*4882a593Smuzhiyun 
880*4882a593Smuzhiyun 		if (!in || !ln) {
881*4882a593Smuzhiyun 			if (in)
882*4882a593Smuzhiyun 				node_free_immediate(net, in);
883*4882a593Smuzhiyun 			if (ln)
884*4882a593Smuzhiyun 				node_free_immediate(net, ln);
885*4882a593Smuzhiyun 			return ERR_PTR(-ENOMEM);
886*4882a593Smuzhiyun 		}
887*4882a593Smuzhiyun 
888*4882a593Smuzhiyun 		/*
889*4882a593Smuzhiyun 		 * new intermediate node.
890*4882a593Smuzhiyun 		 * RTN_RTINFO will
891*4882a593Smuzhiyun 		 * be off since that an address that chooses one of
892*4882a593Smuzhiyun 		 * the branches would not match less specific routes
893*4882a593Smuzhiyun 		 * in the other branch
894*4882a593Smuzhiyun 		 */
895*4882a593Smuzhiyun 
896*4882a593Smuzhiyun 		in->fn_bit = bit;
897*4882a593Smuzhiyun 
898*4882a593Smuzhiyun 		RCU_INIT_POINTER(in->parent, pn);
		/* the intermediate node shares the leaf of fn and holds its own
		 * reference on it
		 */
899*4882a593Smuzhiyun 		in->leaf = fn->leaf;
900*4882a593Smuzhiyun 		fib6_info_hold(rcu_dereference_protected(in->leaf,
901*4882a593Smuzhiyun 				lockdep_is_held(&table->tb6_lock)));
902*4882a593Smuzhiyun 
903*4882a593Smuzhiyun 		/* update parent pointer */
		/* dir: branch last taken in the walk above (0 if it never descended) */
904*4882a593Smuzhiyun 		if (dir)
905*4882a593Smuzhiyun 			rcu_assign_pointer(pn->right, in);
906*4882a593Smuzhiyun 		else
907*4882a593Smuzhiyun 			rcu_assign_pointer(pn->left, in);
908*4882a593Smuzhiyun 
909*4882a593Smuzhiyun 		ln->fn_bit = plen;
910*4882a593Smuzhiyun 
911*4882a593Smuzhiyun 		RCU_INIT_POINTER(ln->parent, in);
912*4882a593Smuzhiyun 		rcu_assign_pointer(fn->parent, in);
913*4882a593Smuzhiyun 
		/* bit number bit of @addr decides which side of in gets ln;
		 * the old node fn takes the other side
		 */
914*4882a593Smuzhiyun 		if (addr_bit_set(addr, bit)) {
915*4882a593Smuzhiyun 			rcu_assign_pointer(in->right, ln);
916*4882a593Smuzhiyun 			rcu_assign_pointer(in->left, fn);
917*4882a593Smuzhiyun 		} else {
918*4882a593Smuzhiyun 			rcu_assign_pointer(in->left, ln);
919*4882a593Smuzhiyun 			rcu_assign_pointer(in->right, fn);
920*4882a593Smuzhiyun 		}
921*4882a593Smuzhiyun 	} else { /* plen <= bit */
922*4882a593Smuzhiyun 
923*4882a593Smuzhiyun 		/*
924*4882a593Smuzhiyun 		 *		(new leaf node)[ln]
925*4882a593Smuzhiyun 		 *	          /	   \
926*4882a593Smuzhiyun 		 *	     (old node)[fn] NULL
927*4882a593Smuzhiyun 		 */
928*4882a593Smuzhiyun 
929*4882a593Smuzhiyun 		ln = node_alloc(net);
930*4882a593Smuzhiyun 
931*4882a593Smuzhiyun 		if (!ln)
932*4882a593Smuzhiyun 			return ERR_PTR(-ENOMEM);
933*4882a593Smuzhiyun 
934*4882a593Smuzhiyun 		ln->fn_bit = plen;
935*4882a593Smuzhiyun 
936*4882a593Smuzhiyun 		RCU_INIT_POINTER(ln->parent, pn);
937*4882a593Smuzhiyun 
		/* fn goes on the side selected by bit @plen of its own key */
938*4882a593Smuzhiyun 		if (addr_bit_set(&key->addr, plen))
939*4882a593Smuzhiyun 			RCU_INIT_POINTER(ln->right, fn);
940*4882a593Smuzhiyun 		else
941*4882a593Smuzhiyun 			RCU_INIT_POINTER(ln->left, fn);
942*4882a593Smuzhiyun 
943*4882a593Smuzhiyun 		rcu_assign_pointer(fn->parent, ln);
944*4882a593Smuzhiyun 
		/* finally link ln under pn, in the slot selected by dir */
945*4882a593Smuzhiyun 		if (dir)
946*4882a593Smuzhiyun 			rcu_assign_pointer(pn->right, ln);
947*4882a593Smuzhiyun 		else
948*4882a593Smuzhiyun 			rcu_assign_pointer(pn->left, ln);
949*4882a593Smuzhiyun 	}
950*4882a593Smuzhiyun 	return ln;
951*4882a593Smuzhiyun }
952*4882a593Smuzhiyun 
__fib6_drop_pcpu_from(struct fib6_nh * fib6_nh,const struct fib6_info * match,const struct fib6_table * table)953*4882a593Smuzhiyun static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
954*4882a593Smuzhiyun 				  const struct fib6_info *match,
955*4882a593Smuzhiyun 				  const struct fib6_table *table)
956*4882a593Smuzhiyun {
957*4882a593Smuzhiyun 	int cpu;
958*4882a593Smuzhiyun 
959*4882a593Smuzhiyun 	if (!fib6_nh->rt6i_pcpu)
960*4882a593Smuzhiyun 		return;
961*4882a593Smuzhiyun 
962*4882a593Smuzhiyun 	/* release the reference to this fib entry from
963*4882a593Smuzhiyun 	 * all of its cached pcpu routes
964*4882a593Smuzhiyun 	 */
965*4882a593Smuzhiyun 	for_each_possible_cpu(cpu) {
966*4882a593Smuzhiyun 		struct rt6_info **ppcpu_rt;
967*4882a593Smuzhiyun 		struct rt6_info *pcpu_rt;
968*4882a593Smuzhiyun 
969*4882a593Smuzhiyun 		ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
970*4882a593Smuzhiyun 		pcpu_rt = *ppcpu_rt;
971*4882a593Smuzhiyun 
972*4882a593Smuzhiyun 		/* only dropping the 'from' reference if the cached route
973*4882a593Smuzhiyun 		 * is using 'match'. The cached pcpu_rt->from only changes
974*4882a593Smuzhiyun 		 * from a fib6_info to NULL (ip6_dst_destroy); it can never
975*4882a593Smuzhiyun 		 * change from one fib6_info reference to another
976*4882a593Smuzhiyun 		 */
977*4882a593Smuzhiyun 		if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
978*4882a593Smuzhiyun 			struct fib6_info *from;
979*4882a593Smuzhiyun 
980*4882a593Smuzhiyun 			from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
981*4882a593Smuzhiyun 			fib6_info_release(from);
982*4882a593Smuzhiyun 		}
983*4882a593Smuzhiyun 	}
984*4882a593Smuzhiyun }
985*4882a593Smuzhiyun 
/* Argument pack handed to fib6_nh_drop_pcpu_from() through a void pointer. */
struct fib6_nh_pcpu_arg {
	struct fib6_info	*from;	/* entry whose pcpu references are dropped */
	const struct fib6_table *table;	/* forwarded unchanged to the worker */
};

/*
 *	Per-nexthop callback: unpack @_arg (a struct fib6_nh_pcpu_arg) and
 *	run __fib6_drop_pcpu_from() on @nh.  Always returns 0.
 */
static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg)
{
	const struct fib6_nh_pcpu_arg *ctx = _arg;

	__fib6_drop_pcpu_from(nh, ctx->from, ctx->table);

	return 0;
}
998*4882a593Smuzhiyun 
fib6_drop_pcpu_from(struct fib6_info * f6i,const struct fib6_table * table)999*4882a593Smuzhiyun static void fib6_drop_pcpu_from(struct fib6_info *f6i,
1000*4882a593Smuzhiyun 				const struct fib6_table *table)
1001*4882a593Smuzhiyun {
1002*4882a593Smuzhiyun 	/* Make sure rt6_make_pcpu_route() wont add other percpu routes
1003*4882a593Smuzhiyun 	 * while we are cleaning them here.
1004*4882a593Smuzhiyun 	 */
1005*4882a593Smuzhiyun 	f6i->fib6_destroying = 1;
1006*4882a593Smuzhiyun 	mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
1007*4882a593Smuzhiyun 
1008*4882a593Smuzhiyun 	if (f6i->nh) {
1009*4882a593Smuzhiyun 		struct fib6_nh_pcpu_arg arg = {
1010*4882a593Smuzhiyun 			.from = f6i,
1011*4882a593Smuzhiyun 			.table = table
1012*4882a593Smuzhiyun 		};
1013*4882a593Smuzhiyun 
1014*4882a593Smuzhiyun 		nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from,
1015*4882a593Smuzhiyun 					 &arg);
1016*4882a593Smuzhiyun 	} else {
1017*4882a593Smuzhiyun 		struct fib6_nh *fib6_nh;
1018*4882a593Smuzhiyun 
1019*4882a593Smuzhiyun 		fib6_nh = f6i->fib6_nh;
1020*4882a593Smuzhiyun 		__fib6_drop_pcpu_from(fib6_nh, f6i, table);
1021*4882a593Smuzhiyun 	}
1022*4882a593Smuzhiyun }
1023*4882a593Smuzhiyun 
fib6_purge_rt(struct fib6_info * rt,struct fib6_node * fn,struct net * net)1024*4882a593Smuzhiyun static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
1025*4882a593Smuzhiyun 			  struct net *net)
1026*4882a593Smuzhiyun {
1027*4882a593Smuzhiyun 	struct fib6_table *table = rt->fib6_table;
1028*4882a593Smuzhiyun 
1029*4882a593Smuzhiyun 	/* Flush all cached dst in exception table */
1030*4882a593Smuzhiyun 	rt6_flush_exceptions(rt);
1031*4882a593Smuzhiyun 	fib6_drop_pcpu_from(rt, table);
1032*4882a593Smuzhiyun 
1033*4882a593Smuzhiyun 	if (rt->nh && !list_empty(&rt->nh_list))
1034*4882a593Smuzhiyun 		list_del_init(&rt->nh_list);
1035*4882a593Smuzhiyun 
1036*4882a593Smuzhiyun 	if (refcount_read(&rt->fib6_ref) != 1) {
1037*4882a593Smuzhiyun 		/* This route is used as dummy address holder in some split
1038*4882a593Smuzhiyun 		 * nodes. It is not leaked, but it still holds other resources,
1039*4882a593Smuzhiyun 		 * which must be released in time. So, scan ascendant nodes
1040*4882a593Smuzhiyun 		 * and replace dummy references to this route with references
1041*4882a593Smuzhiyun 		 * to still alive ones.
1042*4882a593Smuzhiyun 		 */
1043*4882a593Smuzhiyun 		while (fn) {
1044*4882a593Smuzhiyun 			struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
1045*4882a593Smuzhiyun 					    lockdep_is_held(&table->tb6_lock));
1046*4882a593Smuzhiyun 			struct fib6_info *new_leaf;
1047*4882a593Smuzhiyun 			if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
1048*4882a593Smuzhiyun 				new_leaf = fib6_find_prefix(net, table, fn);
1049*4882a593Smuzhiyun 				fib6_info_hold(new_leaf);
1050*4882a593Smuzhiyun 
1051*4882a593Smuzhiyun 				rcu_assign_pointer(fn->leaf, new_leaf);
1052*4882a593Smuzhiyun 				fib6_info_release(rt);
1053*4882a593Smuzhiyun 			}
1054*4882a593Smuzhiyun 			fn = rcu_dereference_protected(fn->parent,
1055*4882a593Smuzhiyun 				    lockdep_is_held(&table->tb6_lock));
1056*4882a593Smuzhiyun 		}
1057*4882a593Smuzhiyun 	}
1058*4882a593Smuzhiyun }
1059*4882a593Smuzhiyun 
1060*4882a593Smuzhiyun /*
1061*4882a593Smuzhiyun  *	Insert routing information in a node.
1062*4882a593Smuzhiyun  */
1063*4882a593Smuzhiyun 
/*
 * fib6_add_rt2node - link @rt into the route list that starts at fn->leaf.
 *
 * The list is scanned from the head; the scan stops at the first entry
 * whose fib6_metric is greater than that of @rt, and @rt is linked in
 * front of it.  The netlink flags in info->nlh select the behaviour:
 *
 *   NLM_F_EXCL    - fail with -EEXIST when an entry of equal metric exists
 *   NLM_F_REPLACE - replace an entry of equal metric, preferring one with
 *                   the same rt6_qualify_for_ecmp() result as @rt and
 *                   otherwise falling back to the first entry of equal
 *                   metric that was seen
 *   NLM_F_CREATE  - adding a new entry is expected (also assumed when
 *                   info->nlh is NULL); without it a plain add only logs
 *                   a warning
 *   NLM_F_APPEND  - only copied into the flags of the notification
 *
 * Returns 0 on success, -EEXIST for an exclusive or duplicate add, -ENOENT
 * when a replace finds nothing and NLM_F_CREATE is not set, or the error
 * returned by call_fib6_entry_notifiers().
 *
 * Every rcu_dereference_protected() below names rt->fib6_table->tb6_lock
 * as the lock expected to be held.
 */
fib6_add_rt2node(struct fib6_node * fn,struct fib6_info * rt,struct nl_info * info,struct netlink_ext_ack * extack)1064*4882a593Smuzhiyun static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
1065*4882a593Smuzhiyun 			    struct nl_info *info,
1066*4882a593Smuzhiyun 			    struct netlink_ext_ack *extack)
1067*4882a593Smuzhiyun {
1068*4882a593Smuzhiyun 	struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
1069*4882a593Smuzhiyun 				    lockdep_is_held(&rt->fib6_table->tb6_lock));
1070*4882a593Smuzhiyun 	struct fib6_info *iter = NULL;
1071*4882a593Smuzhiyun 	struct fib6_info __rcu **ins;
1072*4882a593Smuzhiyun 	struct fib6_info __rcu **fallback_ins = NULL;
1073*4882a593Smuzhiyun 	int replace = (info->nlh &&
1074*4882a593Smuzhiyun 		       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
1075*4882a593Smuzhiyun 	int add = (!info->nlh ||
1076*4882a593Smuzhiyun 		   (info->nlh->nlmsg_flags & NLM_F_CREATE));
1077*4882a593Smuzhiyun 	int found = 0;
1078*4882a593Smuzhiyun 	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
1079*4882a593Smuzhiyun 	bool notify_sibling_rt = false;
1080*4882a593Smuzhiyun 	u16 nlflags = NLM_F_EXCL;
1081*4882a593Smuzhiyun 	int err;
1082*4882a593Smuzhiyun 
1083*4882a593Smuzhiyun 	if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
1084*4882a593Smuzhiyun 		nlflags |= NLM_F_APPEND;
1085*4882a593Smuzhiyun 
	/* ins tracks the link (fn->leaf or some ->fib6_next slot) through
	 * which rt will be stored
	 */
1086*4882a593Smuzhiyun 	ins = &fn->leaf;
1087*4882a593Smuzhiyun 
1088*4882a593Smuzhiyun 	for (iter = leaf; iter;
1089*4882a593Smuzhiyun 	     iter = rcu_dereference_protected(iter->fib6_next,
1090*4882a593Smuzhiyun 				lockdep_is_held(&rt->fib6_table->tb6_lock))) {
1091*4882a593Smuzhiyun 		/*
1092*4882a593Smuzhiyun 		 *	Search for duplicates
1093*4882a593Smuzhiyun 		 */
1094*4882a593Smuzhiyun 
1095*4882a593Smuzhiyun 		if (iter->fib6_metric == rt->fib6_metric) {
1096*4882a593Smuzhiyun 			/*
1097*4882a593Smuzhiyun 			 *	Same priority level
1098*4882a593Smuzhiyun 			 */
1099*4882a593Smuzhiyun 			if (info->nlh &&
1100*4882a593Smuzhiyun 			    (info->nlh->nlmsg_flags & NLM_F_EXCL))
1101*4882a593Smuzhiyun 				return -EEXIST;
1102*4882a593Smuzhiyun 
1103*4882a593Smuzhiyun 			nlflags &= ~NLM_F_EXCL;
1104*4882a593Smuzhiyun 			if (replace) {
1105*4882a593Smuzhiyun 				if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
1106*4882a593Smuzhiyun 					found++;
1107*4882a593Smuzhiyun 					break;
1108*4882a593Smuzhiyun 				}
				/* remember the first entry of equal metric in case
				 * no entry with matching ECMP qualification shows up
				 */
1109*4882a593Smuzhiyun 				fallback_ins = fallback_ins ?: ins;
1110*4882a593Smuzhiyun 				goto next_iter;
1111*4882a593Smuzhiyun 			}
1112*4882a593Smuzhiyun 
			/* duplicate nexthop: the add always ends in -EEXIST; an
			 * existing entry carrying RTF_EXPIRES first gets its
			 * expiry (and MTU, if rt has one) refreshed from rt
			 */
1113*4882a593Smuzhiyun 			if (rt6_duplicate_nexthop(iter, rt)) {
1114*4882a593Smuzhiyun 				if (rt->fib6_nsiblings)
1115*4882a593Smuzhiyun 					rt->fib6_nsiblings = 0;
1116*4882a593Smuzhiyun 				if (!(iter->fib6_flags & RTF_EXPIRES))
1117*4882a593Smuzhiyun 					return -EEXIST;
1118*4882a593Smuzhiyun 				if (!(rt->fib6_flags & RTF_EXPIRES))
1119*4882a593Smuzhiyun 					fib6_clean_expires(iter);
1120*4882a593Smuzhiyun 				else
1121*4882a593Smuzhiyun 					fib6_set_expires(iter, rt->expires);
1122*4882a593Smuzhiyun 
1123*4882a593Smuzhiyun 				if (rt->fib6_pmtu)
1124*4882a593Smuzhiyun 					fib6_metric_set(iter, RTAX_MTU,
1125*4882a593Smuzhiyun 							rt->fib6_pmtu);
1126*4882a593Smuzhiyun 				return -EEXIST;
1127*4882a593Smuzhiyun 			}
1128*4882a593Smuzhiyun 			/* If we have the same destination and the same metric,
1129*4882a593Smuzhiyun 			 * but not the same gateway, then the route we try to
1130*4882a593Smuzhiyun 			 * add is sibling to this route, increment our counter
1131*4882a593Smuzhiyun 			 * of siblings, and later we will add our route to the
1132*4882a593Smuzhiyun 			 * list.
1133*4882a593Smuzhiyun 			 * Only static routes (which don't have flag
1134*4882a593Smuzhiyun 			 * RTF_EXPIRES) are used for ECMPv6.
1135*4882a593Smuzhiyun 			 *
1136*4882a593Smuzhiyun 			 * To avoid long list, we only had siblings if the
1137*4882a593Smuzhiyun 			 * route have a gateway.
1138*4882a593Smuzhiyun 			 */
1139*4882a593Smuzhiyun 			if (rt_can_ecmp &&
1140*4882a593Smuzhiyun 			    rt6_qualify_for_ecmp(iter))
1141*4882a593Smuzhiyun 				rt->fib6_nsiblings++;
1142*4882a593Smuzhiyun 		}
1143*4882a593Smuzhiyun 
1144*4882a593Smuzhiyun 		if (iter->fib6_metric > rt->fib6_metric)
1145*4882a593Smuzhiyun 			break;
1146*4882a593Smuzhiyun 
1147*4882a593Smuzhiyun next_iter:
1148*4882a593Smuzhiyun 		ins = &iter->fib6_next;
1149*4882a593Smuzhiyun 	}
1150*4882a593Smuzhiyun 
1151*4882a593Smuzhiyun 	if (fallback_ins && !found) {
1152*4882a593Smuzhiyun 		/* No matching route with same ecmp-able-ness found, replace
1153*4882a593Smuzhiyun 		 * first matching route
1154*4882a593Smuzhiyun 		 */
1155*4882a593Smuzhiyun 		ins = fallback_ins;
1156*4882a593Smuzhiyun 		iter = rcu_dereference_protected(*ins,
1157*4882a593Smuzhiyun 				    lockdep_is_held(&rt->fib6_table->tb6_lock));
1158*4882a593Smuzhiyun 		found++;
1159*4882a593Smuzhiyun 	}
1160*4882a593Smuzhiyun 
1161*4882a593Smuzhiyun 	/* Reset round-robin state, if necessary */
1162*4882a593Smuzhiyun 	if (ins == &fn->leaf)
1163*4882a593Smuzhiyun 		fn->rr_ptr = NULL;
1164*4882a593Smuzhiyun 
1165*4882a593Smuzhiyun 	/* Link this route to others same route. */
1166*4882a593Smuzhiyun 	if (rt->fib6_nsiblings) {
1167*4882a593Smuzhiyun 		unsigned int fib6_nsiblings;
1168*4882a593Smuzhiyun 		struct fib6_info *sibling, *temp_sibling;
1169*4882a593Smuzhiyun 
1170*4882a593Smuzhiyun 		/* Find the first route that have the same metric */
1171*4882a593Smuzhiyun 		sibling = leaf;
		/* stays true only when the matching sibling is the very first
		 * entry of the node; cleared as soon as the search moves on
		 */
1172*4882a593Smuzhiyun 		notify_sibling_rt = true;
1173*4882a593Smuzhiyun 		while (sibling) {
1174*4882a593Smuzhiyun 			if (sibling->fib6_metric == rt->fib6_metric &&
1175*4882a593Smuzhiyun 			    rt6_qualify_for_ecmp(sibling)) {
1176*4882a593Smuzhiyun 				list_add_tail(&rt->fib6_siblings,
1177*4882a593Smuzhiyun 					      &sibling->fib6_siblings);
1178*4882a593Smuzhiyun 				break;
1179*4882a593Smuzhiyun 			}
1180*4882a593Smuzhiyun 			sibling = rcu_dereference_protected(sibling->fib6_next,
1181*4882a593Smuzhiyun 				    lockdep_is_held(&rt->fib6_table->tb6_lock));
1182*4882a593Smuzhiyun 			notify_sibling_rt = false;
1183*4882a593Smuzhiyun 		}
1184*4882a593Smuzhiyun 		/* For each sibling in the list, increment the counter of
1185*4882a593Smuzhiyun 		 * siblings. BUG() if counters does not match, list of siblings
1186*4882a593Smuzhiyun 		 * is broken!
1187*4882a593Smuzhiyun 		 */
1188*4882a593Smuzhiyun 		fib6_nsiblings = 0;
1189*4882a593Smuzhiyun 		list_for_each_entry_safe(sibling, temp_sibling,
1190*4882a593Smuzhiyun 					 &rt->fib6_siblings, fib6_siblings) {
1191*4882a593Smuzhiyun 			sibling->fib6_nsiblings++;
1192*4882a593Smuzhiyun 			BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
1193*4882a593Smuzhiyun 			fib6_nsiblings++;
1194*4882a593Smuzhiyun 		}
1195*4882a593Smuzhiyun 		BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
1196*4882a593Smuzhiyun 		rt6_multipath_rebalance(temp_sibling);
1197*4882a593Smuzhiyun 	}
1198*4882a593Smuzhiyun 
1199*4882a593Smuzhiyun 	/*
1200*4882a593Smuzhiyun 	 *	insert node
1201*4882a593Smuzhiyun 	 */
1202*4882a593Smuzhiyun 	if (!replace) {
1203*4882a593Smuzhiyun 		if (!add)
1204*4882a593Smuzhiyun 			pr_warn("NLM_F_CREATE should be set when creating new route\n");
1205*4882a593Smuzhiyun 
		/* also entered by a goto from the replace branch below, when
		 * nothing matched but creating a new entry is allowed
		 */
1206*4882a593Smuzhiyun add:
1207*4882a593Smuzhiyun 		nlflags |= NLM_F_CREATE;
1208*4882a593Smuzhiyun 
1209*4882a593Smuzhiyun 		/* The route should only be notified if it is the first
1210*4882a593Smuzhiyun 		 * route in the node or if it is added as a sibling
1211*4882a593Smuzhiyun 		 * route to the first route in the node.
1212*4882a593Smuzhiyun 		 */
1213*4882a593Smuzhiyun 		if (!info->skip_notify_kernel &&
1214*4882a593Smuzhiyun 		    (notify_sibling_rt || ins == &fn->leaf)) {
1215*4882a593Smuzhiyun 			enum fib_event_type fib_event;
1216*4882a593Smuzhiyun 
1217*4882a593Smuzhiyun 			if (notify_sibling_rt)
1218*4882a593Smuzhiyun 				fib_event = FIB_EVENT_ENTRY_APPEND;
1219*4882a593Smuzhiyun 			else
1220*4882a593Smuzhiyun 				fib_event = FIB_EVENT_ENTRY_REPLACE;
1221*4882a593Smuzhiyun 			err = call_fib6_entry_notifiers(info->nl_net,
1222*4882a593Smuzhiyun 							fib_event, rt,
1223*4882a593Smuzhiyun 							extack);
1224*4882a593Smuzhiyun 			if (err) {
1225*4882a593Smuzhiyun 				struct fib6_info *sibling, *next_sibling;
1226*4882a593Smuzhiyun 
1227*4882a593Smuzhiyun 				/* If the route has siblings, then it first
1228*4882a593Smuzhiyun 				 * needs to be unlinked from them.
1229*4882a593Smuzhiyun 				 */
1230*4882a593Smuzhiyun 				if (!rt->fib6_nsiblings)
1231*4882a593Smuzhiyun 					return err;
1232*4882a593Smuzhiyun 
				/* roll back the sibling accounting done above */
1233*4882a593Smuzhiyun 				list_for_each_entry_safe(sibling, next_sibling,
1234*4882a593Smuzhiyun 							 &rt->fib6_siblings,
1235*4882a593Smuzhiyun 							 fib6_siblings)
1236*4882a593Smuzhiyun 					sibling->fib6_nsiblings--;
1237*4882a593Smuzhiyun 				rt->fib6_nsiblings = 0;
1238*4882a593Smuzhiyun 				list_del_init(&rt->fib6_siblings);
1239*4882a593Smuzhiyun 				rt6_multipath_rebalance(next_sibling);
1240*4882a593Smuzhiyun 				return err;
1241*4882a593Smuzhiyun 			}
1242*4882a593Smuzhiyun 		}
1243*4882a593Smuzhiyun 
		/* link rt in front of iter; a reference on rt is taken for
		 * the list before rt becomes reachable through *ins
		 */
1244*4882a593Smuzhiyun 		rcu_assign_pointer(rt->fib6_next, iter);
1245*4882a593Smuzhiyun 		fib6_info_hold(rt);
1246*4882a593Smuzhiyun 		rcu_assign_pointer(rt->fib6_node, fn);
1247*4882a593Smuzhiyun 		rcu_assign_pointer(*ins, rt);
1248*4882a593Smuzhiyun 		if (!info->skip_notify)
1249*4882a593Smuzhiyun 			inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
1250*4882a593Smuzhiyun 		info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
1251*4882a593Smuzhiyun 
1252*4882a593Smuzhiyun 		if (!(fn->fn_flags & RTN_RTINFO)) {
1253*4882a593Smuzhiyun 			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
1254*4882a593Smuzhiyun 			fn->fn_flags |= RTN_RTINFO;
1255*4882a593Smuzhiyun 		}
1256*4882a593Smuzhiyun 
1257*4882a593Smuzhiyun 	} else {
1258*4882a593Smuzhiyun 		int nsiblings;
1259*4882a593Smuzhiyun 
1260*4882a593Smuzhiyun 		if (!found) {
1261*4882a593Smuzhiyun 			if (add)
1262*4882a593Smuzhiyun 				goto add;
1263*4882a593Smuzhiyun 			pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
1264*4882a593Smuzhiyun 			return -ENOENT;
1265*4882a593Smuzhiyun 		}
1266*4882a593Smuzhiyun 
1267*4882a593Smuzhiyun 		if (!info->skip_notify_kernel && ins == &fn->leaf) {
1268*4882a593Smuzhiyun 			err = call_fib6_entry_notifiers(info->nl_net,
1269*4882a593Smuzhiyun 							FIB_EVENT_ENTRY_REPLACE,
1270*4882a593Smuzhiyun 							rt, extack);
1271*4882a593Smuzhiyun 			if (err)
1272*4882a593Smuzhiyun 				return err;
1273*4882a593Smuzhiyun 		}
1274*4882a593Smuzhiyun 
		/* rt takes over the slot of iter; iter is purged and the
		 * reference held for it is released further down
		 */
1275*4882a593Smuzhiyun 		fib6_info_hold(rt);
1276*4882a593Smuzhiyun 		rcu_assign_pointer(rt->fib6_node, fn);
1277*4882a593Smuzhiyun 		rt->fib6_next = iter->fib6_next;
1278*4882a593Smuzhiyun 		rcu_assign_pointer(*ins, rt);
1279*4882a593Smuzhiyun 		if (!info->skip_notify)
1280*4882a593Smuzhiyun 			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
1281*4882a593Smuzhiyun 		if (!(fn->fn_flags & RTN_RTINFO)) {
1282*4882a593Smuzhiyun 			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
1283*4882a593Smuzhiyun 			fn->fn_flags |= RTN_RTINFO;
1284*4882a593Smuzhiyun 		}
		/* read the sibling count before iter is purged and released */
1285*4882a593Smuzhiyun 		nsiblings = iter->fib6_nsiblings;
1286*4882a593Smuzhiyun 		iter->fib6_node = NULL;
1287*4882a593Smuzhiyun 		fib6_purge_rt(iter, fn, info->nl_net);
1288*4882a593Smuzhiyun 		if (rcu_access_pointer(fn->rr_ptr) == iter)
1289*4882a593Smuzhiyun 			fn->rr_ptr = NULL;
1290*4882a593Smuzhiyun 		fib6_info_release(iter);
1291*4882a593Smuzhiyun 
1292*4882a593Smuzhiyun 		if (nsiblings) {
1293*4882a593Smuzhiyun 			/* Replacing an ECMP route, remove all siblings */
1294*4882a593Smuzhiyun 			ins = &rt->fib6_next;
1295*4882a593Smuzhiyun 			iter = rcu_dereference_protected(*ins,
1296*4882a593Smuzhiyun 				    lockdep_is_held(&rt->fib6_table->tb6_lock));
1297*4882a593Smuzhiyun 			while (iter) {
1298*4882a593Smuzhiyun 				if (iter->fib6_metric > rt->fib6_metric)
1299*4882a593Smuzhiyun 					break;
1300*4882a593Smuzhiyun 				if (rt6_qualify_for_ecmp(iter)) {
1301*4882a593Smuzhiyun 					*ins = iter->fib6_next;
1302*4882a593Smuzhiyun 					iter->fib6_node = NULL;
1303*4882a593Smuzhiyun 					fib6_purge_rt(iter, fn, info->nl_net);
1304*4882a593Smuzhiyun 					if (rcu_access_pointer(fn->rr_ptr) == iter)
1305*4882a593Smuzhiyun 						fn->rr_ptr = NULL;
1306*4882a593Smuzhiyun 					fib6_info_release(iter);
1307*4882a593Smuzhiyun 					nsiblings--;
1308*4882a593Smuzhiyun 					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
1309*4882a593Smuzhiyun 				} else {
1310*4882a593Smuzhiyun 					ins = &iter->fib6_next;
1311*4882a593Smuzhiyun 				}
1312*4882a593Smuzhiyun 				iter = rcu_dereference_protected(*ins,
1313*4882a593Smuzhiyun 					lockdep_is_held(&rt->fib6_table->tb6_lock));
1314*4882a593Smuzhiyun 			}
1315*4882a593Smuzhiyun 			WARN_ON(nsiblings != 0);
1316*4882a593Smuzhiyun 		}
1317*4882a593Smuzhiyun 	}
1318*4882a593Smuzhiyun 
1319*4882a593Smuzhiyun 	return 0;
1320*4882a593Smuzhiyun }
1321*4882a593Smuzhiyun 
fib6_start_gc(struct net * net,struct fib6_info * rt)1322*4882a593Smuzhiyun static void fib6_start_gc(struct net *net, struct fib6_info *rt)
1323*4882a593Smuzhiyun {
1324*4882a593Smuzhiyun 	if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
1325*4882a593Smuzhiyun 	    (rt->fib6_flags & RTF_EXPIRES))
1326*4882a593Smuzhiyun 		mod_timer(&net->ipv6.ip6_fib_timer,
1327*4882a593Smuzhiyun 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1328*4882a593Smuzhiyun }
1329*4882a593Smuzhiyun 
fib6_force_start_gc(struct net * net)1330*4882a593Smuzhiyun void fib6_force_start_gc(struct net *net)
1331*4882a593Smuzhiyun {
1332*4882a593Smuzhiyun 	if (!timer_pending(&net->ipv6.ip6_fib_timer))
1333*4882a593Smuzhiyun 		mod_timer(&net->ipv6.ip6_fib_timer,
1334*4882a593Smuzhiyun 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1335*4882a593Smuzhiyun }
1336*4882a593Smuzhiyun 
__fib6_update_sernum_upto_root(struct fib6_info * rt,int sernum)1337*4882a593Smuzhiyun static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
1338*4882a593Smuzhiyun 					   int sernum)
1339*4882a593Smuzhiyun {
1340*4882a593Smuzhiyun 	struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
1341*4882a593Smuzhiyun 				lockdep_is_held(&rt->fib6_table->tb6_lock));
1342*4882a593Smuzhiyun 
1343*4882a593Smuzhiyun 	/* paired with smp_rmb() in rt6_get_cookie_safe() */
1344*4882a593Smuzhiyun 	smp_wmb();
1345*4882a593Smuzhiyun 	while (fn) {
1346*4882a593Smuzhiyun 		WRITE_ONCE(fn->fn_sernum, sernum);
1347*4882a593Smuzhiyun 		fn = rcu_dereference_protected(fn->parent,
1348*4882a593Smuzhiyun 				lockdep_is_held(&rt->fib6_table->tb6_lock));
1349*4882a593Smuzhiyun 	}
1350*4882a593Smuzhiyun }
1351*4882a593Smuzhiyun 
/* Obtain a new serial number from fib6_new_sernum() and write it into the
 * node of @rt and all of that node's ancestors.
 */
void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
{
	int sernum = fib6_new_sernum(net);

	__fib6_update_sernum_upto_root(rt, sernum);
}
1356*4882a593Smuzhiyun 
1357*4882a593Smuzhiyun /* allow ipv4 to update sernum via ipv6_stub */
fib6_update_sernum_stub(struct net * net,struct fib6_info * f6i)1358*4882a593Smuzhiyun void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i)
1359*4882a593Smuzhiyun {
1360*4882a593Smuzhiyun 	spin_lock_bh(&f6i->fib6_table->tb6_lock);
1361*4882a593Smuzhiyun 	fib6_update_sernum_upto_root(net, f6i);
1362*4882a593Smuzhiyun 	spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1363*4882a593Smuzhiyun }
1364*4882a593Smuzhiyun 
1365*4882a593Smuzhiyun /*
1366*4882a593Smuzhiyun  *	Add routing information to the routing tree.
1367*4882a593Smuzhiyun  *	<destination addr>/<source addr>
1368*4882a593Smuzhiyun  *	with source addr info in sub-trees
1369*4882a593Smuzhiyun  *	Need to own table->tb6_lock
1370*4882a593Smuzhiyun  */
1371*4882a593Smuzhiyun 
fib6_add(struct fib6_node * root,struct fib6_info * rt,struct nl_info * info,struct netlink_ext_ack * extack)1372*4882a593Smuzhiyun int fib6_add(struct fib6_node *root, struct fib6_info *rt,
1373*4882a593Smuzhiyun 	     struct nl_info *info, struct netlink_ext_ack *extack)
1374*4882a593Smuzhiyun {
1375*4882a593Smuzhiyun 	struct fib6_table *table = rt->fib6_table;
1376*4882a593Smuzhiyun 	struct fib6_node *fn, *pn = NULL;
1377*4882a593Smuzhiyun 	int err = -ENOMEM;
1378*4882a593Smuzhiyun 	int allow_create = 1;
1379*4882a593Smuzhiyun 	int replace_required = 0;
1380*4882a593Smuzhiyun 
1381*4882a593Smuzhiyun 	if (info->nlh) {
1382*4882a593Smuzhiyun 		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
1383*4882a593Smuzhiyun 			allow_create = 0;
1384*4882a593Smuzhiyun 		if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
1385*4882a593Smuzhiyun 			replace_required = 1;
1386*4882a593Smuzhiyun 	}
1387*4882a593Smuzhiyun 	if (!allow_create && !replace_required)
1388*4882a593Smuzhiyun 		pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
1389*4882a593Smuzhiyun 
1390*4882a593Smuzhiyun 	fn = fib6_add_1(info->nl_net, table, root,
1391*4882a593Smuzhiyun 			&rt->fib6_dst.addr, rt->fib6_dst.plen,
1392*4882a593Smuzhiyun 			offsetof(struct fib6_info, fib6_dst), allow_create,
1393*4882a593Smuzhiyun 			replace_required, extack);
1394*4882a593Smuzhiyun 	if (IS_ERR(fn)) {
1395*4882a593Smuzhiyun 		err = PTR_ERR(fn);
1396*4882a593Smuzhiyun 		fn = NULL;
1397*4882a593Smuzhiyun 		goto out;
1398*4882a593Smuzhiyun 	}
1399*4882a593Smuzhiyun 
1400*4882a593Smuzhiyun 	pn = fn;
1401*4882a593Smuzhiyun 
1402*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1403*4882a593Smuzhiyun 	if (rt->fib6_src.plen) {
1404*4882a593Smuzhiyun 		struct fib6_node *sn;
1405*4882a593Smuzhiyun 
1406*4882a593Smuzhiyun 		if (!rcu_access_pointer(fn->subtree)) {
1407*4882a593Smuzhiyun 			struct fib6_node *sfn;
1408*4882a593Smuzhiyun 
1409*4882a593Smuzhiyun 			/*
1410*4882a593Smuzhiyun 			 * Create subtree.
1411*4882a593Smuzhiyun 			 *
1412*4882a593Smuzhiyun 			 *		fn[main tree]
1413*4882a593Smuzhiyun 			 *		|
1414*4882a593Smuzhiyun 			 *		sfn[subtree root]
1415*4882a593Smuzhiyun 			 *		   \
1416*4882a593Smuzhiyun 			 *		    sn[new leaf node]
1417*4882a593Smuzhiyun 			 */
1418*4882a593Smuzhiyun 
1419*4882a593Smuzhiyun 			/* Create subtree root node */
1420*4882a593Smuzhiyun 			sfn = node_alloc(info->nl_net);
1421*4882a593Smuzhiyun 			if (!sfn)
1422*4882a593Smuzhiyun 				goto failure;
1423*4882a593Smuzhiyun 
1424*4882a593Smuzhiyun 			fib6_info_hold(info->nl_net->ipv6.fib6_null_entry);
1425*4882a593Smuzhiyun 			rcu_assign_pointer(sfn->leaf,
1426*4882a593Smuzhiyun 					   info->nl_net->ipv6.fib6_null_entry);
1427*4882a593Smuzhiyun 			sfn->fn_flags = RTN_ROOT;
1428*4882a593Smuzhiyun 
1429*4882a593Smuzhiyun 			/* Now add the first leaf node to new subtree */
1430*4882a593Smuzhiyun 
1431*4882a593Smuzhiyun 			sn = fib6_add_1(info->nl_net, table, sfn,
1432*4882a593Smuzhiyun 					&rt->fib6_src.addr, rt->fib6_src.plen,
1433*4882a593Smuzhiyun 					offsetof(struct fib6_info, fib6_src),
1434*4882a593Smuzhiyun 					allow_create, replace_required, extack);
1435*4882a593Smuzhiyun 
1436*4882a593Smuzhiyun 			if (IS_ERR(sn)) {
1437*4882a593Smuzhiyun 				/* If it is failed, discard just allocated
1438*4882a593Smuzhiyun 				   root, and then (in failure) stale node
1439*4882a593Smuzhiyun 				   in main tree.
1440*4882a593Smuzhiyun 				 */
1441*4882a593Smuzhiyun 				node_free_immediate(info->nl_net, sfn);
1442*4882a593Smuzhiyun 				err = PTR_ERR(sn);
1443*4882a593Smuzhiyun 				goto failure;
1444*4882a593Smuzhiyun 			}
1445*4882a593Smuzhiyun 
1446*4882a593Smuzhiyun 			/* Now link new subtree to main tree */
1447*4882a593Smuzhiyun 			rcu_assign_pointer(sfn->parent, fn);
1448*4882a593Smuzhiyun 			rcu_assign_pointer(fn->subtree, sfn);
1449*4882a593Smuzhiyun 		} else {
1450*4882a593Smuzhiyun 			sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
1451*4882a593Smuzhiyun 					&rt->fib6_src.addr, rt->fib6_src.plen,
1452*4882a593Smuzhiyun 					offsetof(struct fib6_info, fib6_src),
1453*4882a593Smuzhiyun 					allow_create, replace_required, extack);
1454*4882a593Smuzhiyun 
1455*4882a593Smuzhiyun 			if (IS_ERR(sn)) {
1456*4882a593Smuzhiyun 				err = PTR_ERR(sn);
1457*4882a593Smuzhiyun 				goto failure;
1458*4882a593Smuzhiyun 			}
1459*4882a593Smuzhiyun 		}
1460*4882a593Smuzhiyun 
1461*4882a593Smuzhiyun 		if (!rcu_access_pointer(fn->leaf)) {
1462*4882a593Smuzhiyun 			if (fn->fn_flags & RTN_TL_ROOT) {
1463*4882a593Smuzhiyun 				/* put back null_entry for root node */
1464*4882a593Smuzhiyun 				rcu_assign_pointer(fn->leaf,
1465*4882a593Smuzhiyun 					    info->nl_net->ipv6.fib6_null_entry);
1466*4882a593Smuzhiyun 			} else {
1467*4882a593Smuzhiyun 				fib6_info_hold(rt);
1468*4882a593Smuzhiyun 				rcu_assign_pointer(fn->leaf, rt);
1469*4882a593Smuzhiyun 			}
1470*4882a593Smuzhiyun 		}
1471*4882a593Smuzhiyun 		fn = sn;
1472*4882a593Smuzhiyun 	}
1473*4882a593Smuzhiyun #endif
1474*4882a593Smuzhiyun 
1475*4882a593Smuzhiyun 	err = fib6_add_rt2node(fn, rt, info, extack);
1476*4882a593Smuzhiyun 	if (!err) {
1477*4882a593Smuzhiyun 		if (rt->nh)
1478*4882a593Smuzhiyun 			list_add(&rt->nh_list, &rt->nh->f6i_list);
1479*4882a593Smuzhiyun 		__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
1480*4882a593Smuzhiyun 		fib6_start_gc(info->nl_net, rt);
1481*4882a593Smuzhiyun 	}
1482*4882a593Smuzhiyun 
1483*4882a593Smuzhiyun out:
1484*4882a593Smuzhiyun 	if (err) {
1485*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1486*4882a593Smuzhiyun 		/*
1487*4882a593Smuzhiyun 		 * If fib6_add_1 has cleared the old leaf pointer in the
1488*4882a593Smuzhiyun 		 * super-tree leaf node we have to find a new one for it.
1489*4882a593Smuzhiyun 		 */
1490*4882a593Smuzhiyun 		if (pn != fn) {
1491*4882a593Smuzhiyun 			struct fib6_info *pn_leaf =
1492*4882a593Smuzhiyun 				rcu_dereference_protected(pn->leaf,
1493*4882a593Smuzhiyun 				    lockdep_is_held(&table->tb6_lock));
1494*4882a593Smuzhiyun 			if (pn_leaf == rt) {
1495*4882a593Smuzhiyun 				pn_leaf = NULL;
1496*4882a593Smuzhiyun 				RCU_INIT_POINTER(pn->leaf, NULL);
1497*4882a593Smuzhiyun 				fib6_info_release(rt);
1498*4882a593Smuzhiyun 			}
1499*4882a593Smuzhiyun 			if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
1500*4882a593Smuzhiyun 				pn_leaf = fib6_find_prefix(info->nl_net, table,
1501*4882a593Smuzhiyun 							   pn);
1502*4882a593Smuzhiyun #if RT6_DEBUG >= 2
1503*4882a593Smuzhiyun 				if (!pn_leaf) {
1504*4882a593Smuzhiyun 					WARN_ON(!pn_leaf);
1505*4882a593Smuzhiyun 					pn_leaf =
1506*4882a593Smuzhiyun 					    info->nl_net->ipv6.fib6_null_entry;
1507*4882a593Smuzhiyun 				}
1508*4882a593Smuzhiyun #endif
1509*4882a593Smuzhiyun 				fib6_info_hold(pn_leaf);
1510*4882a593Smuzhiyun 				rcu_assign_pointer(pn->leaf, pn_leaf);
1511*4882a593Smuzhiyun 			}
1512*4882a593Smuzhiyun 		}
1513*4882a593Smuzhiyun #endif
1514*4882a593Smuzhiyun 		goto failure;
1515*4882a593Smuzhiyun 	} else if (fib6_requires_src(rt)) {
1516*4882a593Smuzhiyun 		fib6_routes_require_src_inc(info->nl_net);
1517*4882a593Smuzhiyun 	}
1518*4882a593Smuzhiyun 	return err;
1519*4882a593Smuzhiyun 
1520*4882a593Smuzhiyun failure:
1521*4882a593Smuzhiyun 	/* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
1522*4882a593Smuzhiyun 	 * 1. fn is an intermediate node and we failed to add the new
1523*4882a593Smuzhiyun 	 * route to it in both subtree creation failure and fib6_add_rt2node()
1524*4882a593Smuzhiyun 	 * failure case.
1525*4882a593Smuzhiyun 	 * 2. fn is the root node in the table and we fail to add the first
1526*4882a593Smuzhiyun 	 * default route to it.
1527*4882a593Smuzhiyun 	 */
1528*4882a593Smuzhiyun 	if (fn &&
1529*4882a593Smuzhiyun 	    (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
1530*4882a593Smuzhiyun 	     (fn->fn_flags & RTN_TL_ROOT &&
1531*4882a593Smuzhiyun 	      !rcu_access_pointer(fn->leaf))))
1532*4882a593Smuzhiyun 		fib6_repair_tree(info->nl_net, table, fn);
1533*4882a593Smuzhiyun 	return err;
1534*4882a593Smuzhiyun }
1535*4882a593Smuzhiyun 
1536*4882a593Smuzhiyun /*
1537*4882a593Smuzhiyun  *	Routing tree lookup
1538*4882a593Smuzhiyun  *
1539*4882a593Smuzhiyun  */
1540*4882a593Smuzhiyun 
/* One stage of a tree lookup: which key inside struct fib6_info to compare
 * (given as a byte offset) and the address to compare it against.
 * fib6_node_lookup_1() treats an entry with offset == 0 as "nothing to
 * look up".
 */
struct lookup_args {
	int offset;			/* key offset on fib6_info */
	const struct in6_addr *addr;	/* search key */
};
1545*4882a593Smuzhiyun 
fib6_node_lookup_1(struct fib6_node * root,struct lookup_args * args)1546*4882a593Smuzhiyun static struct fib6_node *fib6_node_lookup_1(struct fib6_node *root,
1547*4882a593Smuzhiyun 					    struct lookup_args *args)
1548*4882a593Smuzhiyun {
1549*4882a593Smuzhiyun 	struct fib6_node *fn;
1550*4882a593Smuzhiyun 	__be32 dir;
1551*4882a593Smuzhiyun 
1552*4882a593Smuzhiyun 	if (unlikely(args->offset == 0))
1553*4882a593Smuzhiyun 		return NULL;
1554*4882a593Smuzhiyun 
1555*4882a593Smuzhiyun 	/*
1556*4882a593Smuzhiyun 	 *	Descend on a tree
1557*4882a593Smuzhiyun 	 */
1558*4882a593Smuzhiyun 
1559*4882a593Smuzhiyun 	fn = root;
1560*4882a593Smuzhiyun 
1561*4882a593Smuzhiyun 	for (;;) {
1562*4882a593Smuzhiyun 		struct fib6_node *next;
1563*4882a593Smuzhiyun 
1564*4882a593Smuzhiyun 		dir = addr_bit_set(args->addr, fn->fn_bit);
1565*4882a593Smuzhiyun 
1566*4882a593Smuzhiyun 		next = dir ? rcu_dereference(fn->right) :
1567*4882a593Smuzhiyun 			     rcu_dereference(fn->left);
1568*4882a593Smuzhiyun 
1569*4882a593Smuzhiyun 		if (next) {
1570*4882a593Smuzhiyun 			fn = next;
1571*4882a593Smuzhiyun 			continue;
1572*4882a593Smuzhiyun 		}
1573*4882a593Smuzhiyun 		break;
1574*4882a593Smuzhiyun 	}
1575*4882a593Smuzhiyun 
1576*4882a593Smuzhiyun 	while (fn) {
1577*4882a593Smuzhiyun 		struct fib6_node *subtree = FIB6_SUBTREE(fn);
1578*4882a593Smuzhiyun 
1579*4882a593Smuzhiyun 		if (subtree || fn->fn_flags & RTN_RTINFO) {
1580*4882a593Smuzhiyun 			struct fib6_info *leaf = rcu_dereference(fn->leaf);
1581*4882a593Smuzhiyun 			struct rt6key *key;
1582*4882a593Smuzhiyun 
1583*4882a593Smuzhiyun 			if (!leaf)
1584*4882a593Smuzhiyun 				goto backtrack;
1585*4882a593Smuzhiyun 
1586*4882a593Smuzhiyun 			key = (struct rt6key *) ((u8 *)leaf + args->offset);
1587*4882a593Smuzhiyun 
1588*4882a593Smuzhiyun 			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
1589*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1590*4882a593Smuzhiyun 				if (subtree) {
1591*4882a593Smuzhiyun 					struct fib6_node *sfn;
1592*4882a593Smuzhiyun 					sfn = fib6_node_lookup_1(subtree,
1593*4882a593Smuzhiyun 								 args + 1);
1594*4882a593Smuzhiyun 					if (!sfn)
1595*4882a593Smuzhiyun 						goto backtrack;
1596*4882a593Smuzhiyun 					fn = sfn;
1597*4882a593Smuzhiyun 				}
1598*4882a593Smuzhiyun #endif
1599*4882a593Smuzhiyun 				if (fn->fn_flags & RTN_RTINFO)
1600*4882a593Smuzhiyun 					return fn;
1601*4882a593Smuzhiyun 			}
1602*4882a593Smuzhiyun 		}
1603*4882a593Smuzhiyun backtrack:
1604*4882a593Smuzhiyun 		if (fn->fn_flags & RTN_ROOT)
1605*4882a593Smuzhiyun 			break;
1606*4882a593Smuzhiyun 
1607*4882a593Smuzhiyun 		fn = rcu_dereference(fn->parent);
1608*4882a593Smuzhiyun 	}
1609*4882a593Smuzhiyun 
1610*4882a593Smuzhiyun 	return NULL;
1611*4882a593Smuzhiyun }
1612*4882a593Smuzhiyun 
1613*4882a593Smuzhiyun /* called with rcu_read_lock() held
1614*4882a593Smuzhiyun  */
fib6_node_lookup(struct fib6_node * root,const struct in6_addr * daddr,const struct in6_addr * saddr)1615*4882a593Smuzhiyun struct fib6_node *fib6_node_lookup(struct fib6_node *root,
1616*4882a593Smuzhiyun 				   const struct in6_addr *daddr,
1617*4882a593Smuzhiyun 				   const struct in6_addr *saddr)
1618*4882a593Smuzhiyun {
1619*4882a593Smuzhiyun 	struct fib6_node *fn;
1620*4882a593Smuzhiyun 	struct lookup_args args[] = {
1621*4882a593Smuzhiyun 		{
1622*4882a593Smuzhiyun 			.offset = offsetof(struct fib6_info, fib6_dst),
1623*4882a593Smuzhiyun 			.addr = daddr,
1624*4882a593Smuzhiyun 		},
1625*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1626*4882a593Smuzhiyun 		{
1627*4882a593Smuzhiyun 			.offset = offsetof(struct fib6_info, fib6_src),
1628*4882a593Smuzhiyun 			.addr = saddr,
1629*4882a593Smuzhiyun 		},
1630*4882a593Smuzhiyun #endif
1631*4882a593Smuzhiyun 		{
1632*4882a593Smuzhiyun 			.offset = 0,	/* sentinel */
1633*4882a593Smuzhiyun 		}
1634*4882a593Smuzhiyun 	};
1635*4882a593Smuzhiyun 
1636*4882a593Smuzhiyun 	fn = fib6_node_lookup_1(root, daddr ? args : args + 1);
1637*4882a593Smuzhiyun 	if (!fn || fn->fn_flags & RTN_TL_ROOT)
1638*4882a593Smuzhiyun 		fn = root;
1639*4882a593Smuzhiyun 
1640*4882a593Smuzhiyun 	return fn;
1641*4882a593Smuzhiyun }
1642*4882a593Smuzhiyun 
1643*4882a593Smuzhiyun /*
1644*4882a593Smuzhiyun  *	Get node with specified destination prefix (and source prefix,
1645*4882a593Smuzhiyun  *	if subtrees are used)
1646*4882a593Smuzhiyun  *	exact_match == true means we try to find fn with exact match of
1647*4882a593Smuzhiyun  *	the passed in prefix addr
1648*4882a593Smuzhiyun  *	exact_match == false means we try to find fn with longest prefix
1649*4882a593Smuzhiyun  *	match of the passed in prefix addr. This is useful for finding fn
1650*4882a593Smuzhiyun  *	for cached route as it will be stored in the exception table under
1651*4882a593Smuzhiyun  *	the node with longest prefix length.
1652*4882a593Smuzhiyun  */
1653*4882a593Smuzhiyun 
1654*4882a593Smuzhiyun 
fib6_locate_1(struct fib6_node * root,const struct in6_addr * addr,int plen,int offset,bool exact_match)1655*4882a593Smuzhiyun static struct fib6_node *fib6_locate_1(struct fib6_node *root,
1656*4882a593Smuzhiyun 				       const struct in6_addr *addr,
1657*4882a593Smuzhiyun 				       int plen, int offset,
1658*4882a593Smuzhiyun 				       bool exact_match)
1659*4882a593Smuzhiyun {
1660*4882a593Smuzhiyun 	struct fib6_node *fn, *prev = NULL;
1661*4882a593Smuzhiyun 
1662*4882a593Smuzhiyun 	for (fn = root; fn ; ) {
1663*4882a593Smuzhiyun 		struct fib6_info *leaf = rcu_dereference(fn->leaf);
1664*4882a593Smuzhiyun 		struct rt6key *key;
1665*4882a593Smuzhiyun 
1666*4882a593Smuzhiyun 		/* This node is being deleted */
1667*4882a593Smuzhiyun 		if (!leaf) {
1668*4882a593Smuzhiyun 			if (plen <= fn->fn_bit)
1669*4882a593Smuzhiyun 				goto out;
1670*4882a593Smuzhiyun 			else
1671*4882a593Smuzhiyun 				goto next;
1672*4882a593Smuzhiyun 		}
1673*4882a593Smuzhiyun 
1674*4882a593Smuzhiyun 		key = (struct rt6key *)((u8 *)leaf + offset);
1675*4882a593Smuzhiyun 
1676*4882a593Smuzhiyun 		/*
1677*4882a593Smuzhiyun 		 *	Prefix match
1678*4882a593Smuzhiyun 		 */
1679*4882a593Smuzhiyun 		if (plen < fn->fn_bit ||
1680*4882a593Smuzhiyun 		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
1681*4882a593Smuzhiyun 			goto out;
1682*4882a593Smuzhiyun 
1683*4882a593Smuzhiyun 		if (plen == fn->fn_bit)
1684*4882a593Smuzhiyun 			return fn;
1685*4882a593Smuzhiyun 
1686*4882a593Smuzhiyun 		if (fn->fn_flags & RTN_RTINFO)
1687*4882a593Smuzhiyun 			prev = fn;
1688*4882a593Smuzhiyun 
1689*4882a593Smuzhiyun next:
1690*4882a593Smuzhiyun 		/*
1691*4882a593Smuzhiyun 		 *	We have more bits to go
1692*4882a593Smuzhiyun 		 */
1693*4882a593Smuzhiyun 		if (addr_bit_set(addr, fn->fn_bit))
1694*4882a593Smuzhiyun 			fn = rcu_dereference(fn->right);
1695*4882a593Smuzhiyun 		else
1696*4882a593Smuzhiyun 			fn = rcu_dereference(fn->left);
1697*4882a593Smuzhiyun 	}
1698*4882a593Smuzhiyun out:
1699*4882a593Smuzhiyun 	if (exact_match)
1700*4882a593Smuzhiyun 		return NULL;
1701*4882a593Smuzhiyun 	else
1702*4882a593Smuzhiyun 		return prev;
1703*4882a593Smuzhiyun }
1704*4882a593Smuzhiyun 
/* Locate the node for @daddr/@dst_len in the tree rooted at @root.  With
 * CONFIG_IPV6_SUBTREES and a non-zero @src_len, the search continues for
 * @saddr/@src_len inside the subtree of the destination node, when that
 * node has one.  @exact_match is passed through to fib6_locate_1().
 *
 * Returns the node only if it has RTN_RTINFO set, otherwise NULL.
 */
struct fib6_node *fib6_locate(struct fib6_node *root,
			      const struct in6_addr *daddr, int dst_len,
			      const struct in6_addr *saddr, int src_len,
			      bool exact_match)
{
	struct fib6_node *node;

	node = fib6_locate_1(root, daddr, dst_len,
			     offsetof(struct fib6_info, fib6_dst),
			     exact_match);

#ifdef CONFIG_IPV6_SUBTREES
	if (src_len) {
		struct fib6_node *subtree;

		WARN_ON(!saddr);

		subtree = node ? FIB6_SUBTREE(node) : NULL;
		if (subtree)
			node = fib6_locate_1(subtree, saddr, src_len,
					offsetof(struct fib6_info, fib6_src),
					exact_match);
	}
#endif

	if (!node || !(node->fn_flags & RTN_RTINFO))
		return NULL;

	return node;
}
1736*4882a593Smuzhiyun 
1737*4882a593Smuzhiyun 
1738*4882a593Smuzhiyun /*
1739*4882a593Smuzhiyun  *	Deletion
1740*4882a593Smuzhiyun  *
1741*4882a593Smuzhiyun  */
1742*4882a593Smuzhiyun 
fib6_find_prefix(struct net * net,struct fib6_table * table,struct fib6_node * fn)1743*4882a593Smuzhiyun static struct fib6_info *fib6_find_prefix(struct net *net,
1744*4882a593Smuzhiyun 					 struct fib6_table *table,
1745*4882a593Smuzhiyun 					 struct fib6_node *fn)
1746*4882a593Smuzhiyun {
1747*4882a593Smuzhiyun 	struct fib6_node *child_left, *child_right;
1748*4882a593Smuzhiyun 
1749*4882a593Smuzhiyun 	if (fn->fn_flags & RTN_ROOT)
1750*4882a593Smuzhiyun 		return net->ipv6.fib6_null_entry;
1751*4882a593Smuzhiyun 
1752*4882a593Smuzhiyun 	while (fn) {
1753*4882a593Smuzhiyun 		child_left = rcu_dereference_protected(fn->left,
1754*4882a593Smuzhiyun 				    lockdep_is_held(&table->tb6_lock));
1755*4882a593Smuzhiyun 		child_right = rcu_dereference_protected(fn->right,
1756*4882a593Smuzhiyun 				    lockdep_is_held(&table->tb6_lock));
1757*4882a593Smuzhiyun 		if (child_left)
1758*4882a593Smuzhiyun 			return rcu_dereference_protected(child_left->leaf,
1759*4882a593Smuzhiyun 					lockdep_is_held(&table->tb6_lock));
1760*4882a593Smuzhiyun 		if (child_right)
1761*4882a593Smuzhiyun 			return rcu_dereference_protected(child_right->leaf,
1762*4882a593Smuzhiyun 					lockdep_is_held(&table->tb6_lock));
1763*4882a593Smuzhiyun 
1764*4882a593Smuzhiyun 		fn = FIB6_SUBTREE(fn);
1765*4882a593Smuzhiyun 	}
1766*4882a593Smuzhiyun 	return NULL;
1767*4882a593Smuzhiyun }
1768*4882a593Smuzhiyun 
1769*4882a593Smuzhiyun /*
1770*4882a593Smuzhiyun  *	Called to trim the tree of intermediate nodes when possible. "fn"
1771*4882a593Smuzhiyun  *	is the node we want to try and remove.
1772*4882a593Smuzhiyun  *	Need to own table->tb6_lock
1773*4882a593Smuzhiyun  */
1774*4882a593Smuzhiyun 
fib6_repair_tree(struct net * net,struct fib6_table * table,struct fib6_node * fn)1775*4882a593Smuzhiyun static struct fib6_node *fib6_repair_tree(struct net *net,
1776*4882a593Smuzhiyun 					  struct fib6_table *table,
1777*4882a593Smuzhiyun 					  struct fib6_node *fn)
1778*4882a593Smuzhiyun {
1779*4882a593Smuzhiyun 	int children;
1780*4882a593Smuzhiyun 	int nstate;
1781*4882a593Smuzhiyun 	struct fib6_node *child;
1782*4882a593Smuzhiyun 	struct fib6_walker *w;
1783*4882a593Smuzhiyun 	int iter = 0;
1784*4882a593Smuzhiyun 
1785*4882a593Smuzhiyun 	/* Set fn->leaf to null_entry for root node. */
1786*4882a593Smuzhiyun 	if (fn->fn_flags & RTN_TL_ROOT) {
1787*4882a593Smuzhiyun 		rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry);
1788*4882a593Smuzhiyun 		return fn;
1789*4882a593Smuzhiyun 	}
1790*4882a593Smuzhiyun 
1791*4882a593Smuzhiyun 	for (;;) {
1792*4882a593Smuzhiyun 		struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
1793*4882a593Smuzhiyun 					    lockdep_is_held(&table->tb6_lock));
1794*4882a593Smuzhiyun 		struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
1795*4882a593Smuzhiyun 					    lockdep_is_held(&table->tb6_lock));
1796*4882a593Smuzhiyun 		struct fib6_node *pn = rcu_dereference_protected(fn->parent,
1797*4882a593Smuzhiyun 					    lockdep_is_held(&table->tb6_lock));
1798*4882a593Smuzhiyun 		struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
1799*4882a593Smuzhiyun 					    lockdep_is_held(&table->tb6_lock));
1800*4882a593Smuzhiyun 		struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
1801*4882a593Smuzhiyun 					    lockdep_is_held(&table->tb6_lock));
1802*4882a593Smuzhiyun 		struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
1803*4882a593Smuzhiyun 					    lockdep_is_held(&table->tb6_lock));
1804*4882a593Smuzhiyun 		struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
1805*4882a593Smuzhiyun 					    lockdep_is_held(&table->tb6_lock));
1806*4882a593Smuzhiyun 		struct fib6_info *new_fn_leaf;
1807*4882a593Smuzhiyun 
1808*4882a593Smuzhiyun 		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
1809*4882a593Smuzhiyun 		iter++;
1810*4882a593Smuzhiyun 
1811*4882a593Smuzhiyun 		WARN_ON(fn->fn_flags & RTN_RTINFO);
1812*4882a593Smuzhiyun 		WARN_ON(fn->fn_flags & RTN_TL_ROOT);
1813*4882a593Smuzhiyun 		WARN_ON(fn_leaf);
1814*4882a593Smuzhiyun 
1815*4882a593Smuzhiyun 		children = 0;
1816*4882a593Smuzhiyun 		child = NULL;
1817*4882a593Smuzhiyun 		if (fn_r) {
1818*4882a593Smuzhiyun 			child = fn_r;
1819*4882a593Smuzhiyun 			children |= 1;
1820*4882a593Smuzhiyun 		}
1821*4882a593Smuzhiyun 		if (fn_l) {
1822*4882a593Smuzhiyun 			child = fn_l;
1823*4882a593Smuzhiyun 			children |= 2;
1824*4882a593Smuzhiyun 		}
1825*4882a593Smuzhiyun 
1826*4882a593Smuzhiyun 		if (children == 3 || FIB6_SUBTREE(fn)
1827*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1828*4882a593Smuzhiyun 		    /* Subtree root (i.e. fn) may have one child */
1829*4882a593Smuzhiyun 		    || (children && fn->fn_flags & RTN_ROOT)
1830*4882a593Smuzhiyun #endif
1831*4882a593Smuzhiyun 		    ) {
1832*4882a593Smuzhiyun 			new_fn_leaf = fib6_find_prefix(net, table, fn);
1833*4882a593Smuzhiyun #if RT6_DEBUG >= 2
1834*4882a593Smuzhiyun 			if (!new_fn_leaf) {
1835*4882a593Smuzhiyun 				WARN_ON(!new_fn_leaf);
1836*4882a593Smuzhiyun 				new_fn_leaf = net->ipv6.fib6_null_entry;
1837*4882a593Smuzhiyun 			}
1838*4882a593Smuzhiyun #endif
1839*4882a593Smuzhiyun 			fib6_info_hold(new_fn_leaf);
1840*4882a593Smuzhiyun 			rcu_assign_pointer(fn->leaf, new_fn_leaf);
1841*4882a593Smuzhiyun 			return pn;
1842*4882a593Smuzhiyun 		}
1843*4882a593Smuzhiyun 
1844*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1845*4882a593Smuzhiyun 		if (FIB6_SUBTREE(pn) == fn) {
1846*4882a593Smuzhiyun 			WARN_ON(!(fn->fn_flags & RTN_ROOT));
1847*4882a593Smuzhiyun 			RCU_INIT_POINTER(pn->subtree, NULL);
1848*4882a593Smuzhiyun 			nstate = FWS_L;
1849*4882a593Smuzhiyun 		} else {
1850*4882a593Smuzhiyun 			WARN_ON(fn->fn_flags & RTN_ROOT);
1851*4882a593Smuzhiyun #endif
1852*4882a593Smuzhiyun 			if (pn_r == fn)
1853*4882a593Smuzhiyun 				rcu_assign_pointer(pn->right, child);
1854*4882a593Smuzhiyun 			else if (pn_l == fn)
1855*4882a593Smuzhiyun 				rcu_assign_pointer(pn->left, child);
1856*4882a593Smuzhiyun #if RT6_DEBUG >= 2
1857*4882a593Smuzhiyun 			else
1858*4882a593Smuzhiyun 				WARN_ON(1);
1859*4882a593Smuzhiyun #endif
1860*4882a593Smuzhiyun 			if (child)
1861*4882a593Smuzhiyun 				rcu_assign_pointer(child->parent, pn);
1862*4882a593Smuzhiyun 			nstate = FWS_R;
1863*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
1864*4882a593Smuzhiyun 		}
1865*4882a593Smuzhiyun #endif
1866*4882a593Smuzhiyun 
1867*4882a593Smuzhiyun 		read_lock(&net->ipv6.fib6_walker_lock);
1868*4882a593Smuzhiyun 		FOR_WALKERS(net, w) {
1869*4882a593Smuzhiyun 			if (!child) {
1870*4882a593Smuzhiyun 				if (w->node == fn) {
1871*4882a593Smuzhiyun 					RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
1872*4882a593Smuzhiyun 					w->node = pn;
1873*4882a593Smuzhiyun 					w->state = nstate;
1874*4882a593Smuzhiyun 				}
1875*4882a593Smuzhiyun 			} else {
1876*4882a593Smuzhiyun 				if (w->node == fn) {
1877*4882a593Smuzhiyun 					w->node = child;
1878*4882a593Smuzhiyun 					if (children&2) {
1879*4882a593Smuzhiyun 						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
1880*4882a593Smuzhiyun 						w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
1881*4882a593Smuzhiyun 					} else {
1882*4882a593Smuzhiyun 						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
1883*4882a593Smuzhiyun 						w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
1884*4882a593Smuzhiyun 					}
1885*4882a593Smuzhiyun 				}
1886*4882a593Smuzhiyun 			}
1887*4882a593Smuzhiyun 		}
1888*4882a593Smuzhiyun 		read_unlock(&net->ipv6.fib6_walker_lock);
1889*4882a593Smuzhiyun 
1890*4882a593Smuzhiyun 		node_free(net, fn);
1891*4882a593Smuzhiyun 		if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
1892*4882a593Smuzhiyun 			return pn;
1893*4882a593Smuzhiyun 
1894*4882a593Smuzhiyun 		RCU_INIT_POINTER(pn->leaf, NULL);
1895*4882a593Smuzhiyun 		fib6_info_release(pn_leaf);
1896*4882a593Smuzhiyun 		fn = pn;
1897*4882a593Smuzhiyun 	}
1898*4882a593Smuzhiyun }
1899*4882a593Smuzhiyun 
/* Unlink the route that *@rtp points to from node @fn and release it.
 *
 * In order: the route is removed from the node's list and from its
 * sibling list, walkers positioned on it are advanced, the tree is
 * repaired if the node lost its last route, and finally the in-kernel
 * FIB notifiers and the RTM_DELROUTE netlink notification are sent
 * (subject to info->skip_notify_kernel and info->skip_notify).
 * Pointer accesses are annotated as protected by table->tb6_lock.
 */
static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
			   struct fib6_info __rcu **rtp, struct nl_info *info)
{
	struct net *net = info->nl_net;
	struct fib6_info *rt = rcu_dereference_protected(*rtp,
				    lockdep_is_held(&table->tb6_lock));
	struct fib6_info *replace_rt = NULL;
	struct fib6_info *leaf;
	struct fib6_walker *w;
	bool notify_del = false;

	RT6_TRACE("fib6_del_route\n");

	/* If the deleted route is the first in the node and it is not part of
	 * a multipath route, then we need to replace it with the next route
	 * in the node, if exists.  With no next route, a plain delete
	 * notification is sent instead.
	 */
	leaf = rcu_dereference_protected(fn->leaf,
					 lockdep_is_held(&table->tb6_lock));
	if (leaf == rt && !rt->fib6_nsiblings) {
		replace_rt = rcu_dereference_protected(rt->fib6_next,
					lockdep_is_held(&table->tb6_lock));
		if (!replace_rt)
			notify_del = true;
	}

	/* Unlink it */
	*rtp = rt->fib6_next;
	rt->fib6_node = NULL;
	net->ipv6.rt6_stats->fib_rt_entries--;
	net->ipv6.rt6_stats->fib_discarded_routes++;

	/* Reset round-robin state, if necessary */
	if (rcu_access_pointer(fn->rr_ptr) == rt)
		fn->rr_ptr = NULL;

	/* Remove this entry from other siblings */
	if (rt->fib6_nsiblings) {
		struct fib6_info *sibling, *next_sibling;

		/* The route is deleted from a multipath route. If this
		 * multipath route is the first route in the node, then we need
		 * to emit a delete notification. Otherwise, we need to skip
		 * the notification.
		 */
		if (rt->fib6_metric == leaf->fib6_metric &&
		    rt6_qualify_for_ecmp(leaf))
			notify_del = true;

		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->fib6_siblings, fib6_siblings)
			sibling->fib6_nsiblings--;

		rt->fib6_nsiblings = 0;
		list_del_init(&rt->fib6_siblings);
		rt6_multipath_rebalance(next_sibling);
	}

	/* Adjust walkers: any walker sitting on rt moves to the next route */
	read_lock(&net->ipv6.fib6_walker_lock);
	FOR_WALKERS(net, w) {
		if (w->state == FWS_C && w->leaf == rt) {
			RT6_TRACE("walker %p adjusted by delroute\n", w);
			w->leaf = rcu_dereference_protected(rt->fib6_next,
					lockdep_is_held(&table->tb6_lock));
			if (!w->leaf)
				w->state = FWS_U;
		}
	}
	read_unlock(&net->ipv6.fib6_walker_lock);

	/* If it was last route, call fib6_repair_tree() to:
	 * 1. For root node, put back null_entry as how the table was created.
	 * 2. For other nodes, expunge its radix tree node.
	 */
	if (!rcu_access_pointer(fn->leaf)) {
		if (!(fn->fn_flags & RTN_TL_ROOT)) {
			fn->fn_flags &= ~RTN_RTINFO;
			net->ipv6.rt6_stats->fib_route_nodes--;
		}
		fn = fib6_repair_tree(net, table, fn);
	}

	fib6_purge_rt(rt, fn, net);

	/* In-kernel notifiers: a delete takes precedence over a replace. */
	if (!info->skip_notify_kernel && notify_del)
		call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
					  rt, NULL);
	else if (!info->skip_notify_kernel && replace_rt)
		call_fib6_entry_notifiers_replace(net, replace_rt);

	if (!info->skip_notify)
		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);

	fib6_info_release(rt);
}
1995*4882a593Smuzhiyun 
1996*4882a593Smuzhiyun /* Need to own table->tb6_lock */
fib6_del(struct fib6_info * rt,struct nl_info * info)1997*4882a593Smuzhiyun int fib6_del(struct fib6_info *rt, struct nl_info *info)
1998*4882a593Smuzhiyun {
1999*4882a593Smuzhiyun 	struct net *net = info->nl_net;
2000*4882a593Smuzhiyun 	struct fib6_info __rcu **rtp;
2001*4882a593Smuzhiyun 	struct fib6_info __rcu **rtp_next;
2002*4882a593Smuzhiyun 	struct fib6_table *table;
2003*4882a593Smuzhiyun 	struct fib6_node *fn;
2004*4882a593Smuzhiyun 
2005*4882a593Smuzhiyun 	if (rt == net->ipv6.fib6_null_entry)
2006*4882a593Smuzhiyun 		return -ENOENT;
2007*4882a593Smuzhiyun 
2008*4882a593Smuzhiyun 	table = rt->fib6_table;
2009*4882a593Smuzhiyun 	fn = rcu_dereference_protected(rt->fib6_node,
2010*4882a593Smuzhiyun 				       lockdep_is_held(&table->tb6_lock));
2011*4882a593Smuzhiyun 	if (!fn)
2012*4882a593Smuzhiyun 		return -ENOENT;
2013*4882a593Smuzhiyun 
2014*4882a593Smuzhiyun 	WARN_ON(!(fn->fn_flags & RTN_RTINFO));
2015*4882a593Smuzhiyun 
2016*4882a593Smuzhiyun 	/*
2017*4882a593Smuzhiyun 	 *	Walk the leaf entries looking for ourself
2018*4882a593Smuzhiyun 	 */
2019*4882a593Smuzhiyun 
2020*4882a593Smuzhiyun 	for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
2021*4882a593Smuzhiyun 		struct fib6_info *cur = rcu_dereference_protected(*rtp,
2022*4882a593Smuzhiyun 					lockdep_is_held(&table->tb6_lock));
2023*4882a593Smuzhiyun 		if (rt == cur) {
2024*4882a593Smuzhiyun 			if (fib6_requires_src(cur))
2025*4882a593Smuzhiyun 				fib6_routes_require_src_dec(info->nl_net);
2026*4882a593Smuzhiyun 			fib6_del_route(table, fn, rtp, info);
2027*4882a593Smuzhiyun 			return 0;
2028*4882a593Smuzhiyun 		}
2029*4882a593Smuzhiyun 		rtp_next = &cur->fib6_next;
2030*4882a593Smuzhiyun 	}
2031*4882a593Smuzhiyun 	return -ENOENT;
2032*4882a593Smuzhiyun }
2033*4882a593Smuzhiyun 
2034*4882a593Smuzhiyun /*
2035*4882a593Smuzhiyun  *	Tree traversal function.
2036*4882a593Smuzhiyun  *
2037*4882a593Smuzhiyun  *	Certainly, it is not interrupt safe.
2038*4882a593Smuzhiyun  *	However, it is internally reenterable wrt itself and fib6_add/fib6_del.
2039*4882a593Smuzhiyun  *	It means, that we can modify tree during walking
2040*4882a593Smuzhiyun  *	and use this function for garbage collection, clone pruning,
2041*4882a593Smuzhiyun  *	cleaning tree when a device goes down etc. etc.
2042*4882a593Smuzhiyun  *
2043*4882a593Smuzhiyun  *	It guarantees that every node will be traversed,
2044*4882a593Smuzhiyun  *	and that it will be traversed only once.
2045*4882a593Smuzhiyun  *
2046*4882a593Smuzhiyun  *	Callback function w->func may return:
2047*4882a593Smuzhiyun  *	0 -> continue walking.
2048*4882a593Smuzhiyun  *	positive value -> walking is suspended (used by tree dumps,
2049*4882a593Smuzhiyun  *	and probably by gc, if it will be split to several slices)
2050*4882a593Smuzhiyun  *	negative value -> terminate walking.
2051*4882a593Smuzhiyun  *
2052*4882a593Smuzhiyun  *	The function itself returns:
2053*4882a593Smuzhiyun  *	0   -> walk is complete.
2054*4882a593Smuzhiyun  *	>0  -> walk is incomplete (i.e. suspended)
2055*4882a593Smuzhiyun  *	<0  -> walk is terminated by an error.
2056*4882a593Smuzhiyun  *
2057*4882a593Smuzhiyun  *	This function is called with tb6_lock held.
2058*4882a593Smuzhiyun  */
2059*4882a593Smuzhiyun 
fib6_walk_continue(struct fib6_walker * w)2060*4882a593Smuzhiyun static int fib6_walk_continue(struct fib6_walker *w)
2061*4882a593Smuzhiyun {
2062*4882a593Smuzhiyun 	struct fib6_node *fn, *pn, *left, *right;
2063*4882a593Smuzhiyun 
2064*4882a593Smuzhiyun 	/* w->root should always be table->tb6_root */
2065*4882a593Smuzhiyun 	WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
2066*4882a593Smuzhiyun 
2067*4882a593Smuzhiyun 	for (;;) {
2068*4882a593Smuzhiyun 		fn = w->node;
2069*4882a593Smuzhiyun 		if (!fn)
2070*4882a593Smuzhiyun 			return 0;
2071*4882a593Smuzhiyun 
2072*4882a593Smuzhiyun 		switch (w->state) {
2073*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
2074*4882a593Smuzhiyun 		case FWS_S:
2075*4882a593Smuzhiyun 			if (FIB6_SUBTREE(fn)) {
2076*4882a593Smuzhiyun 				w->node = FIB6_SUBTREE(fn);
2077*4882a593Smuzhiyun 				continue;
2078*4882a593Smuzhiyun 			}
2079*4882a593Smuzhiyun 			w->state = FWS_L;
2080*4882a593Smuzhiyun 			fallthrough;
2081*4882a593Smuzhiyun #endif
2082*4882a593Smuzhiyun 		case FWS_L:
2083*4882a593Smuzhiyun 			left = rcu_dereference_protected(fn->left, 1);
2084*4882a593Smuzhiyun 			if (left) {
2085*4882a593Smuzhiyun 				w->node = left;
2086*4882a593Smuzhiyun 				w->state = FWS_INIT;
2087*4882a593Smuzhiyun 				continue;
2088*4882a593Smuzhiyun 			}
2089*4882a593Smuzhiyun 			w->state = FWS_R;
2090*4882a593Smuzhiyun 			fallthrough;
2091*4882a593Smuzhiyun 		case FWS_R:
2092*4882a593Smuzhiyun 			right = rcu_dereference_protected(fn->right, 1);
2093*4882a593Smuzhiyun 			if (right) {
2094*4882a593Smuzhiyun 				w->node = right;
2095*4882a593Smuzhiyun 				w->state = FWS_INIT;
2096*4882a593Smuzhiyun 				continue;
2097*4882a593Smuzhiyun 			}
2098*4882a593Smuzhiyun 			w->state = FWS_C;
2099*4882a593Smuzhiyun 			w->leaf = rcu_dereference_protected(fn->leaf, 1);
2100*4882a593Smuzhiyun 			fallthrough;
2101*4882a593Smuzhiyun 		case FWS_C:
2102*4882a593Smuzhiyun 			if (w->leaf && fn->fn_flags & RTN_RTINFO) {
2103*4882a593Smuzhiyun 				int err;
2104*4882a593Smuzhiyun 
2105*4882a593Smuzhiyun 				if (w->skip) {
2106*4882a593Smuzhiyun 					w->skip--;
2107*4882a593Smuzhiyun 					goto skip;
2108*4882a593Smuzhiyun 				}
2109*4882a593Smuzhiyun 
2110*4882a593Smuzhiyun 				err = w->func(w);
2111*4882a593Smuzhiyun 				if (err)
2112*4882a593Smuzhiyun 					return err;
2113*4882a593Smuzhiyun 
2114*4882a593Smuzhiyun 				w->count++;
2115*4882a593Smuzhiyun 				continue;
2116*4882a593Smuzhiyun 			}
2117*4882a593Smuzhiyun skip:
2118*4882a593Smuzhiyun 			w->state = FWS_U;
2119*4882a593Smuzhiyun 			fallthrough;
2120*4882a593Smuzhiyun 		case FWS_U:
2121*4882a593Smuzhiyun 			if (fn == w->root)
2122*4882a593Smuzhiyun 				return 0;
2123*4882a593Smuzhiyun 			pn = rcu_dereference_protected(fn->parent, 1);
2124*4882a593Smuzhiyun 			left = rcu_dereference_protected(pn->left, 1);
2125*4882a593Smuzhiyun 			right = rcu_dereference_protected(pn->right, 1);
2126*4882a593Smuzhiyun 			w->node = pn;
2127*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
2128*4882a593Smuzhiyun 			if (FIB6_SUBTREE(pn) == fn) {
2129*4882a593Smuzhiyun 				WARN_ON(!(fn->fn_flags & RTN_ROOT));
2130*4882a593Smuzhiyun 				w->state = FWS_L;
2131*4882a593Smuzhiyun 				continue;
2132*4882a593Smuzhiyun 			}
2133*4882a593Smuzhiyun #endif
2134*4882a593Smuzhiyun 			if (left == fn) {
2135*4882a593Smuzhiyun 				w->state = FWS_R;
2136*4882a593Smuzhiyun 				continue;
2137*4882a593Smuzhiyun 			}
2138*4882a593Smuzhiyun 			if (right == fn) {
2139*4882a593Smuzhiyun 				w->state = FWS_C;
2140*4882a593Smuzhiyun 				w->leaf = rcu_dereference_protected(w->node->leaf, 1);
2141*4882a593Smuzhiyun 				continue;
2142*4882a593Smuzhiyun 			}
2143*4882a593Smuzhiyun #if RT6_DEBUG >= 2
2144*4882a593Smuzhiyun 			WARN_ON(1);
2145*4882a593Smuzhiyun #endif
2146*4882a593Smuzhiyun 		}
2147*4882a593Smuzhiyun 	}
2148*4882a593Smuzhiyun }
2149*4882a593Smuzhiyun 
fib6_walk(struct net * net,struct fib6_walker * w)2150*4882a593Smuzhiyun static int fib6_walk(struct net *net, struct fib6_walker *w)
2151*4882a593Smuzhiyun {
2152*4882a593Smuzhiyun 	int res;
2153*4882a593Smuzhiyun 
2154*4882a593Smuzhiyun 	w->state = FWS_INIT;
2155*4882a593Smuzhiyun 	w->node = w->root;
2156*4882a593Smuzhiyun 
2157*4882a593Smuzhiyun 	fib6_walker_link(net, w);
2158*4882a593Smuzhiyun 	res = fib6_walk_continue(w);
2159*4882a593Smuzhiyun 	if (res <= 0)
2160*4882a593Smuzhiyun 		fib6_walker_unlink(net, w);
2161*4882a593Smuzhiyun 	return res;
2162*4882a593Smuzhiyun }
2163*4882a593Smuzhiyun 
fib6_clean_node(struct fib6_walker * w)2164*4882a593Smuzhiyun static int fib6_clean_node(struct fib6_walker *w)
2165*4882a593Smuzhiyun {
2166*4882a593Smuzhiyun 	int res;
2167*4882a593Smuzhiyun 	struct fib6_info *rt;
2168*4882a593Smuzhiyun 	struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
2169*4882a593Smuzhiyun 	struct nl_info info = {
2170*4882a593Smuzhiyun 		.nl_net = c->net,
2171*4882a593Smuzhiyun 		.skip_notify = c->skip_notify,
2172*4882a593Smuzhiyun 	};
2173*4882a593Smuzhiyun 
2174*4882a593Smuzhiyun 	if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
2175*4882a593Smuzhiyun 	    READ_ONCE(w->node->fn_sernum) != c->sernum)
2176*4882a593Smuzhiyun 		WRITE_ONCE(w->node->fn_sernum, c->sernum);
2177*4882a593Smuzhiyun 
2178*4882a593Smuzhiyun 	if (!c->func) {
2179*4882a593Smuzhiyun 		WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
2180*4882a593Smuzhiyun 		w->leaf = NULL;
2181*4882a593Smuzhiyun 		return 0;
2182*4882a593Smuzhiyun 	}
2183*4882a593Smuzhiyun 
2184*4882a593Smuzhiyun 	for_each_fib6_walker_rt(w) {
2185*4882a593Smuzhiyun 		res = c->func(rt, c->arg);
2186*4882a593Smuzhiyun 		if (res == -1) {
2187*4882a593Smuzhiyun 			w->leaf = rt;
2188*4882a593Smuzhiyun 			res = fib6_del(rt, &info);
2189*4882a593Smuzhiyun 			if (res) {
2190*4882a593Smuzhiyun #if RT6_DEBUG >= 2
2191*4882a593Smuzhiyun 				pr_debug("%s: del failed: rt=%p@%p err=%d\n",
2192*4882a593Smuzhiyun 					 __func__, rt,
2193*4882a593Smuzhiyun 					 rcu_access_pointer(rt->fib6_node),
2194*4882a593Smuzhiyun 					 res);
2195*4882a593Smuzhiyun #endif
2196*4882a593Smuzhiyun 				continue;
2197*4882a593Smuzhiyun 			}
2198*4882a593Smuzhiyun 			return 0;
2199*4882a593Smuzhiyun 		} else if (res == -2) {
2200*4882a593Smuzhiyun 			if (WARN_ON(!rt->fib6_nsiblings))
2201*4882a593Smuzhiyun 				continue;
2202*4882a593Smuzhiyun 			rt = list_last_entry(&rt->fib6_siblings,
2203*4882a593Smuzhiyun 					     struct fib6_info, fib6_siblings);
2204*4882a593Smuzhiyun 			continue;
2205*4882a593Smuzhiyun 		}
2206*4882a593Smuzhiyun 		WARN_ON(res != 0);
2207*4882a593Smuzhiyun 	}
2208*4882a593Smuzhiyun 	w->leaf = rt;
2209*4882a593Smuzhiyun 	return 0;
2210*4882a593Smuzhiyun }
2211*4882a593Smuzhiyun 
2212*4882a593Smuzhiyun /*
2213*4882a593Smuzhiyun  *	Convenient frontend to tree walker.
2214*4882a593Smuzhiyun  *
2215*4882a593Smuzhiyun  *	func is called on each route.
2216*4882a593Smuzhiyun  *		It may return -2 -> skip multipath route.
2217*4882a593Smuzhiyun  *			      -1 -> delete this route.
2218*4882a593Smuzhiyun  *		              0  -> continue walking
2219*4882a593Smuzhiyun  */
2220*4882a593Smuzhiyun 
fib6_clean_tree(struct net * net,struct fib6_node * root,int (* func)(struct fib6_info *,void * arg),int sernum,void * arg,bool skip_notify)2221*4882a593Smuzhiyun static void fib6_clean_tree(struct net *net, struct fib6_node *root,
2222*4882a593Smuzhiyun 			    int (*func)(struct fib6_info *, void *arg),
2223*4882a593Smuzhiyun 			    int sernum, void *arg, bool skip_notify)
2224*4882a593Smuzhiyun {
2225*4882a593Smuzhiyun 	struct fib6_cleaner c;
2226*4882a593Smuzhiyun 
2227*4882a593Smuzhiyun 	c.w.root = root;
2228*4882a593Smuzhiyun 	c.w.func = fib6_clean_node;
2229*4882a593Smuzhiyun 	c.w.count = 0;
2230*4882a593Smuzhiyun 	c.w.skip = 0;
2231*4882a593Smuzhiyun 	c.w.skip_in_node = 0;
2232*4882a593Smuzhiyun 	c.func = func;
2233*4882a593Smuzhiyun 	c.sernum = sernum;
2234*4882a593Smuzhiyun 	c.arg = arg;
2235*4882a593Smuzhiyun 	c.net = net;
2236*4882a593Smuzhiyun 	c.skip_notify = skip_notify;
2237*4882a593Smuzhiyun 
2238*4882a593Smuzhiyun 	fib6_walk(net, &c.w);
2239*4882a593Smuzhiyun }
2240*4882a593Smuzhiyun 
/*
 *	Run fib6_clean_tree() on every table of @net.
 *
 *	The table hash is iterated under rcu_read_lock(); each table's
 *	tb6_lock is taken (BH disabled) around the cleaning of its tree.
 *	@func, @sernum, @arg and @skip_notify are passed through unchanged.
 */
static void __fib6_clean_all(struct net *net,
			     int (*func)(struct fib6_info *, void *),
			     int sernum, void *arg, bool skip_notify)
{
	struct fib6_table *table;
	struct hlist_head *head;
	unsigned int h;

	rcu_read_lock();
	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
		head = &net->ipv6.fib_table_hash[h];
		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
			spin_lock_bh(&table->tb6_lock);
			fib6_clean_tree(net, &table->tb6_root,
					func, sernum, arg, skip_notify);
			spin_unlock_bh(&table->tb6_lock);
		}
	}
	rcu_read_unlock();
}
2261*4882a593Smuzhiyun 
fib6_clean_all(struct net * net,int (* func)(struct fib6_info *,void *),void * arg)2262*4882a593Smuzhiyun void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
2263*4882a593Smuzhiyun 		    void *arg)
2264*4882a593Smuzhiyun {
2265*4882a593Smuzhiyun 	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
2266*4882a593Smuzhiyun }
2267*4882a593Smuzhiyun 
fib6_clean_all_skip_notify(struct net * net,int (* func)(struct fib6_info *,void *),void * arg)2268*4882a593Smuzhiyun void fib6_clean_all_skip_notify(struct net *net,
2269*4882a593Smuzhiyun 				int (*func)(struct fib6_info *, void *),
2270*4882a593Smuzhiyun 				void *arg)
2271*4882a593Smuzhiyun {
2272*4882a593Smuzhiyun 	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
2273*4882a593Smuzhiyun }
2274*4882a593Smuzhiyun 
fib6_flush_trees(struct net * net)2275*4882a593Smuzhiyun static void fib6_flush_trees(struct net *net)
2276*4882a593Smuzhiyun {
2277*4882a593Smuzhiyun 	int new_sernum = fib6_new_sernum(net);
2278*4882a593Smuzhiyun 
2279*4882a593Smuzhiyun 	__fib6_clean_all(net, NULL, new_sernum, NULL, false);
2280*4882a593Smuzhiyun }
2281*4882a593Smuzhiyun 
2282*4882a593Smuzhiyun /*
2283*4882a593Smuzhiyun  *	Garbage collection
2284*4882a593Smuzhiyun  */
2285*4882a593Smuzhiyun 
fib6_age(struct fib6_info * rt,void * arg)2286*4882a593Smuzhiyun static int fib6_age(struct fib6_info *rt, void *arg)
2287*4882a593Smuzhiyun {
2288*4882a593Smuzhiyun 	struct fib6_gc_args *gc_args = arg;
2289*4882a593Smuzhiyun 	unsigned long now = jiffies;
2290*4882a593Smuzhiyun 
2291*4882a593Smuzhiyun 	/*
2292*4882a593Smuzhiyun 	 *	check addrconf expiration here.
2293*4882a593Smuzhiyun 	 *	Routes are expired even if they are in use.
2294*4882a593Smuzhiyun 	 */
2295*4882a593Smuzhiyun 
2296*4882a593Smuzhiyun 	if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
2297*4882a593Smuzhiyun 		if (time_after(now, rt->expires)) {
2298*4882a593Smuzhiyun 			RT6_TRACE("expiring %p\n", rt);
2299*4882a593Smuzhiyun 			return -1;
2300*4882a593Smuzhiyun 		}
2301*4882a593Smuzhiyun 		gc_args->more++;
2302*4882a593Smuzhiyun 	}
2303*4882a593Smuzhiyun 
2304*4882a593Smuzhiyun 	/*	Also age clones in the exception table.
2305*4882a593Smuzhiyun 	 *	Note, that clones are aged out
2306*4882a593Smuzhiyun 	 *	only if they are not in use now.
2307*4882a593Smuzhiyun 	 */
2308*4882a593Smuzhiyun 	rt6_age_exceptions(rt, gc_args, now);
2309*4882a593Smuzhiyun 
2310*4882a593Smuzhiyun 	return 0;
2311*4882a593Smuzhiyun }
2312*4882a593Smuzhiyun 
/*
 *	Run one garbage collection pass over all tables of @net.
 *
 *	@expires:	GC timeout to use; 0 selects the
 *			ip6_rt_gc_interval sysctl value.
 *	@net:		namespace to collect.
 *	@force:		when true, wait for fib6_gc_lock; when false, only
 *			try the lock and, if it is busy, re-arm the GC timer
 *			one second (HZ) ahead and return without collecting.
 *
 *	After the pass the GC timer is re-armed if any route reported more
 *	work (gc_args.more), and deleted otherwise.
 */
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
	struct fib6_gc_args gc_args;
	unsigned long now;

	if (force) {
		spin_lock_bh(&net->ipv6.fib6_gc_lock);
	} else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) {
		mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
		return;
	}
	gc_args.timeout = expires ? (int)expires :
			  net->ipv6.sysctl.ip6_rt_gc_interval;
	gc_args.more = 0;

	fib6_clean_all(net, fib6_age, &gc_args);
	now = jiffies;
	net->ipv6.ip6_rt_last_gc = now;

	if (gc_args.more)
		mod_timer(&net->ipv6.ip6_fib_timer,
			  round_jiffies(now
					+ net->ipv6.sysctl.ip6_rt_gc_interval));
	else
		del_timer(&net->ipv6.ip6_fib_timer);
	spin_unlock_bh(&net->ipv6.fib6_gc_lock);
}
2340*4882a593Smuzhiyun 
fib6_gc_timer_cb(struct timer_list * t)2341*4882a593Smuzhiyun static void fib6_gc_timer_cb(struct timer_list *t)
2342*4882a593Smuzhiyun {
2343*4882a593Smuzhiyun 	struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer);
2344*4882a593Smuzhiyun 
2345*4882a593Smuzhiyun 	fib6_run_gc(0, arg, true);
2346*4882a593Smuzhiyun }
2347*4882a593Smuzhiyun 
fib6_net_init(struct net * net)2348*4882a593Smuzhiyun static int __net_init fib6_net_init(struct net *net)
2349*4882a593Smuzhiyun {
2350*4882a593Smuzhiyun 	size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
2351*4882a593Smuzhiyun 	int err;
2352*4882a593Smuzhiyun 
2353*4882a593Smuzhiyun 	err = fib6_notifier_init(net);
2354*4882a593Smuzhiyun 	if (err)
2355*4882a593Smuzhiyun 		return err;
2356*4882a593Smuzhiyun 
2357*4882a593Smuzhiyun 	spin_lock_init(&net->ipv6.fib6_gc_lock);
2358*4882a593Smuzhiyun 	rwlock_init(&net->ipv6.fib6_walker_lock);
2359*4882a593Smuzhiyun 	INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
2360*4882a593Smuzhiyun 	timer_setup(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, 0);
2361*4882a593Smuzhiyun 
2362*4882a593Smuzhiyun 	net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
2363*4882a593Smuzhiyun 	if (!net->ipv6.rt6_stats)
2364*4882a593Smuzhiyun 		goto out_timer;
2365*4882a593Smuzhiyun 
2366*4882a593Smuzhiyun 	/* Avoid false sharing : Use at least a full cache line */
2367*4882a593Smuzhiyun 	size = max_t(size_t, size, L1_CACHE_BYTES);
2368*4882a593Smuzhiyun 
2369*4882a593Smuzhiyun 	net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
2370*4882a593Smuzhiyun 	if (!net->ipv6.fib_table_hash)
2371*4882a593Smuzhiyun 		goto out_rt6_stats;
2372*4882a593Smuzhiyun 
2373*4882a593Smuzhiyun 	net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
2374*4882a593Smuzhiyun 					  GFP_KERNEL);
2375*4882a593Smuzhiyun 	if (!net->ipv6.fib6_main_tbl)
2376*4882a593Smuzhiyun 		goto out_fib_table_hash;
2377*4882a593Smuzhiyun 
2378*4882a593Smuzhiyun 	net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
2379*4882a593Smuzhiyun 	rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
2380*4882a593Smuzhiyun 			   net->ipv6.fib6_null_entry);
2381*4882a593Smuzhiyun 	net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
2382*4882a593Smuzhiyun 		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
2383*4882a593Smuzhiyun 	inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
2384*4882a593Smuzhiyun 
2385*4882a593Smuzhiyun #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2386*4882a593Smuzhiyun 	net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
2387*4882a593Smuzhiyun 					   GFP_KERNEL);
2388*4882a593Smuzhiyun 	if (!net->ipv6.fib6_local_tbl)
2389*4882a593Smuzhiyun 		goto out_fib6_main_tbl;
2390*4882a593Smuzhiyun 	net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
2391*4882a593Smuzhiyun 	rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
2392*4882a593Smuzhiyun 			   net->ipv6.fib6_null_entry);
2393*4882a593Smuzhiyun 	net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
2394*4882a593Smuzhiyun 		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
2395*4882a593Smuzhiyun 	inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
2396*4882a593Smuzhiyun #endif
2397*4882a593Smuzhiyun 	fib6_tables_init(net);
2398*4882a593Smuzhiyun 
2399*4882a593Smuzhiyun 	return 0;
2400*4882a593Smuzhiyun 
2401*4882a593Smuzhiyun #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2402*4882a593Smuzhiyun out_fib6_main_tbl:
2403*4882a593Smuzhiyun 	kfree(net->ipv6.fib6_main_tbl);
2404*4882a593Smuzhiyun #endif
2405*4882a593Smuzhiyun out_fib_table_hash:
2406*4882a593Smuzhiyun 	kfree(net->ipv6.fib_table_hash);
2407*4882a593Smuzhiyun out_rt6_stats:
2408*4882a593Smuzhiyun 	kfree(net->ipv6.rt6_stats);
2409*4882a593Smuzhiyun out_timer:
2410*4882a593Smuzhiyun 	fib6_notifier_exit(net);
2411*4882a593Smuzhiyun 	return -ENOMEM;
2412*4882a593Smuzhiyun }
2413*4882a593Smuzhiyun 
fib6_net_exit(struct net * net)2414*4882a593Smuzhiyun static void fib6_net_exit(struct net *net)
2415*4882a593Smuzhiyun {
2416*4882a593Smuzhiyun 	unsigned int i;
2417*4882a593Smuzhiyun 
2418*4882a593Smuzhiyun 	del_timer_sync(&net->ipv6.ip6_fib_timer);
2419*4882a593Smuzhiyun 
2420*4882a593Smuzhiyun 	for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
2421*4882a593Smuzhiyun 		struct hlist_head *head = &net->ipv6.fib_table_hash[i];
2422*4882a593Smuzhiyun 		struct hlist_node *tmp;
2423*4882a593Smuzhiyun 		struct fib6_table *tb;
2424*4882a593Smuzhiyun 
2425*4882a593Smuzhiyun 		hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
2426*4882a593Smuzhiyun 			hlist_del(&tb->tb6_hlist);
2427*4882a593Smuzhiyun 			fib6_free_table(tb);
2428*4882a593Smuzhiyun 		}
2429*4882a593Smuzhiyun 	}
2430*4882a593Smuzhiyun 
2431*4882a593Smuzhiyun 	kfree(net->ipv6.fib_table_hash);
2432*4882a593Smuzhiyun 	kfree(net->ipv6.rt6_stats);
2433*4882a593Smuzhiyun 	fib6_notifier_exit(net);
2434*4882a593Smuzhiyun }
2435*4882a593Smuzhiyun 
2436*4882a593Smuzhiyun static struct pernet_operations fib6_net_ops = {
2437*4882a593Smuzhiyun 	.init = fib6_net_init,
2438*4882a593Smuzhiyun 	.exit = fib6_net_exit,
2439*4882a593Smuzhiyun };
2440*4882a593Smuzhiyun 
fib6_init(void)2441*4882a593Smuzhiyun int __init fib6_init(void)
2442*4882a593Smuzhiyun {
2443*4882a593Smuzhiyun 	int ret = -ENOMEM;
2444*4882a593Smuzhiyun 
2445*4882a593Smuzhiyun 	fib6_node_kmem = kmem_cache_create("fib6_nodes",
2446*4882a593Smuzhiyun 					   sizeof(struct fib6_node),
2447*4882a593Smuzhiyun 					   0, SLAB_HWCACHE_ALIGN,
2448*4882a593Smuzhiyun 					   NULL);
2449*4882a593Smuzhiyun 	if (!fib6_node_kmem)
2450*4882a593Smuzhiyun 		goto out;
2451*4882a593Smuzhiyun 
2452*4882a593Smuzhiyun 	ret = register_pernet_subsys(&fib6_net_ops);
2453*4882a593Smuzhiyun 	if (ret)
2454*4882a593Smuzhiyun 		goto out_kmem_cache_create;
2455*4882a593Smuzhiyun 
2456*4882a593Smuzhiyun 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
2457*4882a593Smuzhiyun 				   inet6_dump_fib, 0);
2458*4882a593Smuzhiyun 	if (ret)
2459*4882a593Smuzhiyun 		goto out_unregister_subsys;
2460*4882a593Smuzhiyun 
2461*4882a593Smuzhiyun 	__fib6_flush_trees = fib6_flush_trees;
2462*4882a593Smuzhiyun out:
2463*4882a593Smuzhiyun 	return ret;
2464*4882a593Smuzhiyun 
2465*4882a593Smuzhiyun out_unregister_subsys:
2466*4882a593Smuzhiyun 	unregister_pernet_subsys(&fib6_net_ops);
2467*4882a593Smuzhiyun out_kmem_cache_create:
2468*4882a593Smuzhiyun 	kmem_cache_destroy(fib6_node_kmem);
2469*4882a593Smuzhiyun 	goto out;
2470*4882a593Smuzhiyun }
2471*4882a593Smuzhiyun 
fib6_gc_cleanup(void)2472*4882a593Smuzhiyun void fib6_gc_cleanup(void)
2473*4882a593Smuzhiyun {
2474*4882a593Smuzhiyun 	unregister_pernet_subsys(&fib6_net_ops);
2475*4882a593Smuzhiyun 	kmem_cache_destroy(fib6_node_kmem);
2476*4882a593Smuzhiyun }
2477*4882a593Smuzhiyun 
2478*4882a593Smuzhiyun #ifdef CONFIG_PROC_FS
ipv6_route_native_seq_show(struct seq_file * seq,void * v)2479*4882a593Smuzhiyun static int ipv6_route_native_seq_show(struct seq_file *seq, void *v)
2480*4882a593Smuzhiyun {
2481*4882a593Smuzhiyun 	struct fib6_info *rt = v;
2482*4882a593Smuzhiyun 	struct ipv6_route_iter *iter = seq->private;
2483*4882a593Smuzhiyun 	struct fib6_nh *fib6_nh = rt->fib6_nh;
2484*4882a593Smuzhiyun 	unsigned int flags = rt->fib6_flags;
2485*4882a593Smuzhiyun 	const struct net_device *dev;
2486*4882a593Smuzhiyun 
2487*4882a593Smuzhiyun 	if (rt->nh)
2488*4882a593Smuzhiyun 		fib6_nh = nexthop_fib6_nh_bh(rt->nh);
2489*4882a593Smuzhiyun 
2490*4882a593Smuzhiyun 	seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
2491*4882a593Smuzhiyun 
2492*4882a593Smuzhiyun #ifdef CONFIG_IPV6_SUBTREES
2493*4882a593Smuzhiyun 	seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen);
2494*4882a593Smuzhiyun #else
2495*4882a593Smuzhiyun 	seq_puts(seq, "00000000000000000000000000000000 00 ");
2496*4882a593Smuzhiyun #endif
2497*4882a593Smuzhiyun 	if (fib6_nh->fib_nh_gw_family) {
2498*4882a593Smuzhiyun 		flags |= RTF_GATEWAY;
2499*4882a593Smuzhiyun 		seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6);
2500*4882a593Smuzhiyun 	} else {
2501*4882a593Smuzhiyun 		seq_puts(seq, "00000000000000000000000000000000");
2502*4882a593Smuzhiyun 	}
2503*4882a593Smuzhiyun 
2504*4882a593Smuzhiyun 	dev = fib6_nh->fib_nh_dev;
2505*4882a593Smuzhiyun 	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
2506*4882a593Smuzhiyun 		   rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
2507*4882a593Smuzhiyun 		   flags, dev ? dev->name : "");
2508*4882a593Smuzhiyun 	iter->w.leaf = NULL;
2509*4882a593Smuzhiyun 	return 0;
2510*4882a593Smuzhiyun }
2511*4882a593Smuzhiyun 
ipv6_route_yield(struct fib6_walker * w)2512*4882a593Smuzhiyun static int ipv6_route_yield(struct fib6_walker *w)
2513*4882a593Smuzhiyun {
2514*4882a593Smuzhiyun 	struct ipv6_route_iter *iter = w->args;
2515*4882a593Smuzhiyun 
2516*4882a593Smuzhiyun 	if (!iter->skip)
2517*4882a593Smuzhiyun 		return 1;
2518*4882a593Smuzhiyun 
2519*4882a593Smuzhiyun 	do {
2520*4882a593Smuzhiyun 		iter->w.leaf = rcu_dereference_protected(
2521*4882a593Smuzhiyun 				iter->w.leaf->fib6_next,
2522*4882a593Smuzhiyun 				lockdep_is_held(&iter->tbl->tb6_lock));
2523*4882a593Smuzhiyun 		iter->skip--;
2524*4882a593Smuzhiyun 		if (!iter->skip && iter->w.leaf)
2525*4882a593Smuzhiyun 			return 1;
2526*4882a593Smuzhiyun 	} while (iter->w.leaf);
2527*4882a593Smuzhiyun 
2528*4882a593Smuzhiyun 	return 0;
2529*4882a593Smuzhiyun }
2530*4882a593Smuzhiyun 
ipv6_route_seq_setup_walk(struct ipv6_route_iter * iter,struct net * net)2531*4882a593Smuzhiyun static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
2532*4882a593Smuzhiyun 				      struct net *net)
2533*4882a593Smuzhiyun {
2534*4882a593Smuzhiyun 	memset(&iter->w, 0, sizeof(iter->w));
2535*4882a593Smuzhiyun 	iter->w.func = ipv6_route_yield;
2536*4882a593Smuzhiyun 	iter->w.root = &iter->tbl->tb6_root;
2537*4882a593Smuzhiyun 	iter->w.state = FWS_INIT;
2538*4882a593Smuzhiyun 	iter->w.node = iter->w.root;
2539*4882a593Smuzhiyun 	iter->w.args = iter;
2540*4882a593Smuzhiyun 	iter->sernum = READ_ONCE(iter->w.root->fn_sernum);
2541*4882a593Smuzhiyun 	INIT_LIST_HEAD(&iter->w.lh);
2542*4882a593Smuzhiyun 	fib6_walker_link(net, &iter->w);
2543*4882a593Smuzhiyun }
2544*4882a593Smuzhiyun 
ipv6_route_seq_next_table(struct fib6_table * tbl,struct net * net)2545*4882a593Smuzhiyun static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
2546*4882a593Smuzhiyun 						    struct net *net)
2547*4882a593Smuzhiyun {
2548*4882a593Smuzhiyun 	unsigned int h;
2549*4882a593Smuzhiyun 	struct hlist_node *node;
2550*4882a593Smuzhiyun 
2551*4882a593Smuzhiyun 	if (tbl) {
2552*4882a593Smuzhiyun 		h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
2553*4882a593Smuzhiyun 		node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
2554*4882a593Smuzhiyun 	} else {
2555*4882a593Smuzhiyun 		h = 0;
2556*4882a593Smuzhiyun 		node = NULL;
2557*4882a593Smuzhiyun 	}
2558*4882a593Smuzhiyun 
2559*4882a593Smuzhiyun 	while (!node && h < FIB6_TABLE_HASHSZ) {
2560*4882a593Smuzhiyun 		node = rcu_dereference_bh(
2561*4882a593Smuzhiyun 			hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
2562*4882a593Smuzhiyun 	}
2563*4882a593Smuzhiyun 	return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
2564*4882a593Smuzhiyun }
2565*4882a593Smuzhiyun 
ipv6_route_check_sernum(struct ipv6_route_iter * iter)2566*4882a593Smuzhiyun static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
2567*4882a593Smuzhiyun {
2568*4882a593Smuzhiyun 	int sernum = READ_ONCE(iter->w.root->fn_sernum);
2569*4882a593Smuzhiyun 
2570*4882a593Smuzhiyun 	if (iter->sernum != sernum) {
2571*4882a593Smuzhiyun 		iter->sernum = sernum;
2572*4882a593Smuzhiyun 		iter->w.state = FWS_INIT;
2573*4882a593Smuzhiyun 		iter->w.node = iter->w.root;
2574*4882a593Smuzhiyun 		WARN_ON(iter->w.skip);
2575*4882a593Smuzhiyun 		iter->w.skip = iter->w.count;
2576*4882a593Smuzhiyun 	}
2577*4882a593Smuzhiyun }
2578*4882a593Smuzhiyun 
/*
 *	seq_file ->next: advance *pos and return the route that follows @v,
 *	or NULL when there are no more routes.
 *
 *	The next route on @v's chain is returned directly when there is
 *	one.  Otherwise (or when @v is NULL) the table walk is resumed under
 *	the table's tb6_lock; when a table is exhausted the walker is
 *	unlinked and the walk is set up again on the next table.
 */
static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int r;
	struct fib6_info *n;
	struct net *net = seq_file_net(seq);
	struct ipv6_route_iter *iter = seq->private;

	++(*pos);
	if (!v)
		goto iter_table;

	n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
	if (n)
		return n;

iter_table:
	ipv6_route_check_sernum(iter);
	spin_lock_bh(&iter->tbl->tb6_lock);
	r = fib6_walk_continue(&iter->w);
	spin_unlock_bh(&iter->tbl->tb6_lock);
	if (r > 0) {
		/* Walk suspended by ipv6_route_yield(): a route is ready. */
		return iter->w.leaf;
	} else if (r < 0) {
		fib6_walker_unlink(net, &iter->w);
		return NULL;
	}
	/* r == 0: this table is done. */
	fib6_walker_unlink(net, &iter->w);

	iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
	if (!iter->tbl)
		return NULL;

	ipv6_route_seq_setup_walk(iter, net);
	goto iter_table;
}
2614*4882a593Smuzhiyun 
/*
 *	seq_file ->start: take rcu_read_lock_bh(), position the iterator on
 *	the first table and return the first route to show, skipping *pos
 *	routes via iter->skip.  Returns NULL when there is no table or no
 *	route.  The RCU-bh read lock is left held on return.
 */
static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU_BH)
{
	struct net *net = seq_file_net(seq);
	struct ipv6_route_iter *iter = seq->private;

	rcu_read_lock_bh();
	iter->tbl = ipv6_route_seq_next_table(NULL, net);
	iter->skip = *pos;

	if (iter->tbl) {
		/* Scratch position: the caller's *pos must not be advanced. */
		loff_t p = 0;

		ipv6_route_seq_setup_walk(iter, net);
		return ipv6_route_seq_next(seq, NULL, &p);
	} else {
		return NULL;
	}
}
2634*4882a593Smuzhiyun 
ipv6_route_iter_active(struct ipv6_route_iter * iter)2635*4882a593Smuzhiyun static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
2636*4882a593Smuzhiyun {
2637*4882a593Smuzhiyun 	struct fib6_walker *w = &iter->w;
2638*4882a593Smuzhiyun 	return w->node && !(w->state == FWS_U && w->node == w->root);
2639*4882a593Smuzhiyun }
2640*4882a593Smuzhiyun 
ipv6_route_native_seq_stop(struct seq_file * seq,void * v)2641*4882a593Smuzhiyun static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
2642*4882a593Smuzhiyun 	__releases(RCU_BH)
2643*4882a593Smuzhiyun {
2644*4882a593Smuzhiyun 	struct net *net = seq_file_net(seq);
2645*4882a593Smuzhiyun 	struct ipv6_route_iter *iter = seq->private;
2646*4882a593Smuzhiyun 
2647*4882a593Smuzhiyun 	if (ipv6_route_iter_active(iter))
2648*4882a593Smuzhiyun 		fib6_walker_unlink(net, &iter->w);
2649*4882a593Smuzhiyun 
2650*4882a593Smuzhiyun 	rcu_read_unlock_bh();
2651*4882a593Smuzhiyun }
2652*4882a593Smuzhiyun 
2653*4882a593Smuzhiyun #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
ipv6_route_prog_seq_show(struct bpf_prog * prog,struct bpf_iter_meta * meta,void * v)2654*4882a593Smuzhiyun static int ipv6_route_prog_seq_show(struct bpf_prog *prog,
2655*4882a593Smuzhiyun 				    struct bpf_iter_meta *meta,
2656*4882a593Smuzhiyun 				    void *v)
2657*4882a593Smuzhiyun {
2658*4882a593Smuzhiyun 	struct bpf_iter__ipv6_route ctx;
2659*4882a593Smuzhiyun 
2660*4882a593Smuzhiyun 	ctx.meta = meta;
2661*4882a593Smuzhiyun 	ctx.rt = v;
2662*4882a593Smuzhiyun 	return bpf_iter_run_prog(prog, &ctx);
2663*4882a593Smuzhiyun }
2664*4882a593Smuzhiyun 
ipv6_route_seq_show(struct seq_file * seq,void * v)2665*4882a593Smuzhiyun static int ipv6_route_seq_show(struct seq_file *seq, void *v)
2666*4882a593Smuzhiyun {
2667*4882a593Smuzhiyun 	struct ipv6_route_iter *iter = seq->private;
2668*4882a593Smuzhiyun 	struct bpf_iter_meta meta;
2669*4882a593Smuzhiyun 	struct bpf_prog *prog;
2670*4882a593Smuzhiyun 	int ret;
2671*4882a593Smuzhiyun 
2672*4882a593Smuzhiyun 	meta.seq = seq;
2673*4882a593Smuzhiyun 	prog = bpf_iter_get_info(&meta, false);
2674*4882a593Smuzhiyun 	if (!prog)
2675*4882a593Smuzhiyun 		return ipv6_route_native_seq_show(seq, v);
2676*4882a593Smuzhiyun 
2677*4882a593Smuzhiyun 	ret = ipv6_route_prog_seq_show(prog, &meta, v);
2678*4882a593Smuzhiyun 	iter->w.leaf = NULL;
2679*4882a593Smuzhiyun 
2680*4882a593Smuzhiyun 	return ret;
2681*4882a593Smuzhiyun }
2682*4882a593Smuzhiyun 
ipv6_route_seq_stop(struct seq_file * seq,void * v)2683*4882a593Smuzhiyun static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
2684*4882a593Smuzhiyun {
2685*4882a593Smuzhiyun 	struct bpf_iter_meta meta;
2686*4882a593Smuzhiyun 	struct bpf_prog *prog;
2687*4882a593Smuzhiyun 
2688*4882a593Smuzhiyun 	if (!v) {
2689*4882a593Smuzhiyun 		meta.seq = seq;
2690*4882a593Smuzhiyun 		prog = bpf_iter_get_info(&meta, true);
2691*4882a593Smuzhiyun 		if (prog)
2692*4882a593Smuzhiyun 			(void)ipv6_route_prog_seq_show(prog, &meta, v);
2693*4882a593Smuzhiyun 	}
2694*4882a593Smuzhiyun 
2695*4882a593Smuzhiyun 	ipv6_route_native_seq_stop(seq, v);
2696*4882a593Smuzhiyun }
2697*4882a593Smuzhiyun #else
/*
 * seq_file ->show callback (variant without BPF iterator support):
 * forwards straight to ipv6_route_native_seq_show() and returns its
 * result.
 */
static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
	int rc = ipv6_route_native_seq_show(seq, v);

	return rc;
}
2702*4882a593Smuzhiyun 
/*
 * seq_file ->stop callback (variant without BPF iterator support):
 * all of the work is done by ipv6_route_native_seq_stop().
 */
static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
{
	ipv6_route_native_seq_stop(seq, v);
}
2707*4882a593Smuzhiyun #endif
2708*4882a593Smuzhiyun 
2709*4882a593Smuzhiyun const struct seq_operations ipv6_route_seq_ops = {
2710*4882a593Smuzhiyun 	.start	= ipv6_route_seq_start,
2711*4882a593Smuzhiyun 	.next	= ipv6_route_seq_next,
2712*4882a593Smuzhiyun 	.stop	= ipv6_route_seq_stop,
2713*4882a593Smuzhiyun 	.show	= ipv6_route_seq_show
2714*4882a593Smuzhiyun };
2715*4882a593Smuzhiyun #endif /* CONFIG_PROC_FS */
2716