// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Copyright (C) 2005 SGI, Christoph Lameter
 * Copyright (C) 2006 Nick Piggin
 * Copyright (C) 2012 Konstantin Khlebnikov
 * Copyright (C) 2016 Intel, Matthew Wilcox
 * Copyright (C) 2016 Intel, Ross Zwisler
 */

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/percpu.h>
#include <linux/preempt.h>		/* in_interrupt() */
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/xarray.h>

/*
 * Radix tree node cache.
 */
struct kmem_cache *radix_tree_node_cachep;

/*
 * The radix tree is variable-height, so an insert operation not only has
 * to build the branch to its corresponding item, it also has to build the
 * branch to existing items if the size has to be increased (by
 * radix_tree_extend).
 *
 * The worst case is a zero height tree with just a single item at index 0,
 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
 * Hence:
 */
#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
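
/*
 * Worked example (for illustration only, not used by the code): assuming
 * RADIX_TREE_MAP_SHIFT == 6 (the non-CONFIG_BASE_SMALL value) on a 64-bit
 * machine, RADIX_TREE_MAX_PATH == DIV_ROUND_UP(64, 6) == 11, so one preload
 * reserves RADIX_TREE_PRELOAD_SIZE == 11 * 2 - 1 == 21 nodes.
 */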

/*
 * The IDR does not have to be as high as the radix tree since it uses
 * signed integers, not unsigned longs.
 */
#define IDR_INDEX_BITS		(8 /* CHAR_BIT */ * sizeof(int) - 1)
#define IDR_MAX_PATH		(DIV_ROUND_UP(IDR_INDEX_BITS, \
						RADIX_TREE_MAP_SHIFT))
#define IDR_PRELOAD_SIZE	(IDR_MAX_PATH * 2 - 1)
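
/*
 * Worked example (illustrative, same assumption of RADIX_TREE_MAP_SHIFT == 6):
 * IDR_INDEX_BITS == 8 * sizeof(int) - 1 == 31, so IDR_MAX_PATH ==
 * DIV_ROUND_UP(31, 6) == 6 and IDR_PRELOAD_SIZE == 6 * 2 - 1 == 11.
 */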

/*
 * Per-cpu pool of preloaded nodes
 */
DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = {
	.lock = INIT_LOCAL_LOCK(lock),
};
EXPORT_PER_CPU_SYMBOL_GPL(radix_tree_preloads);

static inline struct radix_tree_node *entry_to_node(void *ptr)
{
	return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
}

static inline void *node_to_entry(void *ptr)
{
	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
}

#define RADIX_TREE_RETRY	XA_RETRY_ENTRY

static inline unsigned long
get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
{
	return parent ? slot - parent->slots : 0;
}

static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
			struct radix_tree_node **nodep, unsigned long index)
{
	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);

	*nodep = (void *)entry;
	return offset;
}

static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
{
	return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	return test_bit(offset, node->tags[tag]);
}

static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
{
	root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
}

static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
{
	root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
}

static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
{
	return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
}

static inline unsigned root_tags_get(const struct radix_tree_root *root)
{
	return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
}

static inline bool is_idr(const struct radix_tree_root *root)
{
	return !!(root->xa_flags & ROOT_IS_IDR);
}

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(const struct radix_tree_node *node,
							unsigned int tag)
{
	unsigned idx;
	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
}

/**
 * radix_tree_find_next_bit - find the next set bit in a memory region
 *
 * @node: where to begin the search
 * @tag: the tag index
 * @offset: the bit number to start searching at
 *
 * Unrollable variant of find_next_bit() for constant size arrays.
 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
 * Returns next bit offset, or size if nothing found.
 */
static __always_inline unsigned long
radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
			 unsigned long offset)
{
	const unsigned long *addr = node->tags[tag];

	if (offset < RADIX_TREE_MAP_SIZE) {
		unsigned long tmp;

		addr += offset / BITS_PER_LONG;
		tmp = *addr >> (offset % BITS_PER_LONG);
		if (tmp)
			return __ffs(tmp) + offset;
		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
		while (offset < RADIX_TREE_MAP_SIZE) {
			tmp = *++addr;
			if (tmp)
				return __ffs(tmp) + offset;
			offset += BITS_PER_LONG;
		}
	}
	return RADIX_TREE_MAP_SIZE;
}

static unsigned int iter_offset(const struct radix_tree_iter *iter)
{
	return iter->index & RADIX_TREE_MAP_MASK;
}

/*
 * The maximum index which can be stored in a radix tree
 */
static inline unsigned long shift_maxindex(unsigned int shift)
{
	return (RADIX_TREE_MAP_SIZE << shift) - 1;
}
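
/*
 * For example (illustrative only), with RADIX_TREE_MAP_SIZE == 64:
 * shift_maxindex(0) == 63, shift_maxindex(6) == 4095 and
 * shift_maxindex(12) == 262143.
 */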

static inline unsigned long node_maxindex(const struct radix_tree_node *node)
{
	return shift_maxindex(node->shift);
}

static unsigned long next_index(unsigned long index,
				const struct radix_tree_node *node,
				unsigned long offset)
{
	return (index & ~node_maxindex(node)) + (offset << node->shift);
}

/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
			struct radix_tree_root *root,
			unsigned int shift, unsigned int offset,
			unsigned int count, unsigned int nr_values)
{
	struct radix_tree_node *ret = NULL;

	/*
	 * Preload code isn't irq safe and it doesn't make sense to use
	 * preloading during an interrupt anyway as all the allocations have
	 * to be atomic. So just do normal allocation when in interrupt.
	 */
	if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
		struct radix_tree_preload *rtp;

		/*
		 * Even if the caller has preloaded, try to allocate from the
		 * cache first for the new node to get accounted to the memory
		 * cgroup.
		 */
		ret = kmem_cache_alloc(radix_tree_node_cachep,
				       gfp_mask | __GFP_NOWARN);
		if (ret)
			goto out;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr) {
			ret = rtp->nodes;
			rtp->nodes = ret->parent;
			rtp->nr--;
		}
		/*
		 * Update the allocation stack trace as this is more useful
		 * for debugging.
		 */
		kmemleak_update_trace(ret);
		goto out;
	}
	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
out:
	BUG_ON(radix_tree_is_internal_node(ret));
	if (ret) {
		ret->shift = shift;
		ret->offset = offset;
		ret->count = count;
		ret->nr_values = nr_values;
		ret->parent = parent;
		ret->array = root;
	}
	return ret;
}

void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);

	/*
	 * Must only free zeroed nodes into the slab. We can be left with
	 * non-NULL entries by radix_tree_free_nodes, so clear the entries
	 * and tags here.
	 */
	memset(node->slots, 0, sizeof(node->slots));
	memset(node->tags, 0, sizeof(node->tags));
	INIT_LIST_HEAD(&node->private_list);

	kmem_cache_free(radix_tree_node_cachep, node);
}

static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail. On
 * success, return zero, with preemption disabled. On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	/*
	 * Nodes preloaded by one cgroup can be used by another cgroup, so
	 * they should never be accounted to any particular memory cgroup.
	 */
	gfp_mask &= ~__GFP_ACCOUNT;

	local_lock(&radix_tree_preloads.lock);
	rtp = this_cpu_ptr(&radix_tree_preloads);
	while (rtp->nr < nr) {
		local_unlock(&radix_tree_preloads.lock);
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		local_lock(&radix_tree_preloads.lock);
		rtp = this_cpu_ptr(&radix_tree_preloads);
		if (rtp->nr < nr) {
			node->parent = rtp->nodes;
			rtp->nodes = node;
			rtp->nr++;
		} else {
			kmem_cache_free(radix_tree_node_cachep, node);
		}
	}
	ret = 0;
out:
	return ret;
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail. On
 * success, return zero, with preemption disabled. On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
	/* Warn on non-sensical use... */
	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
	return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
}
EXPORT_SYMBOL(radix_tree_preload);
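
/*
 * A minimal usage sketch of the preload API (the names "my_lock", "my_tree",
 * "index" and "item" are illustrative, not part of this file).  It assumes
 * the tree was initialised without __GFP_DIRECT_RECLAIM, as described above;
 * radix_tree_preload_end() drops the per-CPU preload lock again:
 *
 *	if (radix_tree_preload(GFP_KERNEL))
 *		return -ENOMEM;
 *	spin_lock(&my_lock);
 *	error = radix_tree_insert(&my_tree, index, item);
 *	spin_unlock(&my_lock);
 *	radix_tree_preload_end();
 */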

/*
 * The same as above function, except we don't guarantee preloading happens.
 * We do it, if we decide it helps. On success, return zero with preemption
 * disabled. On error, return -ENOMEM with preemption not disabled.
 */
int radix_tree_maybe_preload(gfp_t gfp_mask)
{
	if (gfpflags_allow_blocking(gfp_mask))
		return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
	/* Preloading doesn't help anything with this gfp mask, skip it */
	local_lock(&radix_tree_preloads.lock);
	return 0;
}
EXPORT_SYMBOL(radix_tree_maybe_preload);

static unsigned radix_tree_load_root(const struct radix_tree_root *root,
		struct radix_tree_node **nodep, unsigned long *maxindex)
{
	struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);

	*nodep = node;

	if (likely(radix_tree_is_internal_node(node))) {
		node = entry_to_node(node);
		*maxindex = node_maxindex(node);
		return node->shift + RADIX_TREE_MAP_SHIFT;
	}

	*maxindex = 0;
	return 0;
}

/*
 * Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
				unsigned long index, unsigned int shift)
{
	void *entry;
	unsigned int maxshift;
	int tag;

	/* Figure out what the shift should be. */
	maxshift = shift;
	while (index > shift_maxindex(maxshift))
		maxshift += RADIX_TREE_MAP_SHIFT;

	entry = rcu_dereference_raw(root->xa_head);
	if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
		goto out;

	do {
		struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
							root, shift, 0, 1, 0);
		if (!node)
			return -ENOMEM;

		if (is_idr(root)) {
			all_tag_set(node, IDR_FREE);
			if (!root_tag_get(root, IDR_FREE)) {
				tag_clear(node, IDR_FREE, 0);
				root_tag_set(root, IDR_FREE);
			}
		} else {
			/* Propagate the aggregated tag info to the new child */
			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
				if (root_tag_get(root, tag))
					tag_set(node, tag, 0);
			}
		}

		BUG_ON(shift > BITS_PER_LONG);
		if (radix_tree_is_internal_node(entry)) {
			entry_to_node(entry)->parent = node;
		} else if (xa_is_value(entry)) {
			/* Moving a value entry root->xa_head to a node */
			node->nr_values = 1;
		}
		/*
		 * entry was already in the radix tree, so we do not need
		 * rcu_assign_pointer here
		 */
		node->slots[0] = (void __rcu *)entry;
		entry = node_to_entry(node);
		rcu_assign_pointer(root->xa_head, entry);
		shift += RADIX_TREE_MAP_SHIFT;
	} while (shift <= maxshift);
out:
	return maxshift + RADIX_TREE_MAP_SHIFT;
}

/**
 * radix_tree_shrink - shrink radix tree to minimum height
 * @root: radix tree root
 */
static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
	bool shrunk = false;

	for (;;) {
		struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
		struct radix_tree_node *child;

		if (!radix_tree_is_internal_node(node))
			break;
		node = entry_to_node(node);

		/*
		 * The candidate node has more than one child, or its child
		 * is not at the leftmost slot, we cannot shrink.
		 */
		if (node->count != 1)
			break;
		child = rcu_dereference_raw(node->slots[0]);
		if (!child)
			break;

		/*
		 * For an IDR, we must not shrink entry 0 into the root in
		 * case somebody calls idr_replace() with a pointer that
		 * appears to be an internal entry
		 */
		if (!node->shift && is_idr(root))
			break;

		if (radix_tree_is_internal_node(child))
			entry_to_node(child)->parent = NULL;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (node->slots[0]), it will be safe to dereference the new
		 * one (root->xa_head) as far as dependent read barriers go.
		 */
		root->xa_head = (void __rcu *)child;
		if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
			root_tag_clear(root, IDR_FREE);

		/*
		 * We have a dilemma here. The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item. However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it. If item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page has 0 refcount it means it
		 * was concurrently deleted from pagecache so try the deref
		 * again. Fortunately there is already a requirement for logic
		 * to retry the entire slot lookup -- the indirect pointer
		 * problem (replacing direct root node with an indirect pointer
		 * also results in a stale slot). So tag the slot as indirect
		 * to force callers to retry.
		 */
		node->count = 0;
		if (!radix_tree_is_internal_node(child)) {
			node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
		}

		WARN_ON_ONCE(!list_empty(&node->private_list));
		radix_tree_node_free(node);
		shrunk = true;
	}

	return shrunk;
}

static bool delete_node(struct radix_tree_root *root,
			struct radix_tree_node *node)
{
	bool deleted = false;

	do {
		struct radix_tree_node *parent;

		if (node->count) {
			if (node_to_entry(node) ==
					rcu_dereference_raw(root->xa_head))
				deleted |= radix_tree_shrink(root);
			return deleted;
		}

		parent = node->parent;
		if (parent) {
			parent->slots[node->offset] = NULL;
			parent->count--;
		} else {
			/*
			 * Shouldn't the tags already have all been cleared
			 * by the caller?
			 */
			if (!is_idr(root))
				root_tag_clear_all(root);
			root->xa_head = NULL;
		}

		WARN_ON_ONCE(!list_empty(&node->private_list));
		radix_tree_node_free(node);
		deleted = true;

		node = parent;
	} while (node);

	return deleted;
}

/**
 * __radix_tree_create - create a slot in a radix tree
 * @root: radix tree root
 * @index: index key
 * @nodep: returns node
 * @slotp: returns slot
 *
 * Create, if necessary, and return the node and slot for an item
 * at position @index in the radix tree @root.
 *
 * Until there is more than one item in the tree, no nodes are
 * allocated and @root->xa_head is used as a direct slot instead of
 * pointing to a node, in which case *@nodep will be NULL.
 *
 * Returns -ENOMEM, or 0 for success.
 */
static int __radix_tree_create(struct radix_tree_root *root,
		unsigned long index, struct radix_tree_node **nodep,
		void __rcu ***slotp)
{
	struct radix_tree_node *node = NULL, *child;
	void __rcu **slot = (void __rcu **)&root->xa_head;
	unsigned long maxindex;
	unsigned int shift, offset = 0;
	unsigned long max = index;
	gfp_t gfp = root_gfp_mask(root);

	shift = radix_tree_load_root(root, &child, &maxindex);

	/* Make sure the tree is high enough. */
	if (max > maxindex) {
		int error = radix_tree_extend(root, gfp, max, shift);
		if (error < 0)
			return error;
		shift = error;
		child = rcu_dereference_raw(root->xa_head);
	}

	while (shift > 0) {
		shift -= RADIX_TREE_MAP_SHIFT;
		if (child == NULL) {
			/* Have to add a child node. */
			child = radix_tree_node_alloc(gfp, node, root, shift,
							offset, 0, 0);
			if (!child)
				return -ENOMEM;
			rcu_assign_pointer(*slot, node_to_entry(child));
			if (node)
				node->count++;
		} else if (!radix_tree_is_internal_node(child))
			break;

		/* Go a level down */
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);
		slot = &node->slots[offset];
	}

	if (nodep)
		*nodep = node;
	if (slotp)
		*slotp = slot;
	return 0;
}

/*
 * Free any nodes below this node. The tree is presumed to not need
 * shrinking, and any user data in the tree is presumed to not need a
 * destructor called on it. If we need to add a destructor, we can
 * add that functionality later. Note that we may not clear tags or
 * slots from the tree as an RCU walker may still have a pointer into
 * this subtree. We could replace the entries with RADIX_TREE_RETRY,
 * but we'll still have to clear those in rcu_free.
 */
static void radix_tree_free_nodes(struct radix_tree_node *node)
{
	unsigned offset = 0;
	struct radix_tree_node *child = entry_to_node(node);

	for (;;) {
		void *entry = rcu_dereference_raw(child->slots[offset]);
		if (xa_is_node(entry) && child->shift) {
			child = entry_to_node(entry);
			offset = 0;
			continue;
		}
		offset++;
		while (offset == RADIX_TREE_MAP_SIZE) {
			struct radix_tree_node *old = child;
			offset = child->offset + 1;
			child = child->parent;
			WARN_ON_ONCE(!list_empty(&old->private_list));
			radix_tree_node_free(old);
			if (old == entry_to_node(node))
				return;
		}
	}
}

static inline int insert_entries(struct radix_tree_node *node,
		void __rcu **slot, void *item, bool replace)
{
	if (*slot)
		return -EEXIST;
	rcu_assign_pointer(*slot, item);
	if (node) {
		node->count++;
		if (xa_is_value(item))
			node->nr_values++;
	}
	return 1;
}

/**
 * radix_tree_insert - insert into a radix tree
 * @root: radix tree root
 * @index: index key
 * @item: item to insert
 *
 * Insert an item into the radix tree at position @index.
 */
int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
			void *item)
{
	struct radix_tree_node *node;
	void __rcu **slot;
	int error;

	BUG_ON(radix_tree_is_internal_node(item));

	error = __radix_tree_create(root, index, &node, &slot);
	if (error)
		return error;

	error = insert_entries(node, slot, item, false);
	if (error < 0)
		return error;

	if (node) {
		unsigned offset = get_slot_offset(node, slot);
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
		BUG_ON(tag_get(node, 2, offset));
	} else {
		BUG_ON(root_tags_get(root));
	}

	return 0;
}
EXPORT_SYMBOL(radix_tree_insert);

/**
 * __radix_tree_lookup - lookup an item in a radix tree
 * @root: radix tree root
 * @index: index key
 * @nodep: returns node
 * @slotp: returns slot
 *
 * Lookup and return the item at position @index in the radix
 * tree @root.
 *
 * Until there is more than one item in the tree, no nodes are
 * allocated and @root->xa_head is used as a direct slot instead of
 * pointing to a node, in which case *@nodep will be NULL.
 */
void *__radix_tree_lookup(const struct radix_tree_root *root,
			  unsigned long index, struct radix_tree_node **nodep,
			  void __rcu ***slotp)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	void __rcu **slot;

 restart:
	parent = NULL;
	slot = (void __rcu **)&root->xa_head;
	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		slot = parent->slots + offset;
		if (node == RADIX_TREE_RETRY)
			goto restart;
		if (parent->shift == 0)
			break;
	}

	if (nodep)
		*nodep = parent;
	if (slotp)
		*slotp = slot;
	return node;
}

/**
 * radix_tree_lookup_slot - lookup a slot in a radix tree
 * @root: radix tree root
 * @index: index key
 *
 * Returns: the slot corresponding to the position @index in the
 * radix tree @root. This is useful for update-if-exists operations.
 *
 * This function can be called under rcu_read_lock iff the slot is not
 * modified by radix_tree_replace_slot, otherwise it must be called
 * exclusive from other writers. Any dereference of the slot must be done
 * using radix_tree_deref_slot.
 */
void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
				unsigned long index)
{
	void __rcu **slot;

	if (!__radix_tree_lookup(root, index, NULL, &slot))
		return NULL;
	return slot;
}
EXPORT_SYMBOL(radix_tree_lookup_slot);

/**
 * radix_tree_lookup - perform lookup operation on a radix tree
 * @root: radix tree root
 * @index: index key
 *
 * Lookup the item at the position @index in the radix tree @root.
 *
 * This function can be called under rcu_read_lock, however the caller
 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 * them safely). No RCU barriers are required to access or modify the
 * returned item, however.
 */
void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
{
	return __radix_tree_lookup(root, index, NULL, NULL);
}
EXPORT_SYMBOL(radix_tree_lookup);
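
/*
 * Illustrative lookup under RCU (not part of this file; "my_tree", "my_index"
 * and "do_something" are hypothetical).  The caller is responsible for the
 * lifetime of the returned item, as noted above:
 *
 *	rcu_read_lock();
 *	item = radix_tree_lookup(&my_tree, my_index);
 *	if (item)
 *		do_something(item);
 *	rcu_read_unlock();
 */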

static void replace_slot(void __rcu **slot, void *item,
		struct radix_tree_node *node, int count, int values)
{
	if (node && (count || values)) {
		node->count += count;
		node->nr_values += values;
	}

	rcu_assign_pointer(*slot, item);
}

static bool node_tag_get(const struct radix_tree_root *root,
				const struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	if (node)
		return tag_get(node, tag, offset);
	return root_tag_get(root, tag);
}

/*
 * IDR users want to be able to store NULL in the tree, so if the slot isn't
 * free, don't adjust the count, even if it's transitioning between NULL and
 * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still
 * have empty bits, but it only stores NULL in slots when they're being
 * deleted.
 */
static int calculate_count(struct radix_tree_root *root,
				struct radix_tree_node *node, void __rcu **slot,
				void *item, void *old)
{
	if (is_idr(root)) {
		unsigned offset = get_slot_offset(node, slot);
		bool free = node_tag_get(root, node, IDR_FREE, offset);
		if (!free)
			return 0;
		if (!old)
			return 1;
	}
	return !!item - !!old;
}

/**
 * __radix_tree_replace - replace item in a slot
 * @root: radix tree root
 * @node: pointer to tree node
 * @slot: pointer to slot in @node
 * @item: new item to store in the slot.
 *
 * For use with __radix_tree_lookup(). Caller must hold tree write locked
 * across slot lookup and replacement.
 */
void __radix_tree_replace(struct radix_tree_root *root,
			  struct radix_tree_node *node,
			  void __rcu **slot, void *item)
{
	void *old = rcu_dereference_raw(*slot);
	int values = !!xa_is_value(item) - !!xa_is_value(old);
	int count = calculate_count(root, node, slot, item, old);

	/*
	 * This function supports replacing value entries and
	 * deleting entries, but that needs accounting against the
	 * node unless the slot is root->xa_head.
	 */
	WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
			(count || values));
	replace_slot(slot, item, node, count, values);

	if (!node)
		return;

	delete_node(root, node);
}

/**
 * radix_tree_replace_slot - replace item in a slot
 * @root: radix tree root
 * @slot: pointer to slot
 * @item: new item to store in the slot.
 *
 * For use with radix_tree_lookup_slot() and
 * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked
 * across slot lookup and replacement.
 *
 * NOTE: This cannot be used to switch between non-entries (empty slots),
 * regular entries, and value entries, as that requires accounting
 * inside the radix tree node. When switching from one type of entry or
 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
 * radix_tree_iter_replace().
 */
void radix_tree_replace_slot(struct radix_tree_root *root,
			     void __rcu **slot, void *item)
{
	__radix_tree_replace(root, NULL, slot, item);
}
EXPORT_SYMBOL(radix_tree_replace_slot);

/**
 * radix_tree_iter_replace - replace item in a slot
 * @root: radix tree root
 * @iter: iterator state
 * @slot: pointer to slot
 * @item: new item to store in the slot.
 *
 * For use with radix_tree_for_each_slot().
 * Caller must hold tree write locked.
 */
void radix_tree_iter_replace(struct radix_tree_root *root,
				const struct radix_tree_iter *iter,
				void __rcu **slot, void *item)
{
	__radix_tree_replace(root, iter->node, slot, item);
}

static void node_tag_set(struct radix_tree_root *root,
				struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	while (node) {
		if (tag_get(node, tag, offset))
			return;
		tag_set(node, tag, offset);
		offset = node->offset;
		node = node->parent;
	}

	if (!root_tag_get(root, tag))
		root_tag_set(root, tag);
}

/**
 * radix_tree_tag_set - set a tag on a radix tree node
 * @root: radix tree root
 * @index: index key
 * @tag: tag index
 *
 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 * corresponding to @index in the radix tree, from
 * the root all the way down to the leaf node.
 *
 * Returns the address of the tagged item. Setting a tag on a not-present
 * item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	radix_tree_load_root(root, &node, &maxindex);
	BUG_ON(index > maxindex);

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
		BUG_ON(!node);

		if (!tag_get(parent, tag, offset))
			tag_set(parent, tag, offset);
	}

	/* set the root's tag bit */
	if (!root_tag_get(root, tag))
		root_tag_set(root, tag);

	return node;
}
EXPORT_SYMBOL(radix_tree_tag_set);

static void node_tag_clear(struct radix_tree_root *root,
				struct radix_tree_node *node,
				unsigned int tag, unsigned int offset)
{
	while (node) {
		if (!tag_get(node, tag, offset))
			return;
		tag_clear(node, tag, offset);
		if (any_tag_set(node, tag))
			return;

		offset = node->offset;
		node = node->parent;
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);
}

/**
 * radix_tree_tag_clear - clear a tag on a radix tree node
 * @root: radix tree root
 * @index: index key
 * @tag: tag index
 *
 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 * corresponding to @index in the radix tree. If this causes
 * the leaf node to have no tags set then clear the tag in the
 * next-to-leaf node, etc.
 *
 * Returns the address of the tagged item on success, else NULL; i.e. it
 * has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;
	int offset;

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return NULL;

	parent = NULL;

	while (radix_tree_is_internal_node(node)) {
		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);
	}

	if (node)
		node_tag_clear(root, parent, tag, offset);

	return node;
}
EXPORT_SYMBOL(radix_tree_tag_clear);

/**
 * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
 * @root: radix tree root
 * @iter: iterator state
 * @tag: tag to clear
 */
void radix_tree_iter_tag_clear(struct radix_tree_root *root,
			const struct radix_tree_iter *iter, unsigned int tag)
{
	node_tag_clear(root, iter->node, tag, iter_offset(iter));
}

/**
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root: radix tree root
 * @index: index key
 * @tag: tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Return values:
 *
 * 0: tag not present or not set
 * 1: tag set
 *
 * Note that the return value of this function may not be relied on, even if
 * the RCU lock is held, unless tag modification and node deletion are excluded
 * from concurrency.
 */
int radix_tree_tag_get(const struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node, *parent;
	unsigned long maxindex;

	if (!root_tag_get(root, tag))
		return 0;

	radix_tree_load_root(root, &node, &maxindex);
	if (index > maxindex)
		return 0;

	while (radix_tree_is_internal_node(node)) {
		unsigned offset;

		parent = entry_to_node(node);
		offset = radix_tree_descend(parent, &node, index);

		if (!tag_get(parent, tag, offset))
			return 0;
		if (node == RADIX_TREE_RETRY)
			break;
	}

	return 1;
}
EXPORT_SYMBOL(radix_tree_tag_get);
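
/*
 * A minimal sketch of the tag API (illustrative; "my_tree", "my_index" and
 * the tag value 0 are hypothetical).  Setting a tag on an absent item is a
 * bug, so an item must already be present at "my_index":
 *
 *	radix_tree_tag_set(&my_tree, my_index, 0);
 *	if (radix_tree_tag_get(&my_tree, my_index, 0))
 *		;	// the item at my_index is tagged
 *	radix_tree_tag_clear(&my_tree, my_index, 0);
 */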
1106*4882a593Smuzhiyun
/* Construct iter->tags bit-mask from node->tags[tag] array */
static void set_iter_tags(struct radix_tree_iter *iter,
				struct radix_tree_node *node, unsigned offset,
				unsigned tag)
{
	unsigned tag_long = offset / BITS_PER_LONG;
	unsigned tag_bit  = offset % BITS_PER_LONG;

	if (!node) {
		iter->tags = 1;
		return;
	}

	iter->tags = node->tags[tag][tag_long] >> tag_bit;

	/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
	if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
		/* Pick tags from next element */
		if (tag_bit)
			iter->tags |= node->tags[tag][tag_long + 1] <<
						(BITS_PER_LONG - tag_bit);
		/* Clip chunk size, here only BITS_PER_LONG tags */
		iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
	}
}

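/*
 * Illustrative sketch (not part of the original source) of the bit-window
 * extraction performed by set_iter_tags() above: a BITS_PER_LONG-wide tag
 * window starting at an arbitrary bit offset is assembled from at most two
 * adjacent words of a bitmap.  tag_window() is a hypothetical helper; when
 * "bit" is non-zero the bitmap is assumed to contain at least word + 2
 * longs, so the borrow from the next word stays in bounds.
 *
 *	static unsigned long tag_window(const unsigned long *bits,
 *					unsigned int offset)
 *	{
 *		unsigned int word = offset / BITS_PER_LONG;
 *		unsigned int bit = offset % BITS_PER_LONG;
 *		unsigned long w = bits[word] >> bit;
 *
 *		if (bit)
 *			w |= bits[word + 1] << (BITS_PER_LONG - bit);
 *		return w;
 *	}
 */
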
void __rcu **radix_tree_iter_resume(void __rcu **slot,
					struct radix_tree_iter *iter)
{
	slot++;
	iter->index = __radix_tree_iter_add(iter, 1);
	iter->next_index = iter->index;
	iter->tags = 0;
	return NULL;
}
EXPORT_SYMBOL(radix_tree_iter_resume);

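/*
 * Illustrative sketch (not part of the original source): pausing a long
 * RCU-protected walk with radix_tree_iter_resume().  Returning NULL from
 * the helper makes the iteration macro call radix_tree_next_chunk() again,
 * so the walk revalidates its position after the critical section was
 * dropped.  "my_tree" and process_entry() are hypothetical; the loop is
 * assumed to run under rcu_read_lock().
 *
 *	radix_tree_for_each_slot(slot, &my_tree, &iter, 0) {
 *		process_entry(rcu_dereference_raw(*slot));
 *		if (need_resched()) {
 *			slot = radix_tree_iter_resume(slot, &iter);
 *			cond_resched_rcu();
 *		}
 *	}
 */
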
/**
 * radix_tree_next_chunk - find next chunk of slots for iteration
 *
 * @root: radix tree root
 * @iter: iterator state
 * @flags: RADIX_TREE_ITER_* flags and tag index
 * Returns: pointer to the first slot in the chunk, or NULL if iteration is over
 */
void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
			struct radix_tree_iter *iter, unsigned flags)
{
	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *node, *child;
	unsigned long index, offset, maxindex;

	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
		return NULL;

	/*
	 * Catch next_index overflow after ~0UL.  iter->index never overflows
	 * during iteration; it can be zero only at the beginning.
	 * And we cannot overflow iter->next_index in a single step,
	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
	 *
	 * This condition is also used by radix_tree_next_slot() to stop
	 * contiguous iteration and to forbid switching to the next chunk.
	 */
	index = iter->next_index;
	if (!index && iter->index)
		return NULL;

 restart:
	radix_tree_load_root(root, &child, &maxindex);
	if (index > maxindex)
		return NULL;
	if (!child)
		return NULL;

	if (!radix_tree_is_internal_node(child)) {
		/* Single-slot tree */
		iter->index = index;
		iter->next_index = maxindex + 1;
		iter->tags = 1;
		iter->node = NULL;
		return (void __rcu **)&root->xa_head;
	}

	do {
		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, index);

		if ((flags & RADIX_TREE_ITER_TAGGED) ?
				!tag_get(node, tag, offset) : !child) {
			/* Hole detected */
			if (flags & RADIX_TREE_ITER_CONTIG)
				return NULL;

			if (flags & RADIX_TREE_ITER_TAGGED)
				offset = radix_tree_find_next_bit(node, tag,
						offset + 1);
			else
				while (++offset < RADIX_TREE_MAP_SIZE) {
					void *slot = rcu_dereference_raw(
							node->slots[offset]);
					if (slot)
						break;
				}
			index &= ~node_maxindex(node);
			index += offset << node->shift;
			/* Overflow after ~0UL */
			if (!index)
				return NULL;
			if (offset == RADIX_TREE_MAP_SIZE)
				goto restart;
			child = rcu_dereference_raw(node->slots[offset]);
		}

		if (!child)
			goto restart;
		if (child == RADIX_TREE_RETRY)
			break;
	} while (node->shift && radix_tree_is_internal_node(child));

	/* Update the iterator state */
	iter->index = (index & ~node_maxindex(node)) | offset;
	iter->next_index = (index | node_maxindex(node)) + 1;
	iter->node = node;

	if (flags & RADIX_TREE_ITER_TAGGED)
		set_iter_tags(iter, node, offset, tag);

	return node->slots + offset;
}
EXPORT_SYMBOL(radix_tree_next_chunk);

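/*
 * Illustrative sketch (not part of the original source): what the iteration
 * macros in <linux/radix-tree.h> roughly expand to.  Chunks of slots are
 * fetched with radix_tree_next_chunk() and then walked slot by slot with
 * radix_tree_next_slot().  "my_tree" and use_entry() are hypothetical.
 *
 *	struct radix_tree_iter iter;
 *	void __rcu **slot;
 *
 *	rcu_read_lock();
 *	for (slot = radix_tree_iter_init(&iter, 0);
 *	     slot || (slot = radix_tree_next_chunk(&my_tree, &iter, 0));
 *	     slot = radix_tree_next_slot(slot, &iter, 0))
 *		use_entry(rcu_dereference_raw(*slot));
 *	rcu_read_unlock();
 */
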
/**
 * radix_tree_gang_lookup - perform multiple lookup on a radix tree
 * @root: radix tree root
 * @results: where the results of the lookup are placed
 * @first_index: start the lookup from this key
 * @max_items: place up to this many items at *results
 *
 * Performs an index-ascending scan of the tree for present items.  Places
 * them at *@results and returns the number of items which were placed at
 * *@results.
 *
 * The implementation is naive.
 *
 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 * rcu_read_lock.  In this case, rather than the returned results being
 * an atomic snapshot of the tree at a single point in time, the
 * semantics of an RCU protected gang lookup are as though multiple
 * radix_tree_lookups had been issued under individual locks, with the
 * results stored in 'results'.
 */
unsigned int
radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = rcu_dereference_raw(*slot);
		if (!results[ret])
			continue;
		if (radix_tree_is_internal_node(results[ret])) {
			slot = radix_tree_iter_retry(&iter);
			continue;
		}
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);

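/*
 * Illustrative sketch (not part of the original source): pulling a batch of
 * present entries out of a tree under rcu_read_lock().  "my_tree" and
 * use_entry() are hypothetical; the batch size of 16 is arbitrary.  Note
 * that only the entries are returned, not their indices.
 *
 *	void *batch[16];
 *	unsigned int i, n;
 *
 *	rcu_read_lock();
 *	n = radix_tree_gang_lookup(&my_tree, batch, 0, ARRAY_SIZE(batch));
 *	for (i = 0; i < n; i++)
 *		use_entry(batch[i]);
 *	rcu_read_unlock();
 */
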
/**
 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *				based on a tag
 * @root: radix tree root
 * @results: where the results of the lookup are placed
 * @first_index: start the lookup from this key
 * @max_items: place up to this many items at *results
 * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Performs an index-ascending scan of the tree for present items which
 * have the tag indexed by @tag set.  Places the items at *@results and
 * returns the number of items which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = rcu_dereference_raw(*slot);
		if (!results[ret])
			continue;
		if (radix_tree_is_internal_node(results[ret])) {
			slot = radix_tree_iter_retry(&iter);
			continue;
		}
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);

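/*
 * Illustrative sketch (not part of the original source): collecting entries
 * that carry a particular tag, e.g. for writeback-style processing.
 * "my_tree", "MY_DIRTY_TAG" and flush_entry() are hypothetical; the lookup
 * is assumed to run under rcu_read_lock() or the tree lock.
 *
 *	void *dirty[16];
 *	unsigned int i, n;
 *
 *	n = radix_tree_gang_lookup_tag(&my_tree, dirty, 0,
 *				       ARRAY_SIZE(dirty), MY_DIRTY_TAG);
 *	for (i = 0; i < n; i++)
 *		flush_entry(dirty[i]);
 */
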
/**
 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
 *					radix tree based on a tag
 * @root: radix tree root
 * @results: where the results of the lookup are placed
 * @first_index: start the lookup from this key
 * @max_items: place up to this many items at *results
 * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Performs an index-ascending scan of the tree for present items which
 * have the tag indexed by @tag set.  Places the slots at *@results and
 * returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
		void __rcu ***results, unsigned long first_index,
		unsigned int max_items, unsigned int tag)
{
	struct radix_tree_iter iter;
	void __rcu **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = slot;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);

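/*
 * Illustrative sketch (not part of the original source): looking up slots
 * (rather than entries) so they can be replaced in place with
 * radix_tree_replace_slot().  "my_tree", "MY_TAG" and make_replacement()
 * are hypothetical; the caller is assumed to hold the tree lock, since the
 * returned slot pointers are only stable while modification is excluded.
 *
 *	void __rcu **slots[16];
 *	unsigned int i, n;
 *
 *	n = radix_tree_gang_lookup_tag_slot(&my_tree, slots, 0,
 *					    ARRAY_SIZE(slots), MY_TAG);
 *	for (i = 0; i < n; i++)
 *		radix_tree_replace_slot(&my_tree, slots[i],
 *					make_replacement(slots[i]));
 */
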
static bool __radix_tree_delete(struct radix_tree_root *root,
				struct radix_tree_node *node, void __rcu **slot)
{
	void *old = rcu_dereference_raw(*slot);
	int values = xa_is_value(old) ? -1 : 0;
	unsigned offset = get_slot_offset(node, slot);
	int tag;

	if (is_idr(root))
		node_tag_set(root, node, IDR_FREE, offset);
	else
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
			node_tag_clear(root, node, tag, offset);

	replace_slot(slot, NULL, node, -1, values);
	return node && delete_node(root, node);
}

/**
 * radix_tree_iter_delete - delete the entry at this iterator position
 * @root: radix tree root
 * @iter: iterator state
 * @slot: pointer to slot
 *
 * Delete the entry at the position currently pointed to by the iterator.
 * This may result in the current node being freed; if it is, the iterator
 * is advanced so that it will not reference the freed memory.  This
 * function may be called without any locking if there are no other threads
 * which can access this tree.
 */
void radix_tree_iter_delete(struct radix_tree_root *root,
				struct radix_tree_iter *iter, void __rcu **slot)
{
	if (__radix_tree_delete(root, iter->node, slot))
		iter->index = iter->next_index;
}
EXPORT_SYMBOL(radix_tree_iter_delete);

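/*
 * Illustrative sketch (not part of the original source): removing selected
 * entries while walking the tree.  radix_tree_iter_delete() keeps the
 * iterator valid even if the deletion frees the current node.  "my_tree"
 * and should_drop() are hypothetical; the walk is assumed to be serialised
 * against other modifiers (or to be the only user of the tree).
 *
 *	struct radix_tree_iter iter;
 *	void __rcu **slot;
 *
 *	radix_tree_for_each_slot(slot, &my_tree, &iter, 0) {
 *		if (should_drop(rcu_dereference_raw(*slot)))
 *			radix_tree_iter_delete(&my_tree, &iter, slot);
 *	}
 */
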
/**
 * radix_tree_delete_item - delete an item from a radix tree
 * @root: radix tree root
 * @index: index key
 * @item: expected item
 *
 * Remove @item at @index from the radix tree rooted at @root.
 *
 * Return: the deleted entry, or %NULL if it was not present
 * or the entry at the given @index was not @item.
 */
void *radix_tree_delete_item(struct radix_tree_root *root,
			     unsigned long index, void *item)
{
	struct radix_tree_node *node = NULL;
	void __rcu **slot = NULL;
	void *entry;

	entry = __radix_tree_lookup(root, index, &node, &slot);
	if (!slot)
		return NULL;
	if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
						get_slot_offset(node, slot))))
		return NULL;

	if (item && entry != item)
		return NULL;

	__radix_tree_delete(root, node, slot);

	return entry;
}
EXPORT_SYMBOL(radix_tree_delete_item);

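/*
 * Illustrative sketch (not part of the original source): deleting an entry
 * only if it is still the object we expect, in the spirit of a
 * compare-and-delete.  "my_tree", "index" and the non-NULL pointer "old"
 * are hypothetical, and the call is assumed to run under the tree lock.
 *
 *	if (radix_tree_delete_item(&my_tree, index, old) == old)
 *		kfree(old);
 */
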
/**
 * radix_tree_delete - delete an entry from a radix tree
 * @root: radix tree root
 * @index: index key
 *
 * Remove the entry at @index from the radix tree rooted at @root.
 *
 * Return: The deleted entry, or %NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
	return radix_tree_delete_item(root, index, NULL);
}
EXPORT_SYMBOL(radix_tree_delete);

/**
 * radix_tree_tagged - test whether any items in the tree are tagged
 * @root: radix tree root
 * @tag: tag to test
 */
int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
{
	return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);

/**
 * idr_preload - preload for idr_alloc()
 * @gfp_mask: allocation mask to use for preloading
 *
 * Preallocate memory to use for the next call to idr_alloc().  This function
 * returns with preemption disabled.  It will be enabled by idr_preload_end().
 */
void idr_preload(gfp_t gfp_mask)
{
	if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
		local_lock(&radix_tree_preloads.lock);
}
EXPORT_SYMBOL(idr_preload);

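/*
 * Illustrative sketch (not part of the original source) of the preload
 * pattern described above: preallocate outside the spinlock, then allocate
 * the ID with GFP_NOWAIT while holding it.  A negative return from
 * idr_alloc() is -ENOMEM or -ENOSPC.  "my_idr", "my_lock" and "ptr" are
 * hypothetical.
 *
 *	int id;
 *
 *	idr_preload(GFP_KERNEL);
 *	spin_lock(&my_lock);
 *	id = idr_alloc(&my_idr, ptr, 0, 0, GFP_NOWAIT);
 *	spin_unlock(&my_lock);
 *	idr_preload_end();
 *	if (id < 0)
 *		return id;
 */
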
void __rcu **idr_get_free(struct radix_tree_root *root,
			      struct radix_tree_iter *iter, gfp_t gfp,
			      unsigned long max)
{
	struct radix_tree_node *node = NULL, *child;
	void __rcu **slot = (void __rcu **)&root->xa_head;
	unsigned long maxindex, start = iter->next_index;
	unsigned int shift, offset = 0;

 grow:
	shift = radix_tree_load_root(root, &child, &maxindex);
	if (!radix_tree_tagged(root, IDR_FREE))
		start = max(start, maxindex + 1);
	if (start > max)
		return ERR_PTR(-ENOSPC);

	if (start > maxindex) {
		int error = radix_tree_extend(root, gfp, start, shift);
		if (error < 0)
			return ERR_PTR(error);
		shift = error;
		child = rcu_dereference_raw(root->xa_head);
	}
	if (start == 0 && shift == 0)
		shift = RADIX_TREE_MAP_SHIFT;

	while (shift) {
		shift -= RADIX_TREE_MAP_SHIFT;
		if (child == NULL) {
			/* Have to add a child node. */
			child = radix_tree_node_alloc(gfp, node, root, shift,
							offset, 0, 0);
			if (!child)
				return ERR_PTR(-ENOMEM);
			all_tag_set(child, IDR_FREE);
			rcu_assign_pointer(*slot, node_to_entry(child));
			if (node)
				node->count++;
		} else if (!radix_tree_is_internal_node(child))
			break;

		node = entry_to_node(child);
		offset = radix_tree_descend(node, &child, start);
		if (!tag_get(node, IDR_FREE, offset)) {
			offset = radix_tree_find_next_bit(node, IDR_FREE,
							offset + 1);
			start = next_index(start, node, offset);
			if (start > max || start == 0)
				return ERR_PTR(-ENOSPC);
			while (offset == RADIX_TREE_MAP_SIZE) {
				offset = node->offset + 1;
				node = node->parent;
				if (!node)
					goto grow;
				shift = node->shift;
			}
			child = rcu_dereference_raw(node->slots[offset]);
		}
		slot = &node->slots[offset];
	}

	iter->index = start;
	if (node)
		iter->next_index = 1 + min(max, (start | node_maxindex(node)));
	else
		iter->next_index = 1;
	iter->node = node;
	set_iter_tags(iter, node, offset, IDR_FREE);

	return slot;
}

/**
 * idr_destroy - release all internal memory from an IDR
 * @idr: idr handle
 *
 * After this function is called, the IDR is empty, and may be reused or
 * the data structure containing it may be freed.
 *
 * A typical clean-up sequence for objects stored in an idr tree will use
 * idr_for_each() to free all objects, if necessary, then idr_destroy() to
 * free the memory used to keep track of those objects.
 */
void idr_destroy(struct idr *idr)
{
	struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
	if (radix_tree_is_internal_node(node))
		radix_tree_free_nodes(node);
	idr->idr_rt.xa_head = NULL;
	root_tag_set(&idr->idr_rt, IDR_FREE);
}
EXPORT_SYMBOL(idr_destroy);

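/*
 * Illustrative sketch (not part of the original source) of the clean-up
 * sequence mentioned above: free every object with idr_for_each(), then
 * release the IDR's internal nodes with idr_destroy().  "my_idr" and
 * free_one() are hypothetical.
 *
 *	static int free_one(int id, void *p, void *data)
 *	{
 *		kfree(p);
 *		return 0;
 *	}
 *
 *	...
 *	idr_for_each(&my_idr, free_one, NULL);
 *	idr_destroy(&my_idr);
 */
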
static void
radix_tree_node_ctor(void *arg)
{
	struct radix_tree_node *node = arg;

	memset(node, 0, sizeof(*node));
	INIT_LIST_HEAD(&node->private_list);
}

static int radix_tree_cpu_dead(unsigned int cpu)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;

	/* Free per-cpu pool of preloaded nodes */
	rtp = &per_cpu(radix_tree_preloads, cpu);
	while (rtp->nr) {
		node = rtp->nodes;
		rtp->nodes = node->parent;
		kmem_cache_free(radix_tree_node_cachep, node);
		rtp->nr--;
	}
	return 0;
}

void __init radix_tree_init(void)
{
	int ret;

	BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
	BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
	BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
			radix_tree_node_ctor);
	ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
					NULL, radix_tree_cpu_dead);
	WARN_ON(ret < 0);
}