// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/slab.c
 * Written by Mark Hemment, 1996/97.
 * (markhe@nextd.demon.co.uk)
 *
 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
 *
 * Major cleanup, different bufctl logic, per-cpu arrays
 *	(c) 2000 Manfred Spraul
 *
 * Cleanup, make the head arrays unconditional, preparation for NUMA
 *	(c) 2002 Manfred Spraul
 *
 * An implementation of the Slab Allocator as described in outline in:
 *	UNIX Internals: The New Frontiers by Uresh Vahalia
 *	Pub: Prentice Hall	ISBN 0-13-101908-2
 * or with a little more detail in:
 *	The Slab Allocator: An Object-Caching Kernel Memory Allocator
 *	Jeff Bonwick (Sun Microsystems).
 *	Presented at: USENIX Summer 1994 Technical Conference
 *
 * The memory is organized in caches, one cache for each object type.
 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
 * Each cache consists of many slabs (they are small (usually one
 * page long) and always contiguous), and each slab contains multiple
 * initialized objects.
 *
 * This means that your constructor is used only for newly allocated
 * slabs and you must pass objects with the same initializations to
 * kmem_cache_free.
 *
 * Each cache can only support one memory type (GFP_DMA, GFP_HIGHMEM,
 * normal). If you need a special memory type, then you must create a new
 * cache for that memory type.
 *
 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
 *   full slabs with 0 free objects
 *   partial slabs
 *   empty slabs with no allocated objects
 *
 * If partial slabs exist, then new allocations come from these slabs;
 * otherwise allocations come from empty slabs or new slabs are allocated.
 *
 * kmem_cache_destroy() CAN CRASH if you try to allocate from the cache
 * during kmem_cache_destroy(). The caller must prevent concurrent allocs.
 *
 * Each cache has a short per-cpu head array; most allocs
 * and frees go into that array, and if that array overflows, then half
 * of the entries in the array are given back into the global cache.
 * The head array is strictly LIFO and should improve the cache hit rates.
 * On SMP, it additionally reduces the spinlock operations.
 *
 * The c_cpuarray may not be read with enabled local interrupts -
 * it's changed with a smp_call_function().
 *
 * SMP synchronization:
 *  constructors and destructors are called without any locking.
 *  Several members in struct kmem_cache and struct slab never change, they
 *	are accessed without any locking.
 *  The per-cpu arrays are never accessed from the wrong cpu, no locking,
 *  and local interrupts are disabled so slab code is preempt-safe.
 *  The non-constant members are protected with a per-cache irq spinlock.
 *
 * Many thanks to Mark Hemment, who wrote another per-cpu slab patch
 * in 2000 - many ideas in the current implementation are derived from
 * his patch.
 *
 * Further notes from the original documentation:
 *
 * 11 April '97.  Started multi-threading - markhe
 *	The global cache-chain is protected by the mutex 'slab_mutex'.
 *	The sem is only needed when accessing/extending the cache-chain, which
 *	can never happen inside an interrupt (kmem_cache_create(),
 *	kmem_cache_shrink() and kmem_cache_reap()).
 *
 * At present, each engine can be growing a cache.  This should be blocked.
 *
 * 15 March 2005. NUMA slab allocator.
 *	Shai Fultheim <shai@scalex86.org>.
 *	Shobhit Dayal <shobhit@calsoftinc.com>
 *	Alok N Kataria <alokk@calsoftinc.com>
 *	Christoph Lameter <christoph@lameter.com>
 *
 * Modified the slab allocator to be node aware on NUMA systems.
 * Each node has its own list of partial, free and full slabs.
 * All object allocations for a node occur from node specific slab lists.
 */
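
/*
 * Illustrative sketch of the cache API described above; my_obj and
 * my_obj_ctor stand for caller-defined names:
 *
 *	static struct kmem_cache *my_cache;
 *
 *	my_cache = kmem_cache_create("my_obj", sizeof(struct my_obj), 0,
 *				     SLAB_HWCACHE_ALIGN, my_obj_ctor);
 *	obj = kmem_cache_alloc(my_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(my_cache, obj);
 *	kmem_cache_destroy(my_cache);
 *
 * Objects passed to kmem_cache_free() must carry the same initializations
 * the constructor gave them, and no allocation may race with
 * kmem_cache_destroy(), as noted above.
 */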

#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/cpuset.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/kfence.h>
#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/nodemask.h>
#include <linux/kmemleak.h>
#include <linux/mempolicy.h>
#include <linux/mutex.h>
#include <linux/fault-inject.h>
#include <linux/rtmutex.h>
#include <linux/reciprocal_div.h>
#include <linux/debugobjects.h>
#include <linux/memory.h>
#include <linux/prefetch.h>
#include <linux/sched/task_stack.h>

#include <net/sock.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>

#include <trace/events/kmem.h>

#include "internal.h"

#include "slab.h"

/*
 * DEBUG	- 1 for kmem_cache_create() to honour SLAB_RED_ZONE & SLAB_POISON.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * STATS	- 1 to collect stats for /proc/slabinfo.
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
 */

#ifdef CONFIG_DEBUG_SLAB
#define DEBUG		1
#define STATS		1
#define FORCED_DEBUG	1
#else
#define DEBUG		0
#define STATS		0
#define FORCED_DEBUG	0
#endif

/* Shouldn't this be in a header file somewhere? */
#define BYTES_PER_WORD		sizeof(void *)
#define REDZONE_ALIGN		max(BYTES_PER_WORD, __alignof__(unsigned long long))

#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif

#define FREELIST_BYTE_INDEX (((PAGE_SIZE >> BITS_PER_BYTE) \
				<= SLAB_OBJ_MIN_SIZE) ? 1 : 0)

#if FREELIST_BYTE_INDEX
typedef unsigned char freelist_idx_t;
#else
typedef unsigned short freelist_idx_t;
#endif

#define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)

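/*
 * For example, with the one-byte freelist_idx_t above a slab can index at
 * most (1 << 8) - 1 = 255 objects, and with the two-byte variant up to
 * 65535, so the chosen slab order must keep the object count within this.
 */
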
/*
 * struct array_cache
 *
 * Purpose:
 * - LIFO ordering, to hand out cache-warm objects from _alloc
 * - reduce the number of linked list operations
 * - reduce spinlock operations
 *
 * The limit is stored in the per-cpu structure to reduce the data cache
 * footprint.
 *
 */
struct array_cache {
        unsigned int avail;
        unsigned int limit;
        unsigned int batchcount;
        unsigned int touched;
        void *entry[];	/*
			 * Must have this definition in here for the proper
			 * alignment of array_cache. Also simplifies accessing
			 * the entries.
			 */
};

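/*
 * The entry[] array behaves as a simple LIFO stack; the hot paths reduce to
 * roughly a push on free and a pop on alloc:
 *
 *	ac->entry[ac->avail++] = objp;		(free, see __free_one())
 *	objp = ac->entry[--ac->avail];		(alloc)
 *
 * so recently freed, cache-warm objects are handed out first.
 */
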
struct alien_cache {
        spinlock_t lock;
        struct array_cache ac;
};

/*
 * Need this for bootstrapping a per node allocator.
 */
#define NUM_INIT_LISTS (2 * MAX_NUMNODES)
static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
#define CACHE_CACHE 0
#define SIZE_NODE (MAX_NUMNODES)

static int drain_freelist(struct kmem_cache *cache,
                        struct kmem_cache_node *n, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
                        int node, struct list_head *list);
static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);

static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
                                                void **list);
static inline void fixup_slab_list(struct kmem_cache *cachep,
                                struct kmem_cache_node *n, struct page *page,
                                void **list);
static int slab_early_init = 1;

#define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))

static void kmem_cache_node_init(struct kmem_cache_node *parent)
{
        INIT_LIST_HEAD(&parent->slabs_full);
        INIT_LIST_HEAD(&parent->slabs_partial);
        INIT_LIST_HEAD(&parent->slabs_free);
        parent->total_slabs = 0;
        parent->free_slabs = 0;
        parent->shared = NULL;
        parent->alien = NULL;
        parent->colour_next = 0;
        spin_lock_init(&parent->list_lock);
        parent->free_objects = 0;
        parent->free_touched = 0;
}

#define MAKE_LIST(cachep, listp, slab, nodeid)				\
	do {								\
		INIT_LIST_HEAD(listp);					\
		list_splice(&get_node(cachep, nodeid)->slab, listp);	\
	} while (0)

#define MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
	do {								\
	MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
	} while (0)

#define CFLGS_OBJFREELIST_SLAB	((slab_flags_t __force)0x40000000U)
#define CFLGS_OFF_SLAB		((slab_flags_t __force)0x80000000U)
#define OBJFREELIST_SLAB(x)	((x)->flags & CFLGS_OBJFREELIST_SLAB)
#define OFF_SLAB(x)		((x)->flags & CFLGS_OFF_SLAB)

#define BATCHREFILL_LIMIT	16
/*
 * Optimization question: fewer reaps means less probability for unnecessary
 * cpucache drain/refill cycles.
 *
 * OTOH the cpuarrays can contain lots of objects,
 * which could lock up otherwise freeable slabs.
 */
#define REAPTIMEOUT_AC		(2*HZ)
#define REAPTIMEOUT_NODE	(4*HZ)
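/*
 * Since HZ jiffies correspond to one second, the per-cpu head arrays are
 * considered for reaping roughly every 2 seconds and the per-node lists
 * roughly every 4 seconds.
 */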

#if STATS
#define STATS_INC_ACTIVE(x)	((x)->num_active++)
#define STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define STATS_INC_GROWN(x)	((x)->grown++)
#define STATS_ADD_REAPED(x, y)	((x)->reaped += (y))
#define STATS_SET_HIGH(x)						\
	do {								\
		if ((x)->num_active > (x)->high_mark)			\
			(x)->high_mark = (x)->num_active;		\
	} while (0)
#define STATS_INC_ERR(x)	((x)->errors++)
#define STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
#define STATS_INC_NODEFREES(x)	((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x)	((x)->node_overflow++)
#define STATS_SET_FREEABLE(x, i)					\
	do {								\
		if ((x)->max_freeable < i)				\
			(x)->max_freeable = i;				\
	} while (0)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define STATS_INC_ACTIVE(x)	do { } while (0)
#define STATS_DEC_ACTIVE(x)	do { } while (0)
#define STATS_INC_ALLOCED(x)	do { } while (0)
#define STATS_INC_GROWN(x)	do { } while (0)
#define STATS_ADD_REAPED(x, y)	do { (void)(y); } while (0)
#define STATS_SET_HIGH(x)	do { } while (0)
#define STATS_INC_ERR(x)	do { } while (0)
#define STATS_INC_NODEALLOCS(x)	do { } while (0)
#define STATS_INC_NODEFREES(x)	do { } while (0)
#define STATS_INC_ACOVERFLOW(x)	do { } while (0)
#define STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif

#if DEBUG

/*
 * memory layout of objects:
 * 0		: objp
 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
 *		the end of an object is aligned with the end of the real
 *		allocation. Catches writes behind the end of the allocation.
 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
 *		redzone word.
 * cachep->obj_offset: The real object.
 * cachep->size - 2 * BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
 * cachep->size - 1 * BYTES_PER_WORD: last caller address
 *					[BYTES_PER_WORD long]
 */
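/*
 * Pictorially (rough sketch, assuming both SLAB_RED_ZONE and SLAB_STORE_USER;
 * the exact offsets come from the accessors below):
 *
 *	| padding | redzone 1 | real object ...    | redzone 2 | last caller |
 *	0          ^obj_offset - BYTES_PER_WORD     ^dbg_redzone2()      size^
 */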
static int obj_offset(struct kmem_cache *cachep)
{
        return cachep->obj_offset;
}

static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
        BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
        return (unsigned long long *)(objp + obj_offset(cachep) -
                                      sizeof(unsigned long long));
}

static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
        BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
        if (cachep->flags & SLAB_STORE_USER)
                return (unsigned long long *)(objp + cachep->size -
                                              sizeof(unsigned long long) -
                                              REDZONE_ALIGN);
        return (unsigned long long *)(objp + cachep->size -
                                      sizeof(unsigned long long));
}

static void **dbg_userword(struct kmem_cache *cachep, void *objp)
{
        BUG_ON(!(cachep->flags & SLAB_STORE_USER));
        return (void **)(objp + cachep->size - BYTES_PER_WORD);
}

#else

#define obj_offset(x)			0
#define dbg_redzone1(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp)	({BUG(); (void **)NULL;})

#endif

/*
 * Do not go above this order unless 0 objects fit into the slab or
 * overridden on the command line.
 */
#define SLAB_MAX_ORDER_HI	1
#define SLAB_MAX_ORDER_LO	0
static int slab_max_order = SLAB_MAX_ORDER_LO;
static bool slab_max_order_set __initdata;

static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
                                 unsigned int idx)
{
        return page->s_mem + cache->size * idx;
}

#define BOOT_CPUCACHE_ENTRIES	1
/* internal cache of cache description objs */
static struct kmem_cache kmem_cache_boot = {
        .batchcount = 1,
        .limit = BOOT_CPUCACHE_ENTRIES,
        .shared = 1,
        .size = sizeof(struct kmem_cache),
        .name = "kmem_cache",
};

static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);

static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
        return this_cpu_ptr(cachep->cpu_cache);
}

/*
 * Calculate the number of objects and left-over bytes for a given buffer size.
 */
static unsigned int cache_estimate(unsigned long gfporder, size_t buffer_size,
                slab_flags_t flags, size_t *left_over)
{
        unsigned int num;
        size_t slab_size = PAGE_SIZE << gfporder;

        /*
         * The slab management structure can be either off the slab or
         * on it. For the latter case, the memory allocated for a
         * slab is used for:
         *
         * - @buffer_size bytes for each object
         * - One freelist_idx_t for each object
         *
         * We don't need to consider alignment of freelist because
         * freelist will be at the end of slab page. The objects will be
         * at the correct alignment.
         *
         * If the slab management structure is off the slab, then the
         * alignment will already be calculated into the size. Because
         * the slabs are all pages aligned, the objects will be at the
         * correct alignment when allocated.
         */
        if (flags & (CFLGS_OBJFREELIST_SLAB | CFLGS_OFF_SLAB)) {
                num = slab_size / buffer_size;
                *left_over = slab_size % buffer_size;
        } else {
                num = slab_size / (buffer_size + sizeof(freelist_idx_t));
                *left_over = slab_size %
                        (buffer_size + sizeof(freelist_idx_t));
        }

        return num;
}
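
/*
 * Worked example (numbers are arbitrary): for an order-0 slab with 4096-byte
 * pages, a 256-byte buffer_size and an on-slab freelist with
 * sizeof(freelist_idx_t) == 1, this gives num = 4096 / (256 + 1) = 15 objects
 * and *left_over = 4096 - 15 * 257 = 241 bytes, which is then available for
 * slab colouring.
 */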

#if DEBUG
#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)

static void __slab_error(const char *function, struct kmem_cache *cachep,
                        char *msg)
{
        pr_err("slab error in %s(): cache `%s': %s\n",
               function, cachep->name, msg);
        dump_stack();
        add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
#endif

/*
 * By default on NUMA we use alien caches to stage the freeing of
 * objects allocated from other nodes. This causes massive memory
 * inefficiencies when using fake NUMA setup to split memory into a
 * large number of small nodes, so it can be disabled on the command
 * line
 */

static int use_alien_caches __read_mostly = 1;
static int __init noaliencache_setup(char *s)
{
        use_alien_caches = 0;
        return 1;
}
__setup("noaliencache", noaliencache_setup);

static int __init slab_max_order_setup(char *str)
{
        get_option(&str, &slab_max_order);
        slab_max_order = slab_max_order < 0 ? 0 :
                                min(slab_max_order, MAX_ORDER - 1);
        slab_max_order_set = true;

        return 1;
}
__setup("slab_max_order=", slab_max_order_setup);
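
/*
 * The two __setup() handlers above correspond to kernel command line options
 * such as (the values shown are only examples):
 *
 *	slab_max_order=2	raise the default cap on slab page order to 2
 *	noaliencache		disable the per-node alien caches
 */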

#ifdef CONFIG_NUMA
/*
 * Special reaping functions for NUMA systems called from cache_reap().
 * These take care of doing round robin flushing of alien caches (containing
 * objects freed on different nodes from which they were allocated) and the
 * flushing of remote pcps by calling drain_node_pages.
 */
static DEFINE_PER_CPU(unsigned long, slab_reap_node);

static void init_reap_node(int cpu)
{
        per_cpu(slab_reap_node, cpu) = next_node_in(cpu_to_mem(cpu),
                                                    node_online_map);
}

static void next_reap_node(void)
{
        int node = __this_cpu_read(slab_reap_node);

        node = next_node_in(node, node_online_map);
        __this_cpu_write(slab_reap_node, node);
}

#else
#define init_reap_node(cpu) do { } while (0)
#define next_reap_node(void) do { } while (0)
#endif

/*
 * Initiate the reap timer running on the target CPU.  We run at around 1 to 2Hz
 * via the workqueue/eventd.
 * Add the CPU number into the expiration time to minimize the possibility of
 * the CPUs getting into lockstep and contending for the global cache chain
 * lock.
 */
static void start_cpu_timer(int cpu)
{
        struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);

        if (reap_work->work.func == NULL) {
                init_reap_node(cpu);
                INIT_DEFERRABLE_WORK(reap_work, cache_reap);
                schedule_delayed_work_on(cpu, reap_work,
                                        __round_jiffies_relative(HZ, cpu));
        }
}

static void init_arraycache(struct array_cache *ac, int limit, int batch)
{
        if (ac) {
                ac->avail = 0;
                ac->limit = limit;
                ac->batchcount = batch;
                ac->touched = 0;
        }
}

static struct array_cache *alloc_arraycache(int node, int entries,
                                            int batchcount, gfp_t gfp)
{
        size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
        struct array_cache *ac = NULL;

        ac = kmalloc_node(memsize, gfp, node);
        /*
         * The array_cache structures contain pointers to free objects.
         * However, when such objects are allocated or transferred to another
         * cache the pointers are not cleared and they could be counted as
         * valid references during a kmemleak scan. Therefore, kmemleak must
         * not scan such objects.
         */
        kmemleak_no_scan(ac);
        init_arraycache(ac, entries, batchcount);
        return ac;
}

static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
                                        struct page *page, void *objp)
{
        struct kmem_cache_node *n;
        int page_node;
        LIST_HEAD(list);

        page_node = page_to_nid(page);
        n = get_node(cachep, page_node);

        spin_lock(&n->list_lock);
        free_block(cachep, &objp, 1, page_node, &list);
        spin_unlock(&n->list_lock);

        slabs_destroy(cachep, &list);
}

/*
 * Transfer objects in one arraycache to another.
 * Locking must be handled by the caller.
 *
 * Return the number of entries transferred.
 */
static int transfer_objects(struct array_cache *to,
                struct array_cache *from, unsigned int max)
{
        /* Figure out how many entries to transfer */
        int nr = min3(from->avail, max, to->limit - to->avail);

        if (!nr)
                return 0;

        memcpy(to->entry + to->avail, from->entry + from->avail - nr,
               sizeof(void *) * nr);

        from->avail -= nr;
        to->avail += nr;
        return nr;
}
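
/*
 * Worked example (numbers are arbitrary): if from->avail is 10, max is 6 and
 * the destination has limit 8 with avail 5, then nr = min3(10, 6, 8 - 5) = 3;
 * the three most recently freed pointers are copied over and the counters
 * become from->avail = 7 and to->avail = 8.
 */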

/* &alien->lock must be held by alien callers. */
static __always_inline void __free_one(struct array_cache *ac, void *objp)
{
        /* Avoid trivial double-free. */
        if (IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
            WARN_ON_ONCE(ac->avail > 0 && ac->entry[ac->avail - 1] == objp))
                return;
        ac->entry[ac->avail++] = objp;
}

#ifndef CONFIG_NUMA

#define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, n) do { } while (0)

static inline struct alien_cache **alloc_alien_cache(int node,
                                                int limit, gfp_t gfp)
{
        return NULL;
}

static inline void free_alien_cache(struct alien_cache **ac_ptr)
{
}

static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
        return 0;
}

static inline void *alternate_node_alloc(struct kmem_cache *cachep,
                gfp_t flags)
{
        return NULL;
}

static inline void *____cache_alloc_node(struct kmem_cache *cachep,
                gfp_t flags, int nodeid)
{
        return NULL;
}

static inline gfp_t gfp_exact_node(gfp_t flags)
{
        return flags & ~__GFP_NOFAIL;
}

#else	/* CONFIG_NUMA */

static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
static void *alternate_node_alloc(struct kmem_cache *, gfp_t);

static struct alien_cache *__alloc_alien_cache(int node, int entries,
                                                int batch, gfp_t gfp)
{
        size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
        struct alien_cache *alc = NULL;

        alc = kmalloc_node(memsize, gfp, node);
        if (alc) {
                kmemleak_no_scan(alc);
                init_arraycache(&alc->ac, entries, batch);
                spin_lock_init(&alc->lock);
        }
        return alc;
}

static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
        struct alien_cache **alc_ptr;
        int i;

        if (limit > 1)
                limit = 12;
        alc_ptr = kcalloc_node(nr_node_ids, sizeof(void *), gfp, node);
        if (!alc_ptr)
                return NULL;

        for_each_node(i) {
                if (i == node || !node_online(i))
                        continue;
                alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
                if (!alc_ptr[i]) {
                        for (i--; i >= 0; i--)
                                kfree(alc_ptr[i]);
                        kfree(alc_ptr);
                        return NULL;
                }
        }
        return alc_ptr;
}

static void free_alien_cache(struct alien_cache **alc_ptr)
{
        int i;

        if (!alc_ptr)
                return;
        for_each_node(i)
                kfree(alc_ptr[i]);
        kfree(alc_ptr);
}

static void __drain_alien_cache(struct kmem_cache *cachep,
                                struct array_cache *ac, int node,
                                struct list_head *list)
{
        struct kmem_cache_node *n = get_node(cachep, node);

        if (ac->avail) {
                spin_lock(&n->list_lock);
                /*
                 * Stuff objects into the remote nodes shared array first.
                 * That way we could avoid the overhead of putting the objects
                 * into the free lists and getting them back later.
                 */
                if (n->shared)
                        transfer_objects(n->shared, ac, ac->limit);

                free_block(cachep, ac->entry, ac->avail, node, list);
                ac->avail = 0;
                spin_unlock(&n->list_lock);
        }
}

/*
 * Called from cache_reap() to regularly drain alien caches round robin.
 */
static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
{
        int node = __this_cpu_read(slab_reap_node);

        if (n->alien) {
                struct alien_cache *alc = n->alien[node];
                struct array_cache *ac;

                if (alc) {
                        ac = &alc->ac;
                        if (ac->avail && spin_trylock_irq(&alc->lock)) {
                                LIST_HEAD(list);

                                __drain_alien_cache(cachep, ac, node, &list);
                                spin_unlock_irq(&alc->lock);
                                slabs_destroy(cachep, &list);
                        }
                }
        }
}

static void drain_alien_cache(struct kmem_cache *cachep,
                                struct alien_cache **alien)
{
        int i = 0;
        struct alien_cache *alc;
        struct array_cache *ac;
        unsigned long flags;

        for_each_online_node(i) {
                alc = alien[i];
                if (alc) {
                        LIST_HEAD(list);

                        ac = &alc->ac;
                        spin_lock_irqsave(&alc->lock, flags);
                        __drain_alien_cache(cachep, ac, i, &list);
                        spin_unlock_irqrestore(&alc->lock, flags);
                        slabs_destroy(cachep, &list);
                }
        }
}

static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
                                int node, int page_node)
{
        struct kmem_cache_node *n;
        struct alien_cache *alien = NULL;
        struct array_cache *ac;
        LIST_HEAD(list);

        n = get_node(cachep, node);
        STATS_INC_NODEFREES(cachep);
        if (n->alien && n->alien[page_node]) {
                alien = n->alien[page_node];
                ac = &alien->ac;
                spin_lock(&alien->lock);
                if (unlikely(ac->avail == ac->limit)) {
                        STATS_INC_ACOVERFLOW(cachep);
                        __drain_alien_cache(cachep, ac, page_node, &list);
                }
                __free_one(ac, objp);
                spin_unlock(&alien->lock);
                slabs_destroy(cachep, &list);
        } else {
                n = get_node(cachep, page_node);
                spin_lock(&n->list_lock);
                free_block(cachep, &objp, 1, page_node, &list);
                spin_unlock(&n->list_lock);
                slabs_destroy(cachep, &list);
        }
        return 1;
}

static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
        int page_node = page_to_nid(virt_to_page(objp));
        int node = numa_mem_id();
        /*
         * Make sure we are not freeing an object from another node to the
         * array cache on this cpu.
         */
        if (likely(node == page_node))
                return 0;

        return __cache_free_alien(cachep, objp, node, page_node);
}

/*
 * Construct gfp mask to allocate from a specific node but do not reclaim or
 * warn about failures.
 */
static inline gfp_t gfp_exact_node(gfp_t flags)
{
        return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
}
#endif

static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
{
        struct kmem_cache_node *n;

        /*
         * Set up the kmem_cache_node for cpu before we can
         * begin anything. Make sure some other cpu on this
         * node has not already allocated this
         */
        n = get_node(cachep, node);
        if (n) {
                spin_lock_irq(&n->list_lock);
                n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
                                cachep->num;
                spin_unlock_irq(&n->list_lock);

                return 0;
        }

        n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
        if (!n)
                return -ENOMEM;

        kmem_cache_node_init(n);
        n->next_reap = jiffies + REAPTIMEOUT_NODE +
                    ((unsigned long)cachep) % REAPTIMEOUT_NODE;

        n->free_limit =
                (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num;

        /*
         * The kmem_cache_nodes don't come and go as CPUs
         * come and go.  slab_mutex is sufficient
         * protection here.
         */
        cachep->node[node] = n;

        return 0;
}
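
/*
 * For example, on a node with 4 CPUs a cache with batchcount 16 and 32
 * objects per slab gets free_limit = (1 + 4) * 16 + 32 = 112, i.e. up to
 * roughly 112 free objects may sit on this node's lists before completely
 * free slabs start being reclaimed.
 */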

#if (defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)) || defined(CONFIG_SMP)
/*
 * Allocates and initializes a kmem_cache_node for each slab cache on the
 * given node, used for either memory or cpu hotplug. If memory is being
 * hot-added, the kmem_cache_node will be allocated off-node since memory is
 * not yet online for the new node. When hotplugging memory or a cpu, existing
 * nodes are not replaced if already in use.
 *
 * Must hold slab_mutex.
 */
static int init_cache_node_node(int node)
{
        int ret;
        struct kmem_cache *cachep;

        list_for_each_entry(cachep, &slab_caches, list) {
                ret = init_cache_node(cachep, node, GFP_KERNEL);
                if (ret)
                        return ret;
        }

        return 0;
}
#endif

static int setup_kmem_cache_node(struct kmem_cache *cachep,
                                int node, gfp_t gfp, bool force_change)
{
        int ret = -ENOMEM;
        struct kmem_cache_node *n;
        struct array_cache *old_shared = NULL;
        struct array_cache *new_shared = NULL;
        struct alien_cache **new_alien = NULL;
        LIST_HEAD(list);

        if (use_alien_caches) {
                new_alien = alloc_alien_cache(node, cachep->limit, gfp);
                if (!new_alien)
                        goto fail;
        }

        if (cachep->shared) {
                new_shared = alloc_arraycache(node,
                        cachep->shared * cachep->batchcount, 0xbaadf00d, gfp);
                if (!new_shared)
                        goto fail;
        }

        ret = init_cache_node(cachep, node, gfp);
        if (ret)
                goto fail;

        n = get_node(cachep, node);
        spin_lock_irq(&n->list_lock);
        if (n->shared && force_change) {
                free_block(cachep, n->shared->entry,
                                n->shared->avail, node, &list);
                n->shared->avail = 0;
        }

        if (!n->shared || force_change) {
                old_shared = n->shared;
                n->shared = new_shared;
                new_shared = NULL;
        }

        if (!n->alien) {
                n->alien = new_alien;
                new_alien = NULL;
        }

        spin_unlock_irq(&n->list_lock);
        slabs_destroy(cachep, &list);

        /*
         * To protect lockless access to n->shared during irq disabled context.
         * If n->shared isn't NULL in irq disabled context, accessing it is
         * guaranteed to be valid until irqs are re-enabled, because it will
         * be freed after synchronize_rcu().
         */
        if (old_shared && force_change)
                synchronize_rcu();

fail:
        kfree(old_shared);
        kfree(new_shared);
        free_alien_cache(new_alien);

        return ret;
}

#ifdef CONFIG_SMP

static void cpuup_canceled(long cpu)
{
        struct kmem_cache *cachep;
        struct kmem_cache_node *n = NULL;
        int node = cpu_to_mem(cpu);
        const struct cpumask *mask = cpumask_of_node(node);

        list_for_each_entry(cachep, &slab_caches, list) {
                struct array_cache *nc;
                struct array_cache *shared;
                struct alien_cache **alien;
                LIST_HEAD(list);

                n = get_node(cachep, node);
                if (!n)
                        continue;

                spin_lock_irq(&n->list_lock);

                /* Free limit for this kmem_cache_node */
                n->free_limit -= cachep->batchcount;

                /* cpu is dead; no one can alloc from it. */
                nc = per_cpu_ptr(cachep->cpu_cache, cpu);
                free_block(cachep, nc->entry, nc->avail, node, &list);
                nc->avail = 0;

                if (!cpumask_empty(mask)) {
                        spin_unlock_irq(&n->list_lock);
                        goto free_slab;
                }

                shared = n->shared;
                if (shared) {
                        free_block(cachep, shared->entry,
                                   shared->avail, node, &list);
                        n->shared = NULL;
                }

                alien = n->alien;
                n->alien = NULL;

                spin_unlock_irq(&n->list_lock);

                kfree(shared);
                if (alien) {
                        drain_alien_cache(cachep, alien);
                        free_alien_cache(alien);
                }

free_slab:
                slabs_destroy(cachep, &list);
        }
        /*
         * In the previous loop, all the objects were freed to
         * the respective cache's slabs, now we can go ahead and
         * shrink each nodelist to its limit.
         */
        list_for_each_entry(cachep, &slab_caches, list) {
                n = get_node(cachep, node);
                if (!n)
                        continue;
                drain_freelist(cachep, n, INT_MAX);
        }
}

static int cpuup_prepare(long cpu)
{
        struct kmem_cache *cachep;
        int node = cpu_to_mem(cpu);
        int err;

        /*
         * We need to do this right in the beginning since
         * alloc_arraycache's are going to use this list.
         * kmalloc_node allows us to add the slab to the right
         * kmem_cache_node and not this cpu's kmem_cache_node
         */
        err = init_cache_node_node(node);
        if (err < 0)
                goto bad;

        /*
         * Now we can go ahead with allocating the shared arrays and
         * array caches
         */
        list_for_each_entry(cachep, &slab_caches, list) {
                err = setup_kmem_cache_node(cachep, node, GFP_KERNEL, false);
                if (err)
                        goto bad;
        }

        return 0;
bad:
        cpuup_canceled(cpu);
        return -ENOMEM;
}

int slab_prepare_cpu(unsigned int cpu)
{
        int err;

        mutex_lock(&slab_mutex);
        err = cpuup_prepare(cpu);
        mutex_unlock(&slab_mutex);
        return err;
}

/*
 * This is called for a failed online attempt and for a successful
 * offline.
 *
 * Even if all the cpus of a node are down, we don't free the
 * kmem_cache_node of any cache. This is to avoid a race between cpu_down and
 * a kmalloc allocation from another cpu for memory from the node of
 * the cpu going down.  The kmem_cache_node structure is usually allocated from
 * kmem_cache_create() and gets destroyed at kmem_cache_destroy().
 */
int slab_dead_cpu(unsigned int cpu)
{
        mutex_lock(&slab_mutex);
        cpuup_canceled(cpu);
        mutex_unlock(&slab_mutex);
        return 0;
}
#endif

static int slab_online_cpu(unsigned int cpu)
{
        start_cpu_timer(cpu);
        return 0;
}

static int slab_offline_cpu(unsigned int cpu)
{
        /*
         * Shutdown cache reaper. Note that the slab_mutex is held so
         * that if cache_reap() is invoked it cannot do anything
         * expensive but will only modify reap_work and reschedule the
         * timer.
         */
        cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
        /* Now the cache_reaper is guaranteed to be not running. */
        per_cpu(slab_reap_work, cpu).work.func = NULL;
        return 0;
}

1098*4882a593Smuzhiyun #if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
1099*4882a593Smuzhiyun /*
1100*4882a593Smuzhiyun * Drains freelist for a node on each slab cache, used for memory hot-remove.
1101*4882a593Smuzhiyun * Returns -EBUSY if all objects cannot be drained so that the node is not
1102*4882a593Smuzhiyun * removed.
1103*4882a593Smuzhiyun *
1104*4882a593Smuzhiyun * Must hold slab_mutex.
1105*4882a593Smuzhiyun */
drain_cache_node_node(int node)1106*4882a593Smuzhiyun static int __meminit drain_cache_node_node(int node)
1107*4882a593Smuzhiyun {
1108*4882a593Smuzhiyun struct kmem_cache *cachep;
1109*4882a593Smuzhiyun int ret = 0;
1110*4882a593Smuzhiyun
1111*4882a593Smuzhiyun list_for_each_entry(cachep, &slab_caches, list) {
1112*4882a593Smuzhiyun struct kmem_cache_node *n;
1113*4882a593Smuzhiyun
1114*4882a593Smuzhiyun n = get_node(cachep, node);
1115*4882a593Smuzhiyun if (!n)
1116*4882a593Smuzhiyun continue;
1117*4882a593Smuzhiyun
1118*4882a593Smuzhiyun drain_freelist(cachep, n, INT_MAX);
1119*4882a593Smuzhiyun
1120*4882a593Smuzhiyun if (!list_empty(&n->slabs_full) ||
1121*4882a593Smuzhiyun !list_empty(&n->slabs_partial)) {
1122*4882a593Smuzhiyun ret = -EBUSY;
1123*4882a593Smuzhiyun break;
1124*4882a593Smuzhiyun }
1125*4882a593Smuzhiyun }
1126*4882a593Smuzhiyun return ret;
1127*4882a593Smuzhiyun }
1128*4882a593Smuzhiyun
slab_memory_callback(struct notifier_block * self,unsigned long action,void * arg)1129*4882a593Smuzhiyun static int __meminit slab_memory_callback(struct notifier_block *self,
1130*4882a593Smuzhiyun unsigned long action, void *arg)
1131*4882a593Smuzhiyun {
1132*4882a593Smuzhiyun struct memory_notify *mnb = arg;
1133*4882a593Smuzhiyun int ret = 0;
1134*4882a593Smuzhiyun int nid;
1135*4882a593Smuzhiyun
1136*4882a593Smuzhiyun nid = mnb->status_change_nid;
1137*4882a593Smuzhiyun if (nid < 0)
1138*4882a593Smuzhiyun goto out;
1139*4882a593Smuzhiyun
1140*4882a593Smuzhiyun switch (action) {
1141*4882a593Smuzhiyun case MEM_GOING_ONLINE:
1142*4882a593Smuzhiyun mutex_lock(&slab_mutex);
1143*4882a593Smuzhiyun ret = init_cache_node_node(nid);
1144*4882a593Smuzhiyun mutex_unlock(&slab_mutex);
1145*4882a593Smuzhiyun break;
1146*4882a593Smuzhiyun case MEM_GOING_OFFLINE:
1147*4882a593Smuzhiyun mutex_lock(&slab_mutex);
1148*4882a593Smuzhiyun ret = drain_cache_node_node(nid);
1149*4882a593Smuzhiyun mutex_unlock(&slab_mutex);
1150*4882a593Smuzhiyun break;
1151*4882a593Smuzhiyun case MEM_ONLINE:
1152*4882a593Smuzhiyun case MEM_OFFLINE:
1153*4882a593Smuzhiyun case MEM_CANCEL_ONLINE:
1154*4882a593Smuzhiyun case MEM_CANCEL_OFFLINE:
1155*4882a593Smuzhiyun break;
1156*4882a593Smuzhiyun }
1157*4882a593Smuzhiyun out:
1158*4882a593Smuzhiyun return notifier_from_errno(ret);
1159*4882a593Smuzhiyun }
1160*4882a593Smuzhiyun #endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
1161*4882a593Smuzhiyun
1162*4882a593Smuzhiyun /*
1163*4882a593Smuzhiyun * swap the static kmem_cache_node with kmalloced memory
1164*4882a593Smuzhiyun */
1165*4882a593Smuzhiyun static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list,
1166*4882a593Smuzhiyun int nodeid)
1167*4882a593Smuzhiyun {
1168*4882a593Smuzhiyun struct kmem_cache_node *ptr;
1169*4882a593Smuzhiyun
1170*4882a593Smuzhiyun ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid);
1171*4882a593Smuzhiyun BUG_ON(!ptr);
1172*4882a593Smuzhiyun
1173*4882a593Smuzhiyun memcpy(ptr, list, sizeof(struct kmem_cache_node));
1174*4882a593Smuzhiyun /*
1175*4882a593Smuzhiyun * Do not assume that spinlocks can be initialized via memcpy:
1176*4882a593Smuzhiyun */
1177*4882a593Smuzhiyun spin_lock_init(&ptr->list_lock);
1178*4882a593Smuzhiyun
1179*4882a593Smuzhiyun MAKE_ALL_LISTS(cachep, ptr, nodeid);
1180*4882a593Smuzhiyun cachep->node[nodeid] = ptr;
1181*4882a593Smuzhiyun }
1182*4882a593Smuzhiyun
1183*4882a593Smuzhiyun /*
1184*4882a593Smuzhiyun * Set up all the kmem_cache_node structures for a cache whose buffer_size is
1185*4882a593Smuzhiyun * the same as the size of struct kmem_cache_node.
1186*4882a593Smuzhiyun */
1187*4882a593Smuzhiyun static void __init set_up_node(struct kmem_cache *cachep, int index)
1188*4882a593Smuzhiyun {
1189*4882a593Smuzhiyun int node;
1190*4882a593Smuzhiyun
1191*4882a593Smuzhiyun for_each_online_node(node) {
1192*4882a593Smuzhiyun cachep->node[node] = &init_kmem_cache_node[index + node];
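		/*
		 * The "cachep % REAPTIMEOUT_NODE" term below staggers the
		 * initial reap times so that the per-node reap work of
		 * different caches does not all expire at once.
		 */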
1193*4882a593Smuzhiyun cachep->node[node]->next_reap = jiffies +
1194*4882a593Smuzhiyun REAPTIMEOUT_NODE +
1195*4882a593Smuzhiyun ((unsigned long)cachep) % REAPTIMEOUT_NODE;
1196*4882a593Smuzhiyun }
1197*4882a593Smuzhiyun }
1198*4882a593Smuzhiyun
1199*4882a593Smuzhiyun /*
1200*4882a593Smuzhiyun * Initialisation. Called after the page allocator has been initialised and
1201*4882a593Smuzhiyun * before smp_init().
1202*4882a593Smuzhiyun */
1203*4882a593Smuzhiyun void __init kmem_cache_init(void)
1204*4882a593Smuzhiyun {
1205*4882a593Smuzhiyun int i;
1206*4882a593Smuzhiyun
1207*4882a593Smuzhiyun kmem_cache = &kmem_cache_boot;
1208*4882a593Smuzhiyun
1209*4882a593Smuzhiyun if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1)
1210*4882a593Smuzhiyun use_alien_caches = 0;
1211*4882a593Smuzhiyun
1212*4882a593Smuzhiyun for (i = 0; i < NUM_INIT_LISTS; i++)
1213*4882a593Smuzhiyun kmem_cache_node_init(&init_kmem_cache_node[i]);
1214*4882a593Smuzhiyun
1215*4882a593Smuzhiyun /*
1216*4882a593Smuzhiyun * Fragmentation resistance on low memory - only use bigger
1217*4882a593Smuzhiyun * page orders on machines with more than 32MB of memory if
1218*4882a593Smuzhiyun * not overridden on the command line.
1219*4882a593Smuzhiyun */
1220*4882a593Smuzhiyun if (!slab_max_order_set && totalram_pages() > (32 << 20) >> PAGE_SHIFT)
1221*4882a593Smuzhiyun slab_max_order = SLAB_MAX_ORDER_HI;
1222*4882a593Smuzhiyun
1223*4882a593Smuzhiyun /* Bootstrap is tricky, because several objects are allocated
1224*4882a593Smuzhiyun * from caches that do not exist yet:
1225*4882a593Smuzhiyun * 1) initialize the kmem_cache cache: it contains the struct
1226*4882a593Smuzhiyun * kmem_cache structures of all caches, except kmem_cache itself:
1227*4882a593Smuzhiyun * kmem_cache is statically allocated.
1228*4882a593Smuzhiyun * Initially an __init data area is used for the head array and the
1229*4882a593Smuzhiyun * kmem_cache_node structures, it's replaced with a kmalloc allocated
1230*4882a593Smuzhiyun * array at the end of the bootstrap.
1231*4882a593Smuzhiyun * 2) Create the first kmalloc cache.
1232*4882a593Smuzhiyun * The struct kmem_cache for the new cache is allocated normally.
1233*4882a593Smuzhiyun * An __init data area is used for the head array.
1234*4882a593Smuzhiyun * 3) Create the remaining kmalloc caches, with minimally sized
1235*4882a593Smuzhiyun * head arrays.
1236*4882a593Smuzhiyun * 4) Replace the __init data head arrays for kmem_cache and the first
1237*4882a593Smuzhiyun * kmalloc cache with kmalloc allocated arrays.
1238*4882a593Smuzhiyun * 5) Replace the __init data for kmem_cache_node for kmem_cache and
1239*4882a593Smuzhiyun * the other caches with kmalloc-allocated memory.
1240*4882a593Smuzhiyun * 6) Resize the head arrays of the kmalloc caches to their final sizes.
1241*4882a593Smuzhiyun */
1242*4882a593Smuzhiyun
1243*4882a593Smuzhiyun /* 1) create the kmem_cache */
1244*4882a593Smuzhiyun
1245*4882a593Smuzhiyun /*
1246*4882a593Smuzhiyun * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
1247*4882a593Smuzhiyun */
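	/*
	 * Note: the boot kmem_cache is sized to cover only the first
	 * nr_node_ids entries of its node[] pointer array, as computed
	 * by the offsetof() expression below.
	 */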
1248*4882a593Smuzhiyun create_boot_cache(kmem_cache, "kmem_cache",
1249*4882a593Smuzhiyun offsetof(struct kmem_cache, node) +
1250*4882a593Smuzhiyun nr_node_ids * sizeof(struct kmem_cache_node *),
1251*4882a593Smuzhiyun SLAB_HWCACHE_ALIGN, 0, 0);
1252*4882a593Smuzhiyun list_add(&kmem_cache->list, &slab_caches);
1253*4882a593Smuzhiyun slab_state = PARTIAL;
1254*4882a593Smuzhiyun
1255*4882a593Smuzhiyun /*
1256*4882a593Smuzhiyun * Initialize the caches that provide memory for the kmem_cache_node
1257*4882a593Smuzhiyun * structures first. Without this, further allocations will bug.
1258*4882a593Smuzhiyun */
1259*4882a593Smuzhiyun kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE] = create_kmalloc_cache(
1260*4882a593Smuzhiyun kmalloc_info[INDEX_NODE].name[KMALLOC_NORMAL],
1261*4882a593Smuzhiyun kmalloc_info[INDEX_NODE].size,
1262*4882a593Smuzhiyun ARCH_KMALLOC_FLAGS, 0,
1263*4882a593Smuzhiyun kmalloc_info[INDEX_NODE].size);
1264*4882a593Smuzhiyun slab_state = PARTIAL_NODE;
1265*4882a593Smuzhiyun setup_kmalloc_cache_index_table();
1266*4882a593Smuzhiyun
1267*4882a593Smuzhiyun slab_early_init = 0;
1268*4882a593Smuzhiyun
1269*4882a593Smuzhiyun /* 5) Replace the bootstrap kmem_cache_node */
1270*4882a593Smuzhiyun {
1271*4882a593Smuzhiyun int nid;
1272*4882a593Smuzhiyun
1273*4882a593Smuzhiyun for_each_online_node(nid) {
1274*4882a593Smuzhiyun init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
1275*4882a593Smuzhiyun
1276*4882a593Smuzhiyun init_list(kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE],
1277*4882a593Smuzhiyun &init_kmem_cache_node[SIZE_NODE + nid], nid);
1278*4882a593Smuzhiyun }
1279*4882a593Smuzhiyun }
1280*4882a593Smuzhiyun
1281*4882a593Smuzhiyun create_kmalloc_caches(ARCH_KMALLOC_FLAGS);
1282*4882a593Smuzhiyun }
1283*4882a593Smuzhiyun
1284*4882a593Smuzhiyun void __init kmem_cache_init_late(void)
1285*4882a593Smuzhiyun {
1286*4882a593Smuzhiyun struct kmem_cache *cachep;
1287*4882a593Smuzhiyun
1288*4882a593Smuzhiyun /* 6) resize the head arrays to their final sizes */
1289*4882a593Smuzhiyun mutex_lock(&slab_mutex);
1290*4882a593Smuzhiyun list_for_each_entry(cachep, &slab_caches, list)
1291*4882a593Smuzhiyun if (enable_cpucache(cachep, GFP_NOWAIT))
1292*4882a593Smuzhiyun BUG();
1293*4882a593Smuzhiyun mutex_unlock(&slab_mutex);
1294*4882a593Smuzhiyun
1295*4882a593Smuzhiyun /* Done! */
1296*4882a593Smuzhiyun slab_state = FULL;
1297*4882a593Smuzhiyun
1298*4882a593Smuzhiyun #ifdef CONFIG_NUMA
1299*4882a593Smuzhiyun /*
1300*4882a593Smuzhiyun * Register a memory hotplug callback that initializes and frees
1301*4882a593Smuzhiyun * the kmem_cache_node structures for the node being onlined or offlined.
1302*4882a593Smuzhiyun */
1303*4882a593Smuzhiyun hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
1304*4882a593Smuzhiyun #endif
1305*4882a593Smuzhiyun
1306*4882a593Smuzhiyun /*
1307*4882a593Smuzhiyun * The reap timers are started later, with a module init call: That part
1308*4882a593Smuzhiyun * of the kernel is not yet operational.
1309*4882a593Smuzhiyun */
1310*4882a593Smuzhiyun }
1311*4882a593Smuzhiyun
1312*4882a593Smuzhiyun static int __init cpucache_init(void)
1313*4882a593Smuzhiyun {
1314*4882a593Smuzhiyun int ret;
1315*4882a593Smuzhiyun
1316*4882a593Smuzhiyun /*
1317*4882a593Smuzhiyun * Register the timers that return unneeded pages to the page allocator
1318*4882a593Smuzhiyun */
1319*4882a593Smuzhiyun ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SLAB online",
1320*4882a593Smuzhiyun slab_online_cpu, slab_offline_cpu);
1321*4882a593Smuzhiyun WARN_ON(ret < 0);
1322*4882a593Smuzhiyun
1323*4882a593Smuzhiyun return 0;
1324*4882a593Smuzhiyun }
1325*4882a593Smuzhiyun __initcall(cpucache_init);
1326*4882a593Smuzhiyun
1327*4882a593Smuzhiyun static noinline void
1328*4882a593Smuzhiyun slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1329*4882a593Smuzhiyun {
1330*4882a593Smuzhiyun #if DEBUG
1331*4882a593Smuzhiyun struct kmem_cache_node *n;
1332*4882a593Smuzhiyun unsigned long flags;
1333*4882a593Smuzhiyun int node;
1334*4882a593Smuzhiyun static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
1335*4882a593Smuzhiyun DEFAULT_RATELIMIT_BURST);
1336*4882a593Smuzhiyun
1337*4882a593Smuzhiyun if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs))
1338*4882a593Smuzhiyun return;
1339*4882a593Smuzhiyun
1340*4882a593Smuzhiyun pr_warn("SLAB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
1341*4882a593Smuzhiyun nodeid, gfpflags, &gfpflags);
1342*4882a593Smuzhiyun pr_warn(" cache: %s, object size: %d, order: %d\n",
1343*4882a593Smuzhiyun cachep->name, cachep->size, cachep->gfporder);
1344*4882a593Smuzhiyun
1345*4882a593Smuzhiyun for_each_kmem_cache_node(cachep, node, n) {
1346*4882a593Smuzhiyun unsigned long total_slabs, free_slabs, free_objs;
1347*4882a593Smuzhiyun
1348*4882a593Smuzhiyun spin_lock_irqsave(&n->list_lock, flags);
1349*4882a593Smuzhiyun total_slabs = n->total_slabs;
1350*4882a593Smuzhiyun free_slabs = n->free_slabs;
1351*4882a593Smuzhiyun free_objs = n->free_objects;
1352*4882a593Smuzhiyun spin_unlock_irqrestore(&n->list_lock, flags);
1353*4882a593Smuzhiyun
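		/*
		 * Report slabs and objects as <in use>/<total>; the total
		 * object count is total_slabs * objects-per-slab.
		 */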
1354*4882a593Smuzhiyun pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
1355*4882a593Smuzhiyun node, total_slabs - free_slabs, total_slabs,
1356*4882a593Smuzhiyun (total_slabs * cachep->num) - free_objs,
1357*4882a593Smuzhiyun total_slabs * cachep->num);
1358*4882a593Smuzhiyun }
1359*4882a593Smuzhiyun #endif
1360*4882a593Smuzhiyun }
1361*4882a593Smuzhiyun
1362*4882a593Smuzhiyun /*
1363*4882a593Smuzhiyun * Interface to system's page allocator. No need to hold the
1364*4882a593Smuzhiyun * kmem_cache_node ->list_lock.
1365*4882a593Smuzhiyun *
1366*4882a593Smuzhiyun * If we requested dmaable memory, we will get it. Even if we
1367*4882a593Smuzhiyun * did not request dmaable memory, we might get it, but that
1368*4882a593Smuzhiyun * would be relatively rare and ignorable.
1369*4882a593Smuzhiyun */
1370*4882a593Smuzhiyun static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
1371*4882a593Smuzhiyun int nodeid)
1372*4882a593Smuzhiyun {
1373*4882a593Smuzhiyun struct page *page;
1374*4882a593Smuzhiyun
1375*4882a593Smuzhiyun flags |= cachep->allocflags;
1376*4882a593Smuzhiyun
1377*4882a593Smuzhiyun page = __alloc_pages_node(nodeid, flags, cachep->gfporder);
1378*4882a593Smuzhiyun if (!page) {
1379*4882a593Smuzhiyun slab_out_of_memory(cachep, flags, nodeid);
1380*4882a593Smuzhiyun return NULL;
1381*4882a593Smuzhiyun }
1382*4882a593Smuzhiyun
1383*4882a593Smuzhiyun account_slab_page(page, cachep->gfporder, cachep);
1384*4882a593Smuzhiyun __SetPageSlab(page);
1385*4882a593Smuzhiyun /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
1386*4882a593Smuzhiyun if (sk_memalloc_socks() && page_is_pfmemalloc(page))
1387*4882a593Smuzhiyun SetPageSlabPfmemalloc(page);
1388*4882a593Smuzhiyun
1389*4882a593Smuzhiyun return page;
1390*4882a593Smuzhiyun }
1391*4882a593Smuzhiyun
1392*4882a593Smuzhiyun /*
1393*4882a593Smuzhiyun * Interface to system's page release.
1394*4882a593Smuzhiyun */
1395*4882a593Smuzhiyun static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
1396*4882a593Smuzhiyun {
1397*4882a593Smuzhiyun int order = cachep->gfporder;
1398*4882a593Smuzhiyun
1399*4882a593Smuzhiyun BUG_ON(!PageSlab(page));
1400*4882a593Smuzhiyun __ClearPageSlabPfmemalloc(page);
1401*4882a593Smuzhiyun __ClearPageSlab(page);
1402*4882a593Smuzhiyun page_mapcount_reset(page);
1403*4882a593Smuzhiyun page->mapping = NULL;
1404*4882a593Smuzhiyun
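	/* Credit the freed slab pages to the ongoing reclaim pass, if any. */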
1405*4882a593Smuzhiyun if (current->reclaim_state)
1406*4882a593Smuzhiyun current->reclaim_state->reclaimed_slab += 1 << order;
1407*4882a593Smuzhiyun unaccount_slab_page(page, order, cachep);
1408*4882a593Smuzhiyun __free_pages(page, order);
1409*4882a593Smuzhiyun }
1410*4882a593Smuzhiyun
1411*4882a593Smuzhiyun static void kmem_rcu_free(struct rcu_head *head)
1412*4882a593Smuzhiyun {
1413*4882a593Smuzhiyun struct kmem_cache *cachep;
1414*4882a593Smuzhiyun struct page *page;
1415*4882a593Smuzhiyun
1416*4882a593Smuzhiyun page = container_of(head, struct page, rcu_head);
1417*4882a593Smuzhiyun cachep = page->slab_cache;
1418*4882a593Smuzhiyun
1419*4882a593Smuzhiyun kmem_freepages(cachep, page);
1420*4882a593Smuzhiyun }
1421*4882a593Smuzhiyun
1422*4882a593Smuzhiyun #if DEBUG
1423*4882a593Smuzhiyun static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
1424*4882a593Smuzhiyun {
1425*4882a593Smuzhiyun if (debug_pagealloc_enabled_static() && OFF_SLAB(cachep) &&
1426*4882a593Smuzhiyun (cachep->size % PAGE_SIZE) == 0)
1427*4882a593Smuzhiyun return true;
1428*4882a593Smuzhiyun
1429*4882a593Smuzhiyun return false;
1430*4882a593Smuzhiyun }
1431*4882a593Smuzhiyun
1432*4882a593Smuzhiyun #ifdef CONFIG_DEBUG_PAGEALLOC
1433*4882a593Smuzhiyun static void slab_kernel_map(struct kmem_cache *cachep, void *objp, int map)
1434*4882a593Smuzhiyun {
1435*4882a593Smuzhiyun if (!is_debug_pagealloc_cache(cachep))
1436*4882a593Smuzhiyun return;
1437*4882a593Smuzhiyun
1438*4882a593Smuzhiyun __kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
1439*4882a593Smuzhiyun }
1440*4882a593Smuzhiyun
1441*4882a593Smuzhiyun #else
1442*4882a593Smuzhiyun static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
1443*4882a593Smuzhiyun int map) {}
1444*4882a593Smuzhiyun
1445*4882a593Smuzhiyun #endif
1446*4882a593Smuzhiyun
1447*4882a593Smuzhiyun static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1448*4882a593Smuzhiyun {
1449*4882a593Smuzhiyun int size = cachep->object_size;
1450*4882a593Smuzhiyun addr = &((char *)addr)[obj_offset(cachep)];
1451*4882a593Smuzhiyun
1452*4882a593Smuzhiyun memset(addr, val, size);
1453*4882a593Smuzhiyun *(unsigned char *)(addr + size - 1) = POISON_END;
1454*4882a593Smuzhiyun }
1455*4882a593Smuzhiyun
1456*4882a593Smuzhiyun static void dump_line(char *data, int offset, int limit)
1457*4882a593Smuzhiyun {
1458*4882a593Smuzhiyun int i;
1459*4882a593Smuzhiyun unsigned char error = 0;
1460*4882a593Smuzhiyun int bad_count = 0;
1461*4882a593Smuzhiyun
1462*4882a593Smuzhiyun pr_err("%03x: ", offset);
1463*4882a593Smuzhiyun for (i = 0; i < limit; i++) {
1464*4882a593Smuzhiyun if (data[offset + i] != POISON_FREE) {
1465*4882a593Smuzhiyun error = data[offset + i];
1466*4882a593Smuzhiyun bad_count++;
1467*4882a593Smuzhiyun }
1468*4882a593Smuzhiyun }
1469*4882a593Smuzhiyun print_hex_dump(KERN_CONT, "", 0, 16, 1,
1470*4882a593Smuzhiyun &data[offset], limit, 1);
1471*4882a593Smuzhiyun
1472*4882a593Smuzhiyun if (bad_count == 1) {
1473*4882a593Smuzhiyun error ^= POISON_FREE;
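		/*
		 * A power-of-two difference from POISON_FREE means exactly
		 * one flipped bit.
		 */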
1474*4882a593Smuzhiyun if (!(error & (error - 1))) {
1475*4882a593Smuzhiyun pr_err("Single bit error detected. Probably bad RAM.\n");
1476*4882a593Smuzhiyun #ifdef CONFIG_X86
1477*4882a593Smuzhiyun pr_err("Run memtest86+ or a similar memory test tool.\n");
1478*4882a593Smuzhiyun #else
1479*4882a593Smuzhiyun pr_err("Run a memory test tool.\n");
1480*4882a593Smuzhiyun #endif
1481*4882a593Smuzhiyun }
1482*4882a593Smuzhiyun }
1483*4882a593Smuzhiyun }
1484*4882a593Smuzhiyun #endif
1485*4882a593Smuzhiyun
1486*4882a593Smuzhiyun #if DEBUG
1487*4882a593Smuzhiyun
1488*4882a593Smuzhiyun static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1489*4882a593Smuzhiyun {
1490*4882a593Smuzhiyun int i, size;
1491*4882a593Smuzhiyun char *realobj;
1492*4882a593Smuzhiyun
1493*4882a593Smuzhiyun if (cachep->flags & SLAB_RED_ZONE) {
1494*4882a593Smuzhiyun pr_err("Redzone: 0x%llx/0x%llx\n",
1495*4882a593Smuzhiyun *dbg_redzone1(cachep, objp),
1496*4882a593Smuzhiyun *dbg_redzone2(cachep, objp));
1497*4882a593Smuzhiyun }
1498*4882a593Smuzhiyun
1499*4882a593Smuzhiyun if (cachep->flags & SLAB_STORE_USER)
1500*4882a593Smuzhiyun pr_err("Last user: (%pSR)\n", *dbg_userword(cachep, objp));
1501*4882a593Smuzhiyun realobj = (char *)objp + obj_offset(cachep);
1502*4882a593Smuzhiyun size = cachep->object_size;
1503*4882a593Smuzhiyun for (i = 0; i < size && lines; i += 16, lines--) {
1504*4882a593Smuzhiyun int limit;
1505*4882a593Smuzhiyun limit = 16;
1506*4882a593Smuzhiyun if (i + limit > size)
1507*4882a593Smuzhiyun limit = size - i;
1508*4882a593Smuzhiyun dump_line(realobj, i, limit);
1509*4882a593Smuzhiyun }
1510*4882a593Smuzhiyun }
1511*4882a593Smuzhiyun
1512*4882a593Smuzhiyun static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1513*4882a593Smuzhiyun {
1514*4882a593Smuzhiyun char *realobj;
1515*4882a593Smuzhiyun int size, i;
1516*4882a593Smuzhiyun int lines = 0;
1517*4882a593Smuzhiyun
1518*4882a593Smuzhiyun if (is_debug_pagealloc_cache(cachep))
1519*4882a593Smuzhiyun return;
1520*4882a593Smuzhiyun
1521*4882a593Smuzhiyun realobj = (char *)objp + obj_offset(cachep);
1522*4882a593Smuzhiyun size = cachep->object_size;
1523*4882a593Smuzhiyun
1524*4882a593Smuzhiyun for (i = 0; i < size; i++) {
1525*4882a593Smuzhiyun char exp = POISON_FREE;
1526*4882a593Smuzhiyun if (i == size - 1)
1527*4882a593Smuzhiyun exp = POISON_END;
1528*4882a593Smuzhiyun if (realobj[i] != exp) {
1529*4882a593Smuzhiyun int limit;
1530*4882a593Smuzhiyun /* Mismatch ! */
1531*4882a593Smuzhiyun /* Print header */
1532*4882a593Smuzhiyun if (lines == 0) {
1533*4882a593Smuzhiyun pr_err("Slab corruption (%s): %s start=%px, len=%d\n",
1534*4882a593Smuzhiyun print_tainted(), cachep->name,
1535*4882a593Smuzhiyun realobj, size);
1536*4882a593Smuzhiyun print_objinfo(cachep, objp, 0);
1537*4882a593Smuzhiyun }
1538*4882a593Smuzhiyun /* Hexdump the affected line */
1539*4882a593Smuzhiyun i = (i / 16) * 16;
1540*4882a593Smuzhiyun limit = 16;
1541*4882a593Smuzhiyun if (i + limit > size)
1542*4882a593Smuzhiyun limit = size - i;
1543*4882a593Smuzhiyun dump_line(realobj, i, limit);
1544*4882a593Smuzhiyun i += 16;
1545*4882a593Smuzhiyun lines++;
1546*4882a593Smuzhiyun /* Limit to 5 lines */
1547*4882a593Smuzhiyun if (lines > 5)
1548*4882a593Smuzhiyun break;
1549*4882a593Smuzhiyun }
1550*4882a593Smuzhiyun }
1551*4882a593Smuzhiyun if (lines != 0) {
1552*4882a593Smuzhiyun /* Print some data about the neighboring objects, if they
1553*4882a593Smuzhiyun * exist:
1554*4882a593Smuzhiyun */
1555*4882a593Smuzhiyun struct page *page = virt_to_head_page(objp);
1556*4882a593Smuzhiyun unsigned int objnr;
1557*4882a593Smuzhiyun
1558*4882a593Smuzhiyun objnr = obj_to_index(cachep, page, objp);
1559*4882a593Smuzhiyun if (objnr) {
1560*4882a593Smuzhiyun objp = index_to_obj(cachep, page, objnr - 1);
1561*4882a593Smuzhiyun realobj = (char *)objp + obj_offset(cachep);
1562*4882a593Smuzhiyun pr_err("Prev obj: start=%px, len=%d\n", realobj, size);
1563*4882a593Smuzhiyun print_objinfo(cachep, objp, 2);
1564*4882a593Smuzhiyun }
1565*4882a593Smuzhiyun if (objnr + 1 < cachep->num) {
1566*4882a593Smuzhiyun objp = index_to_obj(cachep, page, objnr + 1);
1567*4882a593Smuzhiyun realobj = (char *)objp + obj_offset(cachep);
1568*4882a593Smuzhiyun pr_err("Next obj: start=%px, len=%d\n", realobj, size);
1569*4882a593Smuzhiyun print_objinfo(cachep, objp, 2);
1570*4882a593Smuzhiyun }
1571*4882a593Smuzhiyun }
1572*4882a593Smuzhiyun }
1573*4882a593Smuzhiyun #endif
1574*4882a593Smuzhiyun
1575*4882a593Smuzhiyun #if DEBUG
1576*4882a593Smuzhiyun static void slab_destroy_debugcheck(struct kmem_cache *cachep,
1577*4882a593Smuzhiyun struct page *page)
1578*4882a593Smuzhiyun {
1579*4882a593Smuzhiyun int i;
1580*4882a593Smuzhiyun
1581*4882a593Smuzhiyun if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) {
1582*4882a593Smuzhiyun poison_obj(cachep, page->freelist - obj_offset(cachep),
1583*4882a593Smuzhiyun POISON_FREE);
1584*4882a593Smuzhiyun }
1585*4882a593Smuzhiyun
1586*4882a593Smuzhiyun for (i = 0; i < cachep->num; i++) {
1587*4882a593Smuzhiyun void *objp = index_to_obj(cachep, page, i);
1588*4882a593Smuzhiyun
1589*4882a593Smuzhiyun if (cachep->flags & SLAB_POISON) {
1590*4882a593Smuzhiyun check_poison_obj(cachep, objp);
1591*4882a593Smuzhiyun slab_kernel_map(cachep, objp, 1);
1592*4882a593Smuzhiyun }
1593*4882a593Smuzhiyun if (cachep->flags & SLAB_RED_ZONE) {
1594*4882a593Smuzhiyun if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
1595*4882a593Smuzhiyun slab_error(cachep, "start of a freed object was overwritten");
1596*4882a593Smuzhiyun if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
1597*4882a593Smuzhiyun slab_error(cachep, "end of a freed object was overwritten");
1598*4882a593Smuzhiyun }
1599*4882a593Smuzhiyun }
1600*4882a593Smuzhiyun }
1601*4882a593Smuzhiyun #else
1602*4882a593Smuzhiyun static void slab_destroy_debugcheck(struct kmem_cache *cachep,
1603*4882a593Smuzhiyun struct page *page)
1604*4882a593Smuzhiyun {
1605*4882a593Smuzhiyun }
1606*4882a593Smuzhiyun #endif
1607*4882a593Smuzhiyun
1608*4882a593Smuzhiyun /**
1609*4882a593Smuzhiyun * slab_destroy - destroy and release all objects in a slab
1610*4882a593Smuzhiyun * @cachep: cache pointer being destroyed
1611*4882a593Smuzhiyun * @page: page pointer being destroyed
1612*4882a593Smuzhiyun *
1613*4882a593Smuzhiyun * Destroy all the objs in a slab page, and release the mem back to the system.
1614*4882a593Smuzhiyun * Before calling, the slab page must have been unlinked from the cache. The
1615*4882a593Smuzhiyun * kmem_cache_node ->list_lock is not held/needed.
1616*4882a593Smuzhiyun */
1617*4882a593Smuzhiyun static void slab_destroy(struct kmem_cache *cachep, struct page *page)
1618*4882a593Smuzhiyun {
1619*4882a593Smuzhiyun void *freelist;
1620*4882a593Smuzhiyun
1621*4882a593Smuzhiyun freelist = page->freelist;
1622*4882a593Smuzhiyun slab_destroy_debugcheck(cachep, page);
1623*4882a593Smuzhiyun if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
1624*4882a593Smuzhiyun call_rcu(&page->rcu_head, kmem_rcu_free);
1625*4882a593Smuzhiyun else
1626*4882a593Smuzhiyun kmem_freepages(cachep, page);
1627*4882a593Smuzhiyun
1628*4882a593Smuzhiyun /*
1629*4882a593Smuzhiyun * From here on we must not touch the page (it may already be queued
1630*4882a593Smuzhiyun * for freeing in RCU context); only the saved freelist pointer is used.
1631*4882a593Smuzhiyun */
1632*4882a593Smuzhiyun if (OFF_SLAB(cachep))
1633*4882a593Smuzhiyun kmem_cache_free(cachep->freelist_cache, freelist);
1634*4882a593Smuzhiyun }
1635*4882a593Smuzhiyun
1636*4882a593Smuzhiyun /*
1637*4882a593Smuzhiyun * Update the size of the caches before calling slabs_destroy as it may
1638*4882a593Smuzhiyun * recursively call kfree.
1639*4882a593Smuzhiyun */
1640*4882a593Smuzhiyun static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
1641*4882a593Smuzhiyun {
1642*4882a593Smuzhiyun struct page *page, *n;
1643*4882a593Smuzhiyun
1644*4882a593Smuzhiyun list_for_each_entry_safe(page, n, list, slab_list) {
1645*4882a593Smuzhiyun list_del(&page->slab_list);
1646*4882a593Smuzhiyun slab_destroy(cachep, page);
1647*4882a593Smuzhiyun }
1648*4882a593Smuzhiyun }
1649*4882a593Smuzhiyun
1650*4882a593Smuzhiyun /**
1651*4882a593Smuzhiyun * calculate_slab_order - calculate size (page order) of slabs
1652*4882a593Smuzhiyun * @cachep: pointer to the cache that is being created
1653*4882a593Smuzhiyun * @size: size of objects to be created in this cache.
1654*4882a593Smuzhiyun * @flags: slab allocation flags
1655*4882a593Smuzhiyun *
1656*4882a593Smuzhiyun * Also calculates the number of objects per slab.
1657*4882a593Smuzhiyun *
1658*4882a593Smuzhiyun * This could be made much more intelligent. For now, try to avoid using
1659*4882a593Smuzhiyun * high order pages for slabs. When the gfp() functions are more friendly
1660*4882a593Smuzhiyun * towards high-order requests, this should be changed.
1661*4882a593Smuzhiyun *
1662*4882a593Smuzhiyun * Return: number of left-over bytes in a slab
1663*4882a593Smuzhiyun */
1664*4882a593Smuzhiyun static size_t calculate_slab_order(struct kmem_cache *cachep,
1665*4882a593Smuzhiyun size_t size, slab_flags_t flags)
1666*4882a593Smuzhiyun {
1667*4882a593Smuzhiyun size_t left_over = 0;
1668*4882a593Smuzhiyun int gfporder;
1669*4882a593Smuzhiyun
1670*4882a593Smuzhiyun for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
1671*4882a593Smuzhiyun unsigned int num;
1672*4882a593Smuzhiyun size_t remainder;
1673*4882a593Smuzhiyun
1674*4882a593Smuzhiyun num = cache_estimate(gfporder, size, flags, &remainder);
1675*4882a593Smuzhiyun if (!num)
1676*4882a593Smuzhiyun continue;
1677*4882a593Smuzhiyun
1678*4882a593Smuzhiyun /* Can't handle number of objects more than SLAB_OBJ_MAX_NUM */
1679*4882a593Smuzhiyun if (num > SLAB_OBJ_MAX_NUM)
1680*4882a593Smuzhiyun break;
1681*4882a593Smuzhiyun
1682*4882a593Smuzhiyun if (flags & CFLGS_OFF_SLAB) {
1683*4882a593Smuzhiyun struct kmem_cache *freelist_cache;
1684*4882a593Smuzhiyun size_t freelist_size;
1685*4882a593Smuzhiyun
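			/*
			 * An off-slab freelist needs one freelist_idx_t per
			 * object; it is carved out of a suitably sized
			 * kmalloc cache.
			 */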
1686*4882a593Smuzhiyun freelist_size = num * sizeof(freelist_idx_t);
1687*4882a593Smuzhiyun freelist_cache = kmalloc_slab(freelist_size, 0u);
1688*4882a593Smuzhiyun if (!freelist_cache)
1689*4882a593Smuzhiyun continue;
1690*4882a593Smuzhiyun
1691*4882a593Smuzhiyun /*
1692*4882a593Smuzhiyun * Needed to avoid possible looping condition
1693*4882a593Smuzhiyun * in cache_grow_begin()
1694*4882a593Smuzhiyun */
1695*4882a593Smuzhiyun if (OFF_SLAB(freelist_cache))
1696*4882a593Smuzhiyun continue;
1697*4882a593Smuzhiyun
1698*4882a593Smuzhiyun /* check if off slab has enough benefit */
1699*4882a593Smuzhiyun if (freelist_cache->size > cachep->size / 2)
1700*4882a593Smuzhiyun continue;
1701*4882a593Smuzhiyun }
1702*4882a593Smuzhiyun
1703*4882a593Smuzhiyun /* Found something acceptable - save it away */
1704*4882a593Smuzhiyun cachep->num = num;
1705*4882a593Smuzhiyun cachep->gfporder = gfporder;
1706*4882a593Smuzhiyun left_over = remainder;
1707*4882a593Smuzhiyun
1708*4882a593Smuzhiyun /*
1709*4882a593Smuzhiyun * A VFS-reclaimable slab tends to have most allocations
1710*4882a593Smuzhiyun * as GFP_NOFS and we really don't want to have to be allocating
1711*4882a593Smuzhiyun * higher-order pages when we are unable to shrink dcache.
1712*4882a593Smuzhiyun */
1713*4882a593Smuzhiyun if (flags & SLAB_RECLAIM_ACCOUNT)
1714*4882a593Smuzhiyun break;
1715*4882a593Smuzhiyun
1716*4882a593Smuzhiyun /*
1717*4882a593Smuzhiyun * Large number of objects is good, but very large slabs are
1718*4882a593Smuzhiyun * currently bad for the gfp()s.
1719*4882a593Smuzhiyun */
1720*4882a593Smuzhiyun if (gfporder >= slab_max_order)
1721*4882a593Smuzhiyun break;
1722*4882a593Smuzhiyun
1723*4882a593Smuzhiyun /*
1724*4882a593Smuzhiyun * Acceptable internal fragmentation?
1725*4882a593Smuzhiyun */
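		/* i.e. stop once the wasted remainder is at most 1/8 of the slab */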
1726*4882a593Smuzhiyun if (left_over * 8 <= (PAGE_SIZE << gfporder))
1727*4882a593Smuzhiyun break;
1728*4882a593Smuzhiyun }
1729*4882a593Smuzhiyun return left_over;
1730*4882a593Smuzhiyun }
1731*4882a593Smuzhiyun
1732*4882a593Smuzhiyun static struct array_cache __percpu *alloc_kmem_cache_cpus(
1733*4882a593Smuzhiyun struct kmem_cache *cachep, int entries, int batchcount)
1734*4882a593Smuzhiyun {
1735*4882a593Smuzhiyun int cpu;
1736*4882a593Smuzhiyun size_t size;
1737*4882a593Smuzhiyun struct array_cache __percpu *cpu_cache;
1738*4882a593Smuzhiyun
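	/*
	 * Each per-cpu array_cache is laid out as the struct header
	 * followed by 'entries' object pointer slots, hence the size
	 * computed below.
	 */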
1739*4882a593Smuzhiyun size = sizeof(void *) * entries + sizeof(struct array_cache);
1740*4882a593Smuzhiyun cpu_cache = __alloc_percpu(size, sizeof(void *));
1741*4882a593Smuzhiyun
1742*4882a593Smuzhiyun if (!cpu_cache)
1743*4882a593Smuzhiyun return NULL;
1744*4882a593Smuzhiyun
1745*4882a593Smuzhiyun for_each_possible_cpu(cpu) {
1746*4882a593Smuzhiyun init_arraycache(per_cpu_ptr(cpu_cache, cpu),
1747*4882a593Smuzhiyun entries, batchcount);
1748*4882a593Smuzhiyun }
1749*4882a593Smuzhiyun
1750*4882a593Smuzhiyun return cpu_cache;
1751*4882a593Smuzhiyun }
1752*4882a593Smuzhiyun
1753*4882a593Smuzhiyun static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
1754*4882a593Smuzhiyun {
1755*4882a593Smuzhiyun if (slab_state >= FULL)
1756*4882a593Smuzhiyun return enable_cpucache(cachep, gfp);
1757*4882a593Smuzhiyun
1758*4882a593Smuzhiyun cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1);
1759*4882a593Smuzhiyun if (!cachep->cpu_cache)
1760*4882a593Smuzhiyun return 1;
1761*4882a593Smuzhiyun
1762*4882a593Smuzhiyun if (slab_state == DOWN) {
1763*4882a593Smuzhiyun /* Creation of first cache (kmem_cache). */
1764*4882a593Smuzhiyun set_up_node(kmem_cache, CACHE_CACHE);
1765*4882a593Smuzhiyun } else if (slab_state == PARTIAL) {
1766*4882a593Smuzhiyun /* For kmem_cache_node */
1767*4882a593Smuzhiyun set_up_node(cachep, SIZE_NODE);
1768*4882a593Smuzhiyun } else {
1769*4882a593Smuzhiyun int node;
1770*4882a593Smuzhiyun
1771*4882a593Smuzhiyun for_each_online_node(node) {
1772*4882a593Smuzhiyun cachep->node[node] = kmalloc_node(
1773*4882a593Smuzhiyun sizeof(struct kmem_cache_node), gfp, node);
1774*4882a593Smuzhiyun BUG_ON(!cachep->node[node]);
1775*4882a593Smuzhiyun kmem_cache_node_init(cachep->node[node]);
1776*4882a593Smuzhiyun }
1777*4882a593Smuzhiyun }
1778*4882a593Smuzhiyun
1779*4882a593Smuzhiyun cachep->node[numa_mem_id()]->next_reap =
1780*4882a593Smuzhiyun jiffies + REAPTIMEOUT_NODE +
1781*4882a593Smuzhiyun ((unsigned long)cachep) % REAPTIMEOUT_NODE;
1782*4882a593Smuzhiyun
1783*4882a593Smuzhiyun cpu_cache_get(cachep)->avail = 0;
1784*4882a593Smuzhiyun cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
1785*4882a593Smuzhiyun cpu_cache_get(cachep)->batchcount = 1;
1786*4882a593Smuzhiyun cpu_cache_get(cachep)->touched = 0;
1787*4882a593Smuzhiyun cachep->batchcount = 1;
1788*4882a593Smuzhiyun cachep->limit = BOOT_CPUCACHE_ENTRIES;
1789*4882a593Smuzhiyun return 0;
1790*4882a593Smuzhiyun }
1791*4882a593Smuzhiyun
1792*4882a593Smuzhiyun slab_flags_t kmem_cache_flags(unsigned int object_size,
1793*4882a593Smuzhiyun slab_flags_t flags, const char *name)
1794*4882a593Smuzhiyun {
1795*4882a593Smuzhiyun return flags;
1796*4882a593Smuzhiyun }
1797*4882a593Smuzhiyun
1798*4882a593Smuzhiyun struct kmem_cache *
1799*4882a593Smuzhiyun __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
1800*4882a593Smuzhiyun slab_flags_t flags, void (*ctor)(void *))
1801*4882a593Smuzhiyun {
1802*4882a593Smuzhiyun struct kmem_cache *cachep;
1803*4882a593Smuzhiyun
1804*4882a593Smuzhiyun cachep = find_mergeable(size, align, flags, name, ctor);
1805*4882a593Smuzhiyun if (cachep) {
1806*4882a593Smuzhiyun cachep->refcount++;
1807*4882a593Smuzhiyun
1808*4882a593Smuzhiyun /*
1809*4882a593Smuzhiyun * Adjust the object sizes so that we clear
1810*4882a593Smuzhiyun * the complete object on kzalloc.
1811*4882a593Smuzhiyun */
1812*4882a593Smuzhiyun cachep->object_size = max_t(int, cachep->object_size, size);
1813*4882a593Smuzhiyun }
1814*4882a593Smuzhiyun return cachep;
1815*4882a593Smuzhiyun }
1816*4882a593Smuzhiyun
1817*4882a593Smuzhiyun static bool set_objfreelist_slab_cache(struct kmem_cache *cachep,
1818*4882a593Smuzhiyun size_t size, slab_flags_t flags)
1819*4882a593Smuzhiyun {
1820*4882a593Smuzhiyun size_t left;
1821*4882a593Smuzhiyun
1822*4882a593Smuzhiyun cachep->num = 0;
1823*4882a593Smuzhiyun
1824*4882a593Smuzhiyun /*
1825*4882a593Smuzhiyun * If slab auto-initialization on free is enabled, store the freelist
1826*4882a593Smuzhiyun * off-slab, so that its contents don't end up in one of the allocated
1827*4882a593Smuzhiyun * objects.
1828*4882a593Smuzhiyun */
1829*4882a593Smuzhiyun if (unlikely(slab_want_init_on_free(cachep)))
1830*4882a593Smuzhiyun return false;
1831*4882a593Smuzhiyun
1832*4882a593Smuzhiyun if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU)
1833*4882a593Smuzhiyun return false;
1834*4882a593Smuzhiyun
1835*4882a593Smuzhiyun left = calculate_slab_order(cachep, size,
1836*4882a593Smuzhiyun flags | CFLGS_OBJFREELIST_SLAB);
1837*4882a593Smuzhiyun if (!cachep->num)
1838*4882a593Smuzhiyun return false;
1839*4882a593Smuzhiyun
1840*4882a593Smuzhiyun if (cachep->num * sizeof(freelist_idx_t) > cachep->object_size)
1841*4882a593Smuzhiyun return false;
1842*4882a593Smuzhiyun
1843*4882a593Smuzhiyun cachep->colour = left / cachep->colour_off;
1844*4882a593Smuzhiyun
1845*4882a593Smuzhiyun return true;
1846*4882a593Smuzhiyun }
1847*4882a593Smuzhiyun
1848*4882a593Smuzhiyun static bool set_off_slab_cache(struct kmem_cache *cachep,
1849*4882a593Smuzhiyun size_t size, slab_flags_t flags)
1850*4882a593Smuzhiyun {
1851*4882a593Smuzhiyun size_t left;
1852*4882a593Smuzhiyun
1853*4882a593Smuzhiyun cachep->num = 0;
1854*4882a593Smuzhiyun
1855*4882a593Smuzhiyun /*
1856*4882a593Smuzhiyun * Always use on-slab management when SLAB_NOLEAKTRACE
1857*4882a593Smuzhiyun * to avoid recursive calls into kmemleak.
1858*4882a593Smuzhiyun */
1859*4882a593Smuzhiyun if (flags & SLAB_NOLEAKTRACE)
1860*4882a593Smuzhiyun return false;
1861*4882a593Smuzhiyun
1862*4882a593Smuzhiyun /*
1863*4882a593Smuzhiyun * Size is large, assume best to place the slab management obj
1864*4882a593Smuzhiyun * off-slab (should allow better packing of objs).
1865*4882a593Smuzhiyun */
1866*4882a593Smuzhiyun left = calculate_slab_order(cachep, size, flags | CFLGS_OFF_SLAB);
1867*4882a593Smuzhiyun if (!cachep->num)
1868*4882a593Smuzhiyun return false;
1869*4882a593Smuzhiyun
1870*4882a593Smuzhiyun /*
1871*4882a593Smuzhiyun * If the slab has been placed off-slab, and we have enough space then
1872*4882a593Smuzhiyun * move it on-slab. This is at the expense of any extra colouring.
1873*4882a593Smuzhiyun */
1874*4882a593Smuzhiyun if (left >= cachep->num * sizeof(freelist_idx_t))
1875*4882a593Smuzhiyun return false;
1876*4882a593Smuzhiyun
1877*4882a593Smuzhiyun cachep->colour = left / cachep->colour_off;
1878*4882a593Smuzhiyun
1879*4882a593Smuzhiyun return true;
1880*4882a593Smuzhiyun }
1881*4882a593Smuzhiyun
1882*4882a593Smuzhiyun static bool set_on_slab_cache(struct kmem_cache *cachep,
1883*4882a593Smuzhiyun size_t size, slab_flags_t flags)
1884*4882a593Smuzhiyun {
1885*4882a593Smuzhiyun size_t left;
1886*4882a593Smuzhiyun
1887*4882a593Smuzhiyun cachep->num = 0;
1888*4882a593Smuzhiyun
1889*4882a593Smuzhiyun left = calculate_slab_order(cachep, size, flags);
1890*4882a593Smuzhiyun if (!cachep->num)
1891*4882a593Smuzhiyun return false;
1892*4882a593Smuzhiyun
1893*4882a593Smuzhiyun cachep->colour = left / cachep->colour_off;
1894*4882a593Smuzhiyun
1895*4882a593Smuzhiyun return true;
1896*4882a593Smuzhiyun }
1897*4882a593Smuzhiyun
1898*4882a593Smuzhiyun /**
1899*4882a593Smuzhiyun * __kmem_cache_create - Create a cache.
1900*4882a593Smuzhiyun * @cachep: cache management descriptor
1901*4882a593Smuzhiyun * @flags: SLAB flags
1902*4882a593Smuzhiyun *
1903*4882a593Smuzhiyun * Returns zero on success, nonzero on failure.
1904*4882a593Smuzhiyun * Cannot be called within an interrupt, but can be interrupted.
1905*4882a593Smuzhiyun * The @ctor is run when new pages are allocated by the cache.
1906*4882a593Smuzhiyun *
1907*4882a593Smuzhiyun * The flags are
1908*4882a593Smuzhiyun *
1909*4882a593Smuzhiyun * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
1910*4882a593Smuzhiyun * to catch references to uninitialised memory.
1911*4882a593Smuzhiyun *
1912*4882a593Smuzhiyun * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
1913*4882a593Smuzhiyun * for buffer overruns.
1914*4882a593Smuzhiyun *
1915*4882a593Smuzhiyun * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
1916*4882a593Smuzhiyun * cacheline. This can be beneficial if you're counting cycles as closely
1917*4882a593Smuzhiyun * as davem.
1918*4882a593Smuzhiyun *
1919*4882a593Smuzhiyun * Return: 0 on success, nonzero in case of error
1920*4882a593Smuzhiyun */
1921*4882a593Smuzhiyun int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
1922*4882a593Smuzhiyun {
1923*4882a593Smuzhiyun size_t ralign = BYTES_PER_WORD;
1924*4882a593Smuzhiyun gfp_t gfp;
1925*4882a593Smuzhiyun int err;
1926*4882a593Smuzhiyun unsigned int size = cachep->size;
1927*4882a593Smuzhiyun
1928*4882a593Smuzhiyun #if DEBUG
1929*4882a593Smuzhiyun #if FORCED_DEBUG
1930*4882a593Smuzhiyun /*
1931*4882a593Smuzhiyun * Enable redzoning and last user accounting, except for caches with
1932*4882a593Smuzhiyun * large objects, if the increased size would increase the object size
1933*4882a593Smuzhiyun * above the next power of two: caches with object sizes just above a
1934*4882a593Smuzhiyun * power of two have a significant amount of internal fragmentation.
1935*4882a593Smuzhiyun */
1936*4882a593Smuzhiyun if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
1937*4882a593Smuzhiyun 2 * sizeof(unsigned long long)))
1938*4882a593Smuzhiyun flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
1939*4882a593Smuzhiyun if (!(flags & SLAB_TYPESAFE_BY_RCU))
1940*4882a593Smuzhiyun flags |= SLAB_POISON;
1941*4882a593Smuzhiyun #endif
1942*4882a593Smuzhiyun #endif
1943*4882a593Smuzhiyun
1944*4882a593Smuzhiyun /*
1945*4882a593Smuzhiyun * Check that size is in terms of words. This is needed to avoid
1946*4882a593Smuzhiyun * unaligned accesses for some archs when redzoning is used, and makes
1947*4882a593Smuzhiyun * sure any on-slab bufctl's are also correctly aligned.
1948*4882a593Smuzhiyun */
1949*4882a593Smuzhiyun size = ALIGN(size, BYTES_PER_WORD);
1950*4882a593Smuzhiyun
1951*4882a593Smuzhiyun if (flags & SLAB_RED_ZONE) {
1952*4882a593Smuzhiyun ralign = REDZONE_ALIGN;
1953*4882a593Smuzhiyun /* If redzoning, ensure that the second redzone is suitably
1954*4882a593Smuzhiyun * aligned, by adjusting the object size accordingly. */
1955*4882a593Smuzhiyun size = ALIGN(size, REDZONE_ALIGN);
1956*4882a593Smuzhiyun }
1957*4882a593Smuzhiyun
1958*4882a593Smuzhiyun /* 3) caller mandated alignment */
1959*4882a593Smuzhiyun if (ralign < cachep->align) {
1960*4882a593Smuzhiyun ralign = cachep->align;
1961*4882a593Smuzhiyun }
1962*4882a593Smuzhiyun /* disable debug if necessary */
1963*4882a593Smuzhiyun if (ralign > __alignof__(unsigned long long))
1964*4882a593Smuzhiyun flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
1965*4882a593Smuzhiyun /*
1966*4882a593Smuzhiyun * 4) Store it.
1967*4882a593Smuzhiyun */
1968*4882a593Smuzhiyun cachep->align = ralign;
1969*4882a593Smuzhiyun cachep->colour_off = cache_line_size();
1970*4882a593Smuzhiyun /* Offset must be a multiple of the alignment. */
1971*4882a593Smuzhiyun if (cachep->colour_off < cachep->align)
1972*4882a593Smuzhiyun cachep->colour_off = cachep->align;
1973*4882a593Smuzhiyun
1974*4882a593Smuzhiyun if (slab_is_available())
1975*4882a593Smuzhiyun gfp = GFP_KERNEL;
1976*4882a593Smuzhiyun else
1977*4882a593Smuzhiyun gfp = GFP_NOWAIT;
1978*4882a593Smuzhiyun
1979*4882a593Smuzhiyun #if DEBUG
1980*4882a593Smuzhiyun
1981*4882a593Smuzhiyun /*
1982*4882a593Smuzhiyun * Both debugging options require word-alignment which is calculated
1983*4882a593Smuzhiyun * into align above.
1984*4882a593Smuzhiyun */
1985*4882a593Smuzhiyun if (flags & SLAB_RED_ZONE) {
1986*4882a593Smuzhiyun /* add space for red zone words */
1987*4882a593Smuzhiyun cachep->obj_offset += sizeof(unsigned long long);
1988*4882a593Smuzhiyun size += 2 * sizeof(unsigned long long);
1989*4882a593Smuzhiyun }
1990*4882a593Smuzhiyun if (flags & SLAB_STORE_USER) {
1991*4882a593Smuzhiyun /* user store requires one word storage behind the end of
1992*4882a593Smuzhiyun * the real object. But if the second red zone needs to be
1993*4882a593Smuzhiyun * aligned to 64 bits, we must allow that much space.
1994*4882a593Smuzhiyun */
1995*4882a593Smuzhiyun if (flags & SLAB_RED_ZONE)
1996*4882a593Smuzhiyun size += REDZONE_ALIGN;
1997*4882a593Smuzhiyun else
1998*4882a593Smuzhiyun size += BYTES_PER_WORD;
1999*4882a593Smuzhiyun }
2000*4882a593Smuzhiyun #endif
2001*4882a593Smuzhiyun
2002*4882a593Smuzhiyun kasan_cache_create(cachep, &size, &flags);
2003*4882a593Smuzhiyun
2004*4882a593Smuzhiyun size = ALIGN(size, cachep->align);
2005*4882a593Smuzhiyun /*
2006*4882a593Smuzhiyun * Restrict the number of objects in a slab so that a byte-sized freelist
2007*4882a593Smuzhiyun * index is sufficient; see the comment on the SLAB_OBJ_MIN_SIZE definition.
2008*4882a593Smuzhiyun */
2009*4882a593Smuzhiyun if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
2010*4882a593Smuzhiyun size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);
2011*4882a593Smuzhiyun
2012*4882a593Smuzhiyun #if DEBUG
2013*4882a593Smuzhiyun /*
2014*4882a593Smuzhiyun * To activate debug pagealloc, off-slab management is a necessary
2015*4882a593Smuzhiyun * requirement. In the early phase of initialization the small kmalloc
2016*4882a593Smuzhiyun * caches are not set up yet, so off-slab management is not possible
2017*4882a593Smuzhiyun * there. Checking size >= 256 guarantees that every small kmalloc
2018*4882a593Smuzhiyun * cache we might need has already been created by this point.
2019*4882a593Smuzhiyun */
2020*4882a593Smuzhiyun if (debug_pagealloc_enabled_static() && (flags & SLAB_POISON) &&
2021*4882a593Smuzhiyun size >= 256 && cachep->object_size > cache_line_size()) {
2022*4882a593Smuzhiyun if (size < PAGE_SIZE || size % PAGE_SIZE == 0) {
2023*4882a593Smuzhiyun size_t tmp_size = ALIGN(size, PAGE_SIZE);
2024*4882a593Smuzhiyun
2025*4882a593Smuzhiyun if (set_off_slab_cache(cachep, tmp_size, flags)) {
2026*4882a593Smuzhiyun flags |= CFLGS_OFF_SLAB;
2027*4882a593Smuzhiyun cachep->obj_offset += tmp_size - size;
2028*4882a593Smuzhiyun size = tmp_size;
2029*4882a593Smuzhiyun goto done;
2030*4882a593Smuzhiyun }
2031*4882a593Smuzhiyun }
2032*4882a593Smuzhiyun }
2033*4882a593Smuzhiyun #endif
2034*4882a593Smuzhiyun
2035*4882a593Smuzhiyun if (set_objfreelist_slab_cache(cachep, size, flags)) {
2036*4882a593Smuzhiyun flags |= CFLGS_OBJFREELIST_SLAB;
2037*4882a593Smuzhiyun goto done;
2038*4882a593Smuzhiyun }
2039*4882a593Smuzhiyun
2040*4882a593Smuzhiyun if (set_off_slab_cache(cachep, size, flags)) {
2041*4882a593Smuzhiyun flags |= CFLGS_OFF_SLAB;
2042*4882a593Smuzhiyun goto done;
2043*4882a593Smuzhiyun }
2044*4882a593Smuzhiyun
2045*4882a593Smuzhiyun if (set_on_slab_cache(cachep, size, flags))
2046*4882a593Smuzhiyun goto done;
2047*4882a593Smuzhiyun
2048*4882a593Smuzhiyun return -E2BIG;
2049*4882a593Smuzhiyun
2050*4882a593Smuzhiyun done:
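	/* One freelist index per object in the slab. */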
2051*4882a593Smuzhiyun cachep->freelist_size = cachep->num * sizeof(freelist_idx_t);
2052*4882a593Smuzhiyun cachep->flags = flags;
2053*4882a593Smuzhiyun cachep->allocflags = __GFP_COMP;
2054*4882a593Smuzhiyun if (flags & SLAB_CACHE_DMA)
2055*4882a593Smuzhiyun cachep->allocflags |= GFP_DMA;
2056*4882a593Smuzhiyun if (flags & SLAB_CACHE_DMA32)
2057*4882a593Smuzhiyun cachep->allocflags |= GFP_DMA32;
2058*4882a593Smuzhiyun if (flags & SLAB_RECLAIM_ACCOUNT)
2059*4882a593Smuzhiyun cachep->allocflags |= __GFP_RECLAIMABLE;
2060*4882a593Smuzhiyun cachep->size = size;
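	/*
	 * Cache a reciprocal of the object size so that object index
	 * calculations can use a multiplication instead of a division.
	 */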
2061*4882a593Smuzhiyun cachep->reciprocal_buffer_size = reciprocal_value(size);
2062*4882a593Smuzhiyun
2063*4882a593Smuzhiyun #if DEBUG
2064*4882a593Smuzhiyun /*
2065*4882a593Smuzhiyun * If we're going to use the generic kernel_map_pages()
2066*4882a593Smuzhiyun * poisoning, then it's going to smash the contents of
2067*4882a593Smuzhiyun * the redzone and userword anyhow, so switch them off.
2068*4882a593Smuzhiyun */
2069*4882a593Smuzhiyun if (IS_ENABLED(CONFIG_PAGE_POISONING) &&
2070*4882a593Smuzhiyun (cachep->flags & SLAB_POISON) &&
2071*4882a593Smuzhiyun is_debug_pagealloc_cache(cachep))
2072*4882a593Smuzhiyun cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2073*4882a593Smuzhiyun #endif
2074*4882a593Smuzhiyun
2075*4882a593Smuzhiyun if (OFF_SLAB(cachep)) {
2076*4882a593Smuzhiyun cachep->freelist_cache =
2077*4882a593Smuzhiyun kmalloc_slab(cachep->freelist_size, 0u);
2078*4882a593Smuzhiyun }
2079*4882a593Smuzhiyun
2080*4882a593Smuzhiyun err = setup_cpu_cache(cachep, gfp);
2081*4882a593Smuzhiyun if (err) {
2082*4882a593Smuzhiyun __kmem_cache_release(cachep);
2083*4882a593Smuzhiyun return err;
2084*4882a593Smuzhiyun }
2085*4882a593Smuzhiyun
2086*4882a593Smuzhiyun return 0;
2087*4882a593Smuzhiyun }
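/*
 * Illustrative sketch only (not part of this file): callers normally reach
 * __kmem_cache_create() through kmem_cache_create().  The name "foo_cache"
 * and struct foo below are hypothetical.
 *
 *	static struct kmem_cache *foo_cache;
 *
 *	foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
 *				      0, SLAB_HWCACHE_ALIGN, NULL);
 *	if (!foo_cache)
 *		return -ENOMEM;
 */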
2088*4882a593Smuzhiyun
2089*4882a593Smuzhiyun #if DEBUG
2090*4882a593Smuzhiyun static void check_irq_off(void)
2091*4882a593Smuzhiyun {
2092*4882a593Smuzhiyun BUG_ON(!irqs_disabled());
2093*4882a593Smuzhiyun }
2094*4882a593Smuzhiyun
2095*4882a593Smuzhiyun static void check_irq_on(void)
2096*4882a593Smuzhiyun {
2097*4882a593Smuzhiyun BUG_ON(irqs_disabled());
2098*4882a593Smuzhiyun }
2099*4882a593Smuzhiyun
2100*4882a593Smuzhiyun static void check_mutex_acquired(void)
2101*4882a593Smuzhiyun {
2102*4882a593Smuzhiyun BUG_ON(!mutex_is_locked(&slab_mutex));
2103*4882a593Smuzhiyun }
2104*4882a593Smuzhiyun
2105*4882a593Smuzhiyun static void check_spinlock_acquired(struct kmem_cache *cachep)
2106*4882a593Smuzhiyun {
2107*4882a593Smuzhiyun #ifdef CONFIG_SMP
2108*4882a593Smuzhiyun check_irq_off();
2109*4882a593Smuzhiyun assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
2110*4882a593Smuzhiyun #endif
2111*4882a593Smuzhiyun }
2112*4882a593Smuzhiyun
2113*4882a593Smuzhiyun static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2114*4882a593Smuzhiyun {
2115*4882a593Smuzhiyun #ifdef CONFIG_SMP
2116*4882a593Smuzhiyun check_irq_off();
2117*4882a593Smuzhiyun assert_spin_locked(&get_node(cachep, node)->list_lock);
2118*4882a593Smuzhiyun #endif
2119*4882a593Smuzhiyun }
2120*4882a593Smuzhiyun
2121*4882a593Smuzhiyun #else
2122*4882a593Smuzhiyun #define check_irq_off() do { } while(0)
2123*4882a593Smuzhiyun #define check_irq_on() do { } while(0)
2124*4882a593Smuzhiyun #define check_mutex_acquired() do { } while(0)
2125*4882a593Smuzhiyun #define check_spinlock_acquired(x) do { } while(0)
2126*4882a593Smuzhiyun #define check_spinlock_acquired_node(x, y) do { } while(0)
2127*4882a593Smuzhiyun #endif
2128*4882a593Smuzhiyun
2129*4882a593Smuzhiyun static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
2130*4882a593Smuzhiyun int node, bool free_all, struct list_head *list)
2131*4882a593Smuzhiyun {
2132*4882a593Smuzhiyun int tofree;
2133*4882a593Smuzhiyun
2134*4882a593Smuzhiyun if (!ac || !ac->avail)
2135*4882a593Smuzhiyun return;
2136*4882a593Smuzhiyun
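	/*
	 * Unless draining everything, free roughly a fifth of the limit,
	 * but never more than about half of the currently cached objects.
	 */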
2137*4882a593Smuzhiyun tofree = free_all ? ac->avail : (ac->limit + 4) / 5;
2138*4882a593Smuzhiyun if (tofree > ac->avail)
2139*4882a593Smuzhiyun tofree = (ac->avail + 1) / 2;
2140*4882a593Smuzhiyun
2141*4882a593Smuzhiyun free_block(cachep, ac->entry, tofree, node, list);
2142*4882a593Smuzhiyun ac->avail -= tofree;
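	/* Slide the surviving pointers down to the front of the LIFO array. */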
2143*4882a593Smuzhiyun memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail);
2144*4882a593Smuzhiyun }
2145*4882a593Smuzhiyun
2146*4882a593Smuzhiyun static void do_drain(void *arg)
2147*4882a593Smuzhiyun {
2148*4882a593Smuzhiyun struct kmem_cache *cachep = arg;
2149*4882a593Smuzhiyun struct array_cache *ac;
2150*4882a593Smuzhiyun int node = numa_mem_id();
2151*4882a593Smuzhiyun struct kmem_cache_node *n;
2152*4882a593Smuzhiyun LIST_HEAD(list);
2153*4882a593Smuzhiyun
2154*4882a593Smuzhiyun check_irq_off();
2155*4882a593Smuzhiyun ac = cpu_cache_get(cachep);
2156*4882a593Smuzhiyun n = get_node(cachep, node);
2157*4882a593Smuzhiyun spin_lock(&n->list_lock);
2158*4882a593Smuzhiyun free_block(cachep, ac->entry, ac->avail, node, &list);
2159*4882a593Smuzhiyun spin_unlock(&n->list_lock);
2160*4882a593Smuzhiyun ac->avail = 0;
2161*4882a593Smuzhiyun slabs_destroy(cachep, &list);
2162*4882a593Smuzhiyun }
2163*4882a593Smuzhiyun
2164*4882a593Smuzhiyun static void drain_cpu_caches(struct kmem_cache *cachep)
2165*4882a593Smuzhiyun {
2166*4882a593Smuzhiyun struct kmem_cache_node *n;
2167*4882a593Smuzhiyun int node;
2168*4882a593Smuzhiyun LIST_HEAD(list);
2169*4882a593Smuzhiyun
2170*4882a593Smuzhiyun on_each_cpu(do_drain, cachep, 1);
2171*4882a593Smuzhiyun check_irq_on();
2172*4882a593Smuzhiyun for_each_kmem_cache_node(cachep, node, n)
2173*4882a593Smuzhiyun if (n->alien)
2174*4882a593Smuzhiyun drain_alien_cache(cachep, n->alien);
2175*4882a593Smuzhiyun
2176*4882a593Smuzhiyun for_each_kmem_cache_node(cachep, node, n) {
2177*4882a593Smuzhiyun spin_lock_irq(&n->list_lock);
2178*4882a593Smuzhiyun drain_array_locked(cachep, n->shared, node, true, &list);
2179*4882a593Smuzhiyun spin_unlock_irq(&n->list_lock);
2180*4882a593Smuzhiyun
2181*4882a593Smuzhiyun slabs_destroy(cachep, &list);
2182*4882a593Smuzhiyun }
2183*4882a593Smuzhiyun }
2184*4882a593Smuzhiyun
2185*4882a593Smuzhiyun /*
2186*4882a593Smuzhiyun * Remove slabs from the list of free slabs.
2187*4882a593Smuzhiyun * Specify the number of slabs to drain in tofree.
2188*4882a593Smuzhiyun *
2189*4882a593Smuzhiyun * Returns the actual number of slabs released.
2190*4882a593Smuzhiyun */
2191*4882a593Smuzhiyun static int drain_freelist(struct kmem_cache *cache,
2192*4882a593Smuzhiyun struct kmem_cache_node *n, int tofree)
2193*4882a593Smuzhiyun {
2194*4882a593Smuzhiyun struct list_head *p;
2195*4882a593Smuzhiyun int nr_freed;
2196*4882a593Smuzhiyun struct page *page;
2197*4882a593Smuzhiyun
2198*4882a593Smuzhiyun nr_freed = 0;
2199*4882a593Smuzhiyun while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
2200*4882a593Smuzhiyun
2201*4882a593Smuzhiyun spin_lock_irq(&n->list_lock);
2202*4882a593Smuzhiyun p = n->slabs_free.prev;
2203*4882a593Smuzhiyun if (p == &n->slabs_free) {
2204*4882a593Smuzhiyun spin_unlock_irq(&n->list_lock);
2205*4882a593Smuzhiyun goto out;
2206*4882a593Smuzhiyun }
2207*4882a593Smuzhiyun
2208*4882a593Smuzhiyun page = list_entry(p, struct page, slab_list);
2209*4882a593Smuzhiyun list_del(&page->slab_list);
2210*4882a593Smuzhiyun n->free_slabs--;
2211*4882a593Smuzhiyun n->total_slabs--;
2212*4882a593Smuzhiyun /*
2213*4882a593Smuzhiyun * Safe to drop the lock. The slab is no longer linked
2214*4882a593Smuzhiyun * to the cache.
2215*4882a593Smuzhiyun */
2216*4882a593Smuzhiyun n->free_objects -= cache->num;
2217*4882a593Smuzhiyun spin_unlock_irq(&n->list_lock);
2218*4882a593Smuzhiyun slab_destroy(cache, page);
2219*4882a593Smuzhiyun nr_freed++;
2220*4882a593Smuzhiyun }
2221*4882a593Smuzhiyun out:
2222*4882a593Smuzhiyun return nr_freed;
2223*4882a593Smuzhiyun }
2224*4882a593Smuzhiyun
2225*4882a593Smuzhiyun bool __kmem_cache_empty(struct kmem_cache *s)
2226*4882a593Smuzhiyun {
2227*4882a593Smuzhiyun int node;
2228*4882a593Smuzhiyun struct kmem_cache_node *n;
2229*4882a593Smuzhiyun
2230*4882a593Smuzhiyun for_each_kmem_cache_node(s, node, n)
2231*4882a593Smuzhiyun if (!list_empty(&n->slabs_full) ||
2232*4882a593Smuzhiyun !list_empty(&n->slabs_partial))
2233*4882a593Smuzhiyun return false;
2234*4882a593Smuzhiyun return true;
2235*4882a593Smuzhiyun }
2236*4882a593Smuzhiyun
2237*4882a593Smuzhiyun int __kmem_cache_shrink(struct kmem_cache *cachep)
2238*4882a593Smuzhiyun {
2239*4882a593Smuzhiyun int ret = 0;
2240*4882a593Smuzhiyun int node;
2241*4882a593Smuzhiyun struct kmem_cache_node *n;
2242*4882a593Smuzhiyun
2243*4882a593Smuzhiyun drain_cpu_caches(cachep);
2244*4882a593Smuzhiyun
2245*4882a593Smuzhiyun check_irq_on();
2246*4882a593Smuzhiyun for_each_kmem_cache_node(cachep, node, n) {
2247*4882a593Smuzhiyun drain_freelist(cachep, n, INT_MAX);
2248*4882a593Smuzhiyun
2249*4882a593Smuzhiyun ret += !list_empty(&n->slabs_full) ||
2250*4882a593Smuzhiyun !list_empty(&n->slabs_partial);
2251*4882a593Smuzhiyun }
2252*4882a593Smuzhiyun return (ret ? 1 : 0);
2253*4882a593Smuzhiyun }
2254*4882a593Smuzhiyun
2255*4882a593Smuzhiyun int __kmem_cache_shutdown(struct kmem_cache *cachep)
2256*4882a593Smuzhiyun {
2257*4882a593Smuzhiyun return __kmem_cache_shrink(cachep);
2258*4882a593Smuzhiyun }
2259*4882a593Smuzhiyun
2260*4882a593Smuzhiyun void __kmem_cache_release(struct kmem_cache *cachep)
2261*4882a593Smuzhiyun {
2262*4882a593Smuzhiyun int i;
2263*4882a593Smuzhiyun struct kmem_cache_node *n;
2264*4882a593Smuzhiyun
2265*4882a593Smuzhiyun cache_random_seq_destroy(cachep);
2266*4882a593Smuzhiyun
2267*4882a593Smuzhiyun free_percpu(cachep->cpu_cache);
2268*4882a593Smuzhiyun
2269*4882a593Smuzhiyun /* NUMA: free the node structures */
2270*4882a593Smuzhiyun for_each_kmem_cache_node(cachep, i, n) {
2271*4882a593Smuzhiyun kfree(n->shared);
2272*4882a593Smuzhiyun free_alien_cache(n->alien);
2273*4882a593Smuzhiyun kfree(n);
2274*4882a593Smuzhiyun cachep->node[i] = NULL;
2275*4882a593Smuzhiyun }
2276*4882a593Smuzhiyun }
2277*4882a593Smuzhiyun
2278*4882a593Smuzhiyun /*
2279*4882a593Smuzhiyun * Get the memory for a slab management obj.
2280*4882a593Smuzhiyun *
2281*4882a593Smuzhiyun * When the slab descriptor is off-slab, it cannot come from the same
2282*4882a593Smuzhiyun * cache that is currently being created. If it did, the creation of
2283*4882a593Smuzhiyun * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) would be
2284*4882a593Smuzhiyun * deferred to this point, and we would eventually call down to
2285*4882a593Smuzhiyun * __kmem_cache_create(), which in turn looks up the desired-size cache
2286*4882a593Smuzhiyun * in the kmalloc_{dma,}_caches.
2287*4882a593Smuzhiyun * This is a "chicken-and-egg" problem.
2288*4882a593Smuzhiyun *
2289*4882a593Smuzhiyun * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
2290*4882a593Smuzhiyun * which are all initialized during kmem_cache_init().
2291*4882a593Smuzhiyun */
2292*4882a593Smuzhiyun static void *alloc_slabmgmt(struct kmem_cache *cachep,
2293*4882a593Smuzhiyun struct page *page, int colour_off,
2294*4882a593Smuzhiyun gfp_t local_flags, int nodeid)
2295*4882a593Smuzhiyun {
2296*4882a593Smuzhiyun void *freelist;
2297*4882a593Smuzhiyun void *addr = page_address(page);
2298*4882a593Smuzhiyun
2299*4882a593Smuzhiyun page->s_mem = addr + colour_off;
2300*4882a593Smuzhiyun page->active = 0;
2301*4882a593Smuzhiyun
2302*4882a593Smuzhiyun if (OBJFREELIST_SLAB(cachep))
2303*4882a593Smuzhiyun freelist = NULL;
2304*4882a593Smuzhiyun else if (OFF_SLAB(cachep)) {
2305*4882a593Smuzhiyun /* Slab management obj is off-slab. */
2306*4882a593Smuzhiyun freelist = kmem_cache_alloc_node(cachep->freelist_cache,
2307*4882a593Smuzhiyun local_flags, nodeid);
2308*4882a593Smuzhiyun } else {
2309*4882a593Smuzhiyun /* We will use the last bytes of the slab for the freelist */
2310*4882a593Smuzhiyun freelist = addr + (PAGE_SIZE << cachep->gfporder) -
2311*4882a593Smuzhiyun cachep->freelist_size;
2312*4882a593Smuzhiyun }
2313*4882a593Smuzhiyun
2314*4882a593Smuzhiyun return freelist;
2315*4882a593Smuzhiyun }
2316*4882a593Smuzhiyun
2317*4882a593Smuzhiyun static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
2318*4882a593Smuzhiyun {
2319*4882a593Smuzhiyun return ((freelist_idx_t *)page->freelist)[idx];
2320*4882a593Smuzhiyun }
2321*4882a593Smuzhiyun
2322*4882a593Smuzhiyun static inline void set_free_obj(struct page *page,
2323*4882a593Smuzhiyun unsigned int idx, freelist_idx_t val)
2324*4882a593Smuzhiyun {
2325*4882a593Smuzhiyun ((freelist_idx_t *)(page->freelist))[idx] = val;
2326*4882a593Smuzhiyun }
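
/*
 * Illustrative sketch only, not part of the original file: the freelist of a
 * slab is an array of freelist_idx_t, and the slots in the range
 * [page->active, cachep->num) hold the indices of the currently free objects
 * in LIFO order.  The helper name below is hypothetical.
 */
#if 0
static void example_dump_freelist(struct kmem_cache *cachep, struct page *page)
{
	unsigned int i;

	for (i = page->active; i < cachep->num; i++)
		pr_info("free slot %u -> object index %u\n",
			i, get_free_obj(page, i));
}
#endif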
2327*4882a593Smuzhiyun
2328*4882a593Smuzhiyun static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
2329*4882a593Smuzhiyun {
2330*4882a593Smuzhiyun #if DEBUG
2331*4882a593Smuzhiyun int i;
2332*4882a593Smuzhiyun
2333*4882a593Smuzhiyun for (i = 0; i < cachep->num; i++) {
2334*4882a593Smuzhiyun void *objp = index_to_obj(cachep, page, i);
2335*4882a593Smuzhiyun
2336*4882a593Smuzhiyun if (cachep->flags & SLAB_STORE_USER)
2337*4882a593Smuzhiyun *dbg_userword(cachep, objp) = NULL;
2338*4882a593Smuzhiyun
2339*4882a593Smuzhiyun if (cachep->flags & SLAB_RED_ZONE) {
2340*4882a593Smuzhiyun *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2341*4882a593Smuzhiyun *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2342*4882a593Smuzhiyun }
2343*4882a593Smuzhiyun /*
2344*4882a593Smuzhiyun * Constructors are not allowed to allocate memory from the same
2345*4882a593Smuzhiyun * cache which they are a constructor for. Otherwise, deadlock.
2346*4882a593Smuzhiyun * They must also be threaded.
2347*4882a593Smuzhiyun */
2348*4882a593Smuzhiyun if (cachep->ctor && !(cachep->flags & SLAB_POISON)) {
2349*4882a593Smuzhiyun kasan_unpoison_object_data(cachep,
2350*4882a593Smuzhiyun objp + obj_offset(cachep));
2351*4882a593Smuzhiyun cachep->ctor(objp + obj_offset(cachep));
2352*4882a593Smuzhiyun kasan_poison_object_data(
2353*4882a593Smuzhiyun cachep, objp + obj_offset(cachep));
2354*4882a593Smuzhiyun }
2355*4882a593Smuzhiyun
2356*4882a593Smuzhiyun if (cachep->flags & SLAB_RED_ZONE) {
2357*4882a593Smuzhiyun if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2358*4882a593Smuzhiyun slab_error(cachep, "constructor overwrote the end of an object");
2359*4882a593Smuzhiyun if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2360*4882a593Smuzhiyun slab_error(cachep, "constructor overwrote the start of an object");
2361*4882a593Smuzhiyun }
2362*4882a593Smuzhiyun /* need to poison the objs? */
2363*4882a593Smuzhiyun if (cachep->flags & SLAB_POISON) {
2364*4882a593Smuzhiyun poison_obj(cachep, objp, POISON_FREE);
2365*4882a593Smuzhiyun slab_kernel_map(cachep, objp, 0);
2366*4882a593Smuzhiyun }
2367*4882a593Smuzhiyun }
2368*4882a593Smuzhiyun #endif
2369*4882a593Smuzhiyun }
2370*4882a593Smuzhiyun
2371*4882a593Smuzhiyun #ifdef CONFIG_SLAB_FREELIST_RANDOM
2372*4882a593Smuzhiyun /* Hold information during a freelist initialization */
2373*4882a593Smuzhiyun union freelist_init_state {
2374*4882a593Smuzhiyun struct {
2375*4882a593Smuzhiyun unsigned int pos;
2376*4882a593Smuzhiyun unsigned int *list;
2377*4882a593Smuzhiyun unsigned int count;
2378*4882a593Smuzhiyun };
2379*4882a593Smuzhiyun struct rnd_state rnd_state;
2380*4882a593Smuzhiyun };
2381*4882a593Smuzhiyun
2382*4882a593Smuzhiyun /*
2383*4882a593Smuzhiyun * Initialize the state based on the randomization method available.
2384*4882a593Smuzhiyun * Return true if the pre-computed list is available, false otherwise.
2385*4882a593Smuzhiyun */
2386*4882a593Smuzhiyun static bool freelist_state_initialize(union freelist_init_state *state,
2387*4882a593Smuzhiyun struct kmem_cache *cachep,
2388*4882a593Smuzhiyun unsigned int count)
2389*4882a593Smuzhiyun {
2390*4882a593Smuzhiyun bool ret;
2391*4882a593Smuzhiyun unsigned int rand;
2392*4882a593Smuzhiyun
2393*4882a593Smuzhiyun /* Use best entropy available to define a random shift */
2394*4882a593Smuzhiyun rand = get_random_int();
2395*4882a593Smuzhiyun
2396*4882a593Smuzhiyun /* Use a random state if the pre-computed list is not available */
2397*4882a593Smuzhiyun if (!cachep->random_seq) {
2398*4882a593Smuzhiyun prandom_seed_state(&state->rnd_state, rand);
2399*4882a593Smuzhiyun ret = false;
2400*4882a593Smuzhiyun } else {
2401*4882a593Smuzhiyun state->list = cachep->random_seq;
2402*4882a593Smuzhiyun state->count = count;
2403*4882a593Smuzhiyun state->pos = rand % count;
2404*4882a593Smuzhiyun ret = true;
2405*4882a593Smuzhiyun }
2406*4882a593Smuzhiyun return ret;
2407*4882a593Smuzhiyun }
2408*4882a593Smuzhiyun
2409*4882a593Smuzhiyun /* Get the next entry on the list and randomize it using a random shift */
2410*4882a593Smuzhiyun static freelist_idx_t next_random_slot(union freelist_init_state *state)
2411*4882a593Smuzhiyun {
2412*4882a593Smuzhiyun if (state->pos >= state->count)
2413*4882a593Smuzhiyun state->pos = 0;
2414*4882a593Smuzhiyun return state->list[state->pos++];
2415*4882a593Smuzhiyun }
2416*4882a593Smuzhiyun
2417*4882a593Smuzhiyun /* Swap two freelist entries */
2418*4882a593Smuzhiyun static void swap_free_obj(struct page *page, unsigned int a, unsigned int b)
2419*4882a593Smuzhiyun {
2420*4882a593Smuzhiyun swap(((freelist_idx_t *)page->freelist)[a],
2421*4882a593Smuzhiyun ((freelist_idx_t *)page->freelist)[b]);
2422*4882a593Smuzhiyun }
2423*4882a593Smuzhiyun
2424*4882a593Smuzhiyun /*
2425*4882a593Smuzhiyun * Shuffle the freelist initialization state based on pre-computed lists.
2426*4882a593Smuzhiyun * return true if the list was successfully shuffled, false otherwise.
2427*4882a593Smuzhiyun */
2428*4882a593Smuzhiyun static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
2429*4882a593Smuzhiyun {
2430*4882a593Smuzhiyun unsigned int objfreelist = 0, i, rand, count = cachep->num;
2431*4882a593Smuzhiyun union freelist_init_state state;
2432*4882a593Smuzhiyun bool precomputed;
2433*4882a593Smuzhiyun
2434*4882a593Smuzhiyun if (count < 2)
2435*4882a593Smuzhiyun return false;
2436*4882a593Smuzhiyun
2437*4882a593Smuzhiyun precomputed = freelist_state_initialize(&state, cachep, count);
2438*4882a593Smuzhiyun
2439*4882a593Smuzhiyun /* Take a random entry as the objfreelist */
2440*4882a593Smuzhiyun if (OBJFREELIST_SLAB(cachep)) {
2441*4882a593Smuzhiyun if (!precomputed)
2442*4882a593Smuzhiyun objfreelist = count - 1;
2443*4882a593Smuzhiyun else
2444*4882a593Smuzhiyun objfreelist = next_random_slot(&state);
2445*4882a593Smuzhiyun page->freelist = index_to_obj(cachep, page, objfreelist) +
2446*4882a593Smuzhiyun obj_offset(cachep);
2447*4882a593Smuzhiyun count--;
2448*4882a593Smuzhiyun }
2449*4882a593Smuzhiyun
2450*4882a593Smuzhiyun /*
2451*4882a593Smuzhiyun * On early boot, generate the list dynamically.
2452*4882a593Smuzhiyun * Later use a pre-computed list for speed.
2453*4882a593Smuzhiyun */
2454*4882a593Smuzhiyun if (!precomputed) {
2455*4882a593Smuzhiyun for (i = 0; i < count; i++)
2456*4882a593Smuzhiyun set_free_obj(page, i, i);
2457*4882a593Smuzhiyun
2458*4882a593Smuzhiyun /* Fisher-Yates shuffle */
2459*4882a593Smuzhiyun for (i = count - 1; i > 0; i--) {
2460*4882a593Smuzhiyun rand = prandom_u32_state(&state.rnd_state);
2461*4882a593Smuzhiyun rand %= (i + 1);
2462*4882a593Smuzhiyun swap_free_obj(page, i, rand);
2463*4882a593Smuzhiyun }
2464*4882a593Smuzhiyun } else {
2465*4882a593Smuzhiyun for (i = 0; i < count; i++)
2466*4882a593Smuzhiyun set_free_obj(page, i, next_random_slot(&state));
2467*4882a593Smuzhiyun }
2468*4882a593Smuzhiyun
2469*4882a593Smuzhiyun if (OBJFREELIST_SLAB(cachep))
2470*4882a593Smuzhiyun set_free_obj(page, cachep->num - 1, objfreelist);
2471*4882a593Smuzhiyun
2472*4882a593Smuzhiyun return true;
2473*4882a593Smuzhiyun }
2474*4882a593Smuzhiyun #else
2475*4882a593Smuzhiyun static inline bool shuffle_freelist(struct kmem_cache *cachep,
2476*4882a593Smuzhiyun struct page *page)
2477*4882a593Smuzhiyun {
2478*4882a593Smuzhiyun return false;
2479*4882a593Smuzhiyun }
2480*4882a593Smuzhiyun #endif /* CONFIG_SLAB_FREELIST_RANDOM */
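
/*
 * Minimal sketch, not compiled into this file: the boot-time fallback in
 * shuffle_freelist() above is a plain in-place Fisher-Yates shuffle of the
 * identity permutation.  The function name is hypothetical, and the sketch
 * assumes CONFIG_SLAB_FREELIST_RANDOM so the helpers used here exist.
 */
#if 0
static void example_fisher_yates(struct kmem_cache *cachep, struct page *page,
				 struct rnd_state *rnd)
{
	unsigned int i, j;

	/* Start from the identity permutation... */
	for (i = 0; i < cachep->num; i++)
		set_free_obj(page, i, i);

	/* ...then swap each slot with a randomly chosen earlier one. */
	for (i = cachep->num - 1; i > 0; i--) {
		j = prandom_u32_state(rnd) % (i + 1);
		swap_free_obj(page, i, j);
	}
}
#endif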
2481*4882a593Smuzhiyun
2482*4882a593Smuzhiyun static void cache_init_objs(struct kmem_cache *cachep,
2483*4882a593Smuzhiyun struct page *page)
2484*4882a593Smuzhiyun {
2485*4882a593Smuzhiyun int i;
2486*4882a593Smuzhiyun void *objp;
2487*4882a593Smuzhiyun bool shuffled;
2488*4882a593Smuzhiyun
2489*4882a593Smuzhiyun cache_init_objs_debug(cachep, page);
2490*4882a593Smuzhiyun
2491*4882a593Smuzhiyun /* Try to randomize the freelist if enabled */
2492*4882a593Smuzhiyun shuffled = shuffle_freelist(cachep, page);
2493*4882a593Smuzhiyun
2494*4882a593Smuzhiyun if (!shuffled && OBJFREELIST_SLAB(cachep)) {
2495*4882a593Smuzhiyun page->freelist = index_to_obj(cachep, page, cachep->num - 1) +
2496*4882a593Smuzhiyun obj_offset(cachep);
2497*4882a593Smuzhiyun }
2498*4882a593Smuzhiyun
2499*4882a593Smuzhiyun for (i = 0; i < cachep->num; i++) {
2500*4882a593Smuzhiyun objp = index_to_obj(cachep, page, i);
2501*4882a593Smuzhiyun objp = kasan_init_slab_obj(cachep, objp);
2502*4882a593Smuzhiyun
2503*4882a593Smuzhiyun /* constructor could break poison info */
2504*4882a593Smuzhiyun if (DEBUG == 0 && cachep->ctor) {
2505*4882a593Smuzhiyun kasan_unpoison_object_data(cachep, objp);
2506*4882a593Smuzhiyun cachep->ctor(objp);
2507*4882a593Smuzhiyun kasan_poison_object_data(cachep, objp);
2508*4882a593Smuzhiyun }
2509*4882a593Smuzhiyun
2510*4882a593Smuzhiyun if (!shuffled)
2511*4882a593Smuzhiyun set_free_obj(page, i, i);
2512*4882a593Smuzhiyun }
2513*4882a593Smuzhiyun }
2514*4882a593Smuzhiyun
2515*4882a593Smuzhiyun static void *slab_get_obj(struct kmem_cache *cachep, struct page *page)
2516*4882a593Smuzhiyun {
2517*4882a593Smuzhiyun void *objp;
2518*4882a593Smuzhiyun
2519*4882a593Smuzhiyun objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
2520*4882a593Smuzhiyun page->active++;
2521*4882a593Smuzhiyun
2522*4882a593Smuzhiyun return objp;
2523*4882a593Smuzhiyun }
2524*4882a593Smuzhiyun
2525*4882a593Smuzhiyun static void slab_put_obj(struct kmem_cache *cachep,
2526*4882a593Smuzhiyun struct page *page, void *objp)
2527*4882a593Smuzhiyun {
2528*4882a593Smuzhiyun unsigned int objnr = obj_to_index(cachep, page, objp);
2529*4882a593Smuzhiyun #if DEBUG
2530*4882a593Smuzhiyun unsigned int i;
2531*4882a593Smuzhiyun
2532*4882a593Smuzhiyun /* Verify double free bug */
2533*4882a593Smuzhiyun for (i = page->active; i < cachep->num; i++) {
2534*4882a593Smuzhiyun if (get_free_obj(page, i) == objnr) {
2535*4882a593Smuzhiyun pr_err("slab: double free detected in cache '%s', objp %px\n",
2536*4882a593Smuzhiyun cachep->name, objp);
2537*4882a593Smuzhiyun BUG();
2538*4882a593Smuzhiyun }
2539*4882a593Smuzhiyun }
2540*4882a593Smuzhiyun #endif
2541*4882a593Smuzhiyun page->active--;
2542*4882a593Smuzhiyun if (!page->freelist)
2543*4882a593Smuzhiyun page->freelist = objp + obj_offset(cachep);
2544*4882a593Smuzhiyun
2545*4882a593Smuzhiyun set_free_obj(page, page->active, objnr);
2546*4882a593Smuzhiyun }
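
/*
 * Sketch of the misuse that the DEBUG scan in slab_put_obj() catches
 * (illustration only, not part of this file): freeing the same object twice
 * leaves its index already recorded in a free slot, so the second free trips
 * the check above and BUG()s.  The function name is hypothetical.
 */
#if 0
static void example_double_free(struct kmem_cache *cachep)
{
	void *obj = kmem_cache_alloc(cachep, GFP_KERNEL);

	kmem_cache_free(cachep, obj);
	kmem_cache_free(cachep, obj);	/* detected by the scan above */
}
#endif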
2547*4882a593Smuzhiyun
2548*4882a593Smuzhiyun /*
2549*4882a593Smuzhiyun * Map pages beginning at addr to the given cache and slab. This is required
2550*4882a593Smuzhiyun * for the slab allocator to be able to lookup the cache and slab of a
2551*4882a593Smuzhiyun * virtual address for kfree, ksize, and slab debugging.
2552*4882a593Smuzhiyun */
2553*4882a593Smuzhiyun static void slab_map_pages(struct kmem_cache *cache, struct page *page,
2554*4882a593Smuzhiyun void *freelist)
2555*4882a593Smuzhiyun {
2556*4882a593Smuzhiyun page->slab_cache = cache;
2557*4882a593Smuzhiyun page->freelist = freelist;
2558*4882a593Smuzhiyun }
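
/*
 * Sketch (not compiled): the reverse lookup that slab_map_pages() makes
 * possible.  Given any object address, kfree()/ksize() reach the owning
 * cache through the head page.  The helper name is hypothetical.
 */
#if 0
static struct kmem_cache *example_obj_to_cache(const void *objp)
{
	struct page *page = virt_to_head_page(objp);

	return page->slab_cache;
}
#endif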
2559*4882a593Smuzhiyun
2560*4882a593Smuzhiyun /*
2561*4882a593Smuzhiyun * Grow (by 1) the number of slabs within a cache. This is called by
2562*4882a593Smuzhiyun * kmem_cache_alloc() when there are no active objs left in a cache.
2563*4882a593Smuzhiyun */
2564*4882a593Smuzhiyun static struct page *cache_grow_begin(struct kmem_cache *cachep,
2565*4882a593Smuzhiyun gfp_t flags, int nodeid)
2566*4882a593Smuzhiyun {
2567*4882a593Smuzhiyun void *freelist;
2568*4882a593Smuzhiyun size_t offset;
2569*4882a593Smuzhiyun gfp_t local_flags;
2570*4882a593Smuzhiyun int page_node;
2571*4882a593Smuzhiyun struct kmem_cache_node *n;
2572*4882a593Smuzhiyun struct page *page;
2573*4882a593Smuzhiyun
2574*4882a593Smuzhiyun /*
2575*4882a593Smuzhiyun * Be lazy and only check for valid flags here, keeping it out of the
2576*4882a593Smuzhiyun * critical path in kmem_cache_alloc().
2577*4882a593Smuzhiyun */
2578*4882a593Smuzhiyun if (unlikely(flags & GFP_SLAB_BUG_MASK))
2579*4882a593Smuzhiyun flags = kmalloc_fix_flags(flags);
2580*4882a593Smuzhiyun
2581*4882a593Smuzhiyun WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
2582*4882a593Smuzhiyun local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2583*4882a593Smuzhiyun
2584*4882a593Smuzhiyun check_irq_off();
2585*4882a593Smuzhiyun if (gfpflags_allow_blocking(local_flags))
2586*4882a593Smuzhiyun local_irq_enable();
2587*4882a593Smuzhiyun
2588*4882a593Smuzhiyun /*
2589*4882a593Smuzhiyun * Get mem for the objs. Attempt to allocate a physical page from
2590*4882a593Smuzhiyun * 'nodeid'.
2591*4882a593Smuzhiyun */
2592*4882a593Smuzhiyun page = kmem_getpages(cachep, local_flags, nodeid);
2593*4882a593Smuzhiyun if (!page)
2594*4882a593Smuzhiyun goto failed;
2595*4882a593Smuzhiyun
2596*4882a593Smuzhiyun page_node = page_to_nid(page);
2597*4882a593Smuzhiyun n = get_node(cachep, page_node);
2598*4882a593Smuzhiyun
2599*4882a593Smuzhiyun /* Get colour for the slab, and calculate the next value. */
2600*4882a593Smuzhiyun n->colour_next++;
2601*4882a593Smuzhiyun if (n->colour_next >= cachep->colour)
2602*4882a593Smuzhiyun n->colour_next = 0;
2603*4882a593Smuzhiyun
2604*4882a593Smuzhiyun offset = n->colour_next;
2605*4882a593Smuzhiyun if (offset >= cachep->colour)
2606*4882a593Smuzhiyun offset = 0;
2607*4882a593Smuzhiyun
2608*4882a593Smuzhiyun offset *= cachep->colour_off;
2609*4882a593Smuzhiyun
2610*4882a593Smuzhiyun /*
2611*4882a593Smuzhiyun * Call kasan_poison_slab() before calling alloc_slabmgmt(), so
2612*4882a593Smuzhiyun * page_address() in the latter returns a non-tagged pointer,
2613*4882a593Smuzhiyun * as it should be for slab pages.
2614*4882a593Smuzhiyun */
2615*4882a593Smuzhiyun kasan_poison_slab(page);
2616*4882a593Smuzhiyun
2617*4882a593Smuzhiyun /* Get slab management. */
2618*4882a593Smuzhiyun freelist = alloc_slabmgmt(cachep, page, offset,
2619*4882a593Smuzhiyun local_flags & ~GFP_CONSTRAINT_MASK, page_node);
2620*4882a593Smuzhiyun if (OFF_SLAB(cachep) && !freelist)
2621*4882a593Smuzhiyun goto opps1;
2622*4882a593Smuzhiyun
2623*4882a593Smuzhiyun slab_map_pages(cachep, page, freelist);
2624*4882a593Smuzhiyun
2625*4882a593Smuzhiyun cache_init_objs(cachep, page);
2626*4882a593Smuzhiyun
2627*4882a593Smuzhiyun if (gfpflags_allow_blocking(local_flags))
2628*4882a593Smuzhiyun local_irq_disable();
2629*4882a593Smuzhiyun
2630*4882a593Smuzhiyun return page;
2631*4882a593Smuzhiyun
2632*4882a593Smuzhiyun opps1:
2633*4882a593Smuzhiyun kmem_freepages(cachep, page);
2634*4882a593Smuzhiyun failed:
2635*4882a593Smuzhiyun if (gfpflags_allow_blocking(local_flags))
2636*4882a593Smuzhiyun local_irq_disable();
2637*4882a593Smuzhiyun return NULL;
2638*4882a593Smuzhiyun }
2639*4882a593Smuzhiyun
2640*4882a593Smuzhiyun static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
2641*4882a593Smuzhiyun {
2642*4882a593Smuzhiyun struct kmem_cache_node *n;
2643*4882a593Smuzhiyun void *list = NULL;
2644*4882a593Smuzhiyun
2645*4882a593Smuzhiyun check_irq_off();
2646*4882a593Smuzhiyun
2647*4882a593Smuzhiyun if (!page)
2648*4882a593Smuzhiyun return;
2649*4882a593Smuzhiyun
2650*4882a593Smuzhiyun INIT_LIST_HEAD(&page->slab_list);
2651*4882a593Smuzhiyun n = get_node(cachep, page_to_nid(page));
2652*4882a593Smuzhiyun
2653*4882a593Smuzhiyun spin_lock(&n->list_lock);
2654*4882a593Smuzhiyun n->total_slabs++;
2655*4882a593Smuzhiyun if (!page->active) {
2656*4882a593Smuzhiyun list_add_tail(&page->slab_list, &n->slabs_free);
2657*4882a593Smuzhiyun n->free_slabs++;
2658*4882a593Smuzhiyun } else
2659*4882a593Smuzhiyun fixup_slab_list(cachep, n, page, &list);
2660*4882a593Smuzhiyun
2661*4882a593Smuzhiyun STATS_INC_GROWN(cachep);
2662*4882a593Smuzhiyun n->free_objects += cachep->num - page->active;
2663*4882a593Smuzhiyun spin_unlock(&n->list_lock);
2664*4882a593Smuzhiyun
2665*4882a593Smuzhiyun fixup_objfreelist_debug(cachep, &list);
2666*4882a593Smuzhiyun }
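
/*
 * Sketch (not compiled) of how the two halves above are paired by the callers
 * further down: cache_grow_begin() may re-enable interrupts while it allocates
 * and initializes the new slab, and cache_grow_end() is what publishes it on
 * the node's slab lists.  The function name is hypothetical.
 */
#if 0
static void example_grow(struct kmem_cache *cachep, gfp_t flags, int node)
{
	struct page *page;

	page = cache_grow_begin(cachep, flags, node);
	/* objects may be taken from 'page' here, before it is published */
	cache_grow_end(cachep, page);
}
#endif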
2667*4882a593Smuzhiyun
2668*4882a593Smuzhiyun #if DEBUG
2669*4882a593Smuzhiyun
2670*4882a593Smuzhiyun /*
2671*4882a593Smuzhiyun * Perform extra freeing checks:
2672*4882a593Smuzhiyun * - detect bad pointers.
2673*4882a593Smuzhiyun * - POISON/RED_ZONE checking
2674*4882a593Smuzhiyun */
2675*4882a593Smuzhiyun static void kfree_debugcheck(const void *objp)
2676*4882a593Smuzhiyun {
2677*4882a593Smuzhiyun if (!virt_addr_valid(objp)) {
2678*4882a593Smuzhiyun pr_err("kfree_debugcheck: out of range ptr %lxh\n",
2679*4882a593Smuzhiyun (unsigned long)objp);
2680*4882a593Smuzhiyun BUG();
2681*4882a593Smuzhiyun }
2682*4882a593Smuzhiyun }
2683*4882a593Smuzhiyun
2684*4882a593Smuzhiyun static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2685*4882a593Smuzhiyun {
2686*4882a593Smuzhiyun unsigned long long redzone1, redzone2;
2687*4882a593Smuzhiyun
2688*4882a593Smuzhiyun redzone1 = *dbg_redzone1(cache, obj);
2689*4882a593Smuzhiyun redzone2 = *dbg_redzone2(cache, obj);
2690*4882a593Smuzhiyun
2691*4882a593Smuzhiyun /*
2692*4882a593Smuzhiyun * Redzone is ok.
2693*4882a593Smuzhiyun */
2694*4882a593Smuzhiyun if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2695*4882a593Smuzhiyun return;
2696*4882a593Smuzhiyun
2697*4882a593Smuzhiyun if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2698*4882a593Smuzhiyun slab_error(cache, "double free detected");
2699*4882a593Smuzhiyun else
2700*4882a593Smuzhiyun slab_error(cache, "memory outside object was overwritten");
2701*4882a593Smuzhiyun
2702*4882a593Smuzhiyun pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",
2703*4882a593Smuzhiyun obj, redzone1, redzone2);
2704*4882a593Smuzhiyun }
2705*4882a593Smuzhiyun
2706*4882a593Smuzhiyun static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2707*4882a593Smuzhiyun unsigned long caller)
2708*4882a593Smuzhiyun {
2709*4882a593Smuzhiyun unsigned int objnr;
2710*4882a593Smuzhiyun struct page *page;
2711*4882a593Smuzhiyun
2712*4882a593Smuzhiyun BUG_ON(virt_to_cache(objp) != cachep);
2713*4882a593Smuzhiyun
2714*4882a593Smuzhiyun objp -= obj_offset(cachep);
2715*4882a593Smuzhiyun kfree_debugcheck(objp);
2716*4882a593Smuzhiyun page = virt_to_head_page(objp);
2717*4882a593Smuzhiyun
2718*4882a593Smuzhiyun if (cachep->flags & SLAB_RED_ZONE) {
2719*4882a593Smuzhiyun verify_redzone_free(cachep, objp);
2720*4882a593Smuzhiyun *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2721*4882a593Smuzhiyun *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2722*4882a593Smuzhiyun }
2723*4882a593Smuzhiyun if (cachep->flags & SLAB_STORE_USER)
2724*4882a593Smuzhiyun *dbg_userword(cachep, objp) = (void *)caller;
2725*4882a593Smuzhiyun
2726*4882a593Smuzhiyun objnr = obj_to_index(cachep, page, objp);
2727*4882a593Smuzhiyun
2728*4882a593Smuzhiyun BUG_ON(objnr >= cachep->num);
2729*4882a593Smuzhiyun BUG_ON(objp != index_to_obj(cachep, page, objnr));
2730*4882a593Smuzhiyun
2731*4882a593Smuzhiyun if (cachep->flags & SLAB_POISON) {
2732*4882a593Smuzhiyun poison_obj(cachep, objp, POISON_FREE);
2733*4882a593Smuzhiyun slab_kernel_map(cachep, objp, 0);
2734*4882a593Smuzhiyun }
2735*4882a593Smuzhiyun return objp;
2736*4882a593Smuzhiyun }
2737*4882a593Smuzhiyun
2738*4882a593Smuzhiyun #else
2739*4882a593Smuzhiyun #define kfree_debugcheck(x) do { } while(0)
2740*4882a593Smuzhiyun #define cache_free_debugcheck(x,objp,z) (objp)
2741*4882a593Smuzhiyun #endif
2742*4882a593Smuzhiyun
2743*4882a593Smuzhiyun static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
2744*4882a593Smuzhiyun void **list)
2745*4882a593Smuzhiyun {
2746*4882a593Smuzhiyun #if DEBUG
2747*4882a593Smuzhiyun void *next = *list;
2748*4882a593Smuzhiyun void *objp;
2749*4882a593Smuzhiyun
2750*4882a593Smuzhiyun while (next) {
2751*4882a593Smuzhiyun objp = next - obj_offset(cachep);
2752*4882a593Smuzhiyun next = *(void **)next;
2753*4882a593Smuzhiyun poison_obj(cachep, objp, POISON_FREE);
2754*4882a593Smuzhiyun }
2755*4882a593Smuzhiyun #endif
2756*4882a593Smuzhiyun }
2757*4882a593Smuzhiyun
2758*4882a593Smuzhiyun static inline void fixup_slab_list(struct kmem_cache *cachep,
2759*4882a593Smuzhiyun struct kmem_cache_node *n, struct page *page,
2760*4882a593Smuzhiyun void **list)
2761*4882a593Smuzhiyun {
2762*4882a593Smuzhiyun /* move slabp to correct slabp list: */
2763*4882a593Smuzhiyun list_del(&page->slab_list);
2764*4882a593Smuzhiyun if (page->active == cachep->num) {
2765*4882a593Smuzhiyun list_add(&page->slab_list, &n->slabs_full);
2766*4882a593Smuzhiyun if (OBJFREELIST_SLAB(cachep)) {
2767*4882a593Smuzhiyun #if DEBUG
2768*4882a593Smuzhiyun /* Poisoning will be done without holding the lock */
2769*4882a593Smuzhiyun if (cachep->flags & SLAB_POISON) {
2770*4882a593Smuzhiyun void **objp = page->freelist;
2771*4882a593Smuzhiyun
2772*4882a593Smuzhiyun *objp = *list;
2773*4882a593Smuzhiyun *list = objp;
2774*4882a593Smuzhiyun }
2775*4882a593Smuzhiyun #endif
2776*4882a593Smuzhiyun page->freelist = NULL;
2777*4882a593Smuzhiyun }
2778*4882a593Smuzhiyun } else
2779*4882a593Smuzhiyun list_add(&page->slab_list, &n->slabs_partial);
2780*4882a593Smuzhiyun }
2781*4882a593Smuzhiyun
2782*4882a593Smuzhiyun /* Try to find non-pfmemalloc slab if needed */
2783*4882a593Smuzhiyun static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n,
2784*4882a593Smuzhiyun struct page *page, bool pfmemalloc)
2785*4882a593Smuzhiyun {
2786*4882a593Smuzhiyun if (!page)
2787*4882a593Smuzhiyun return NULL;
2788*4882a593Smuzhiyun
2789*4882a593Smuzhiyun if (pfmemalloc)
2790*4882a593Smuzhiyun return page;
2791*4882a593Smuzhiyun
2792*4882a593Smuzhiyun if (!PageSlabPfmemalloc(page))
2793*4882a593Smuzhiyun return page;
2794*4882a593Smuzhiyun
2795*4882a593Smuzhiyun /* No need to keep pfmemalloc slab if we have enough free objects */
2796*4882a593Smuzhiyun if (n->free_objects > n->free_limit) {
2797*4882a593Smuzhiyun ClearPageSlabPfmemalloc(page);
2798*4882a593Smuzhiyun return page;
2799*4882a593Smuzhiyun }
2800*4882a593Smuzhiyun
2801*4882a593Smuzhiyun /* Move pfmemalloc slab to the end of list to speed up next search */
2802*4882a593Smuzhiyun list_del(&page->slab_list);
2803*4882a593Smuzhiyun if (!page->active) {
2804*4882a593Smuzhiyun list_add_tail(&page->slab_list, &n->slabs_free);
2805*4882a593Smuzhiyun n->free_slabs++;
2806*4882a593Smuzhiyun } else
2807*4882a593Smuzhiyun list_add_tail(&page->slab_list, &n->slabs_partial);
2808*4882a593Smuzhiyun
2809*4882a593Smuzhiyun list_for_each_entry(page, &n->slabs_partial, slab_list) {
2810*4882a593Smuzhiyun if (!PageSlabPfmemalloc(page))
2811*4882a593Smuzhiyun return page;
2812*4882a593Smuzhiyun }
2813*4882a593Smuzhiyun
2814*4882a593Smuzhiyun n->free_touched = 1;
2815*4882a593Smuzhiyun list_for_each_entry(page, &n->slabs_free, slab_list) {
2816*4882a593Smuzhiyun if (!PageSlabPfmemalloc(page)) {
2817*4882a593Smuzhiyun n->free_slabs--;
2818*4882a593Smuzhiyun return page;
2819*4882a593Smuzhiyun }
2820*4882a593Smuzhiyun }
2821*4882a593Smuzhiyun
2822*4882a593Smuzhiyun return NULL;
2823*4882a593Smuzhiyun }
2824*4882a593Smuzhiyun
2825*4882a593Smuzhiyun static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
2826*4882a593Smuzhiyun {
2827*4882a593Smuzhiyun struct page *page;
2828*4882a593Smuzhiyun
2829*4882a593Smuzhiyun assert_spin_locked(&n->list_lock);
2830*4882a593Smuzhiyun page = list_first_entry_or_null(&n->slabs_partial, struct page,
2831*4882a593Smuzhiyun slab_list);
2832*4882a593Smuzhiyun if (!page) {
2833*4882a593Smuzhiyun n->free_touched = 1;
2834*4882a593Smuzhiyun page = list_first_entry_or_null(&n->slabs_free, struct page,
2835*4882a593Smuzhiyun slab_list);
2836*4882a593Smuzhiyun if (page)
2837*4882a593Smuzhiyun n->free_slabs--;
2838*4882a593Smuzhiyun }
2839*4882a593Smuzhiyun
2840*4882a593Smuzhiyun if (sk_memalloc_socks())
2841*4882a593Smuzhiyun page = get_valid_first_slab(n, page, pfmemalloc);
2842*4882a593Smuzhiyun
2843*4882a593Smuzhiyun return page;
2844*4882a593Smuzhiyun }
2845*4882a593Smuzhiyun
2846*4882a593Smuzhiyun static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
2847*4882a593Smuzhiyun struct kmem_cache_node *n, gfp_t flags)
2848*4882a593Smuzhiyun {
2849*4882a593Smuzhiyun struct page *page;
2850*4882a593Smuzhiyun void *obj;
2851*4882a593Smuzhiyun void *list = NULL;
2852*4882a593Smuzhiyun
2853*4882a593Smuzhiyun if (!gfp_pfmemalloc_allowed(flags))
2854*4882a593Smuzhiyun return NULL;
2855*4882a593Smuzhiyun
2856*4882a593Smuzhiyun spin_lock(&n->list_lock);
2857*4882a593Smuzhiyun page = get_first_slab(n, true);
2858*4882a593Smuzhiyun if (!page) {
2859*4882a593Smuzhiyun spin_unlock(&n->list_lock);
2860*4882a593Smuzhiyun return NULL;
2861*4882a593Smuzhiyun }
2862*4882a593Smuzhiyun
2863*4882a593Smuzhiyun obj = slab_get_obj(cachep, page);
2864*4882a593Smuzhiyun n->free_objects--;
2865*4882a593Smuzhiyun
2866*4882a593Smuzhiyun fixup_slab_list(cachep, n, page, &list);
2867*4882a593Smuzhiyun
2868*4882a593Smuzhiyun spin_unlock(&n->list_lock);
2869*4882a593Smuzhiyun fixup_objfreelist_debug(cachep, &list);
2870*4882a593Smuzhiyun
2871*4882a593Smuzhiyun return obj;
2872*4882a593Smuzhiyun }
2873*4882a593Smuzhiyun
2874*4882a593Smuzhiyun /*
2875*4882a593Smuzhiyun * Slab list should be fixed up by fixup_slab_list() for existing slab
2876*4882a593Smuzhiyun * or cache_grow_end() for new slab
2877*4882a593Smuzhiyun */
2878*4882a593Smuzhiyun static __always_inline int alloc_block(struct kmem_cache *cachep,
2879*4882a593Smuzhiyun struct array_cache *ac, struct page *page, int batchcount)
2880*4882a593Smuzhiyun {
2881*4882a593Smuzhiyun /*
2882*4882a593Smuzhiyun * There must be at least one object available for
2883*4882a593Smuzhiyun * allocation.
2884*4882a593Smuzhiyun */
2885*4882a593Smuzhiyun BUG_ON(page->active >= cachep->num);
2886*4882a593Smuzhiyun
2887*4882a593Smuzhiyun while (page->active < cachep->num && batchcount--) {
2888*4882a593Smuzhiyun STATS_INC_ALLOCED(cachep);
2889*4882a593Smuzhiyun STATS_INC_ACTIVE(cachep);
2890*4882a593Smuzhiyun STATS_SET_HIGH(cachep);
2891*4882a593Smuzhiyun
2892*4882a593Smuzhiyun ac->entry[ac->avail++] = slab_get_obj(cachep, page);
2893*4882a593Smuzhiyun }
2894*4882a593Smuzhiyun
2895*4882a593Smuzhiyun return batchcount;
2896*4882a593Smuzhiyun }
2897*4882a593Smuzhiyun
2898*4882a593Smuzhiyun static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
2899*4882a593Smuzhiyun {
2900*4882a593Smuzhiyun int batchcount;
2901*4882a593Smuzhiyun struct kmem_cache_node *n;
2902*4882a593Smuzhiyun struct array_cache *ac, *shared;
2903*4882a593Smuzhiyun int node;
2904*4882a593Smuzhiyun void *list = NULL;
2905*4882a593Smuzhiyun struct page *page;
2906*4882a593Smuzhiyun
2907*4882a593Smuzhiyun check_irq_off();
2908*4882a593Smuzhiyun node = numa_mem_id();
2909*4882a593Smuzhiyun
2910*4882a593Smuzhiyun ac = cpu_cache_get(cachep);
2911*4882a593Smuzhiyun batchcount = ac->batchcount;
2912*4882a593Smuzhiyun if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2913*4882a593Smuzhiyun /*
2914*4882a593Smuzhiyun * If there was little recent activity on this cache, then
2915*4882a593Smuzhiyun * perform only a partial refill. Otherwise we could generate
2916*4882a593Smuzhiyun * refill bouncing.
2917*4882a593Smuzhiyun */
2918*4882a593Smuzhiyun batchcount = BATCHREFILL_LIMIT;
2919*4882a593Smuzhiyun }
2920*4882a593Smuzhiyun n = get_node(cachep, node);
2921*4882a593Smuzhiyun
2922*4882a593Smuzhiyun BUG_ON(ac->avail > 0 || !n);
2923*4882a593Smuzhiyun shared = READ_ONCE(n->shared);
2924*4882a593Smuzhiyun if (!n->free_objects && (!shared || !shared->avail))
2925*4882a593Smuzhiyun goto direct_grow;
2926*4882a593Smuzhiyun
2927*4882a593Smuzhiyun spin_lock(&n->list_lock);
2928*4882a593Smuzhiyun shared = READ_ONCE(n->shared);
2929*4882a593Smuzhiyun
2930*4882a593Smuzhiyun /* See if we can refill from the shared array */
2931*4882a593Smuzhiyun if (shared && transfer_objects(ac, shared, batchcount)) {
2932*4882a593Smuzhiyun shared->touched = 1;
2933*4882a593Smuzhiyun goto alloc_done;
2934*4882a593Smuzhiyun }
2935*4882a593Smuzhiyun
2936*4882a593Smuzhiyun while (batchcount > 0) {
2937*4882a593Smuzhiyun /* Get the slab that the allocation will come from. */
2938*4882a593Smuzhiyun page = get_first_slab(n, false);
2939*4882a593Smuzhiyun if (!page)
2940*4882a593Smuzhiyun goto must_grow;
2941*4882a593Smuzhiyun
2942*4882a593Smuzhiyun check_spinlock_acquired(cachep);
2943*4882a593Smuzhiyun
2944*4882a593Smuzhiyun batchcount = alloc_block(cachep, ac, page, batchcount);
2945*4882a593Smuzhiyun fixup_slab_list(cachep, n, page, &list);
2946*4882a593Smuzhiyun }
2947*4882a593Smuzhiyun
2948*4882a593Smuzhiyun must_grow:
2949*4882a593Smuzhiyun n->free_objects -= ac->avail;
2950*4882a593Smuzhiyun alloc_done:
2951*4882a593Smuzhiyun spin_unlock(&n->list_lock);
2952*4882a593Smuzhiyun fixup_objfreelist_debug(cachep, &list);
2953*4882a593Smuzhiyun
2954*4882a593Smuzhiyun direct_grow:
2955*4882a593Smuzhiyun if (unlikely(!ac->avail)) {
2956*4882a593Smuzhiyun /* Check if we can use obj in pfmemalloc slab */
2957*4882a593Smuzhiyun if (sk_memalloc_socks()) {
2958*4882a593Smuzhiyun void *obj = cache_alloc_pfmemalloc(cachep, n, flags);
2959*4882a593Smuzhiyun
2960*4882a593Smuzhiyun if (obj)
2961*4882a593Smuzhiyun return obj;
2962*4882a593Smuzhiyun }
2963*4882a593Smuzhiyun
2964*4882a593Smuzhiyun page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
2965*4882a593Smuzhiyun
2966*4882a593Smuzhiyun /*
2967*4882a593Smuzhiyun * cache_grow_begin() can reenable interrupts,
2968*4882a593Smuzhiyun * then ac could change.
2969*4882a593Smuzhiyun */
2970*4882a593Smuzhiyun ac = cpu_cache_get(cachep);
2971*4882a593Smuzhiyun if (!ac->avail && page)
2972*4882a593Smuzhiyun alloc_block(cachep, ac, page, batchcount);
2973*4882a593Smuzhiyun cache_grow_end(cachep, page);
2974*4882a593Smuzhiyun
2975*4882a593Smuzhiyun if (!ac->avail)
2976*4882a593Smuzhiyun return NULL;
2977*4882a593Smuzhiyun }
2978*4882a593Smuzhiyun ac->touched = 1;
2979*4882a593Smuzhiyun
2980*4882a593Smuzhiyun return ac->entry[--ac->avail];
2981*4882a593Smuzhiyun }
2982*4882a593Smuzhiyun
2983*4882a593Smuzhiyun static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
2984*4882a593Smuzhiyun gfp_t flags)
2985*4882a593Smuzhiyun {
2986*4882a593Smuzhiyun might_sleep_if(gfpflags_allow_blocking(flags));
2987*4882a593Smuzhiyun }
2988*4882a593Smuzhiyun
2989*4882a593Smuzhiyun #if DEBUG
2990*4882a593Smuzhiyun static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
2991*4882a593Smuzhiyun gfp_t flags, void *objp, unsigned long caller)
2992*4882a593Smuzhiyun {
2993*4882a593Smuzhiyun WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
2994*4882a593Smuzhiyun if (!objp)
2995*4882a593Smuzhiyun return objp;
2996*4882a593Smuzhiyun if (cachep->flags & SLAB_POISON) {
2997*4882a593Smuzhiyun check_poison_obj(cachep, objp);
2998*4882a593Smuzhiyun slab_kernel_map(cachep, objp, 1);
2999*4882a593Smuzhiyun poison_obj(cachep, objp, POISON_INUSE);
3000*4882a593Smuzhiyun }
3001*4882a593Smuzhiyun if (cachep->flags & SLAB_STORE_USER)
3002*4882a593Smuzhiyun *dbg_userword(cachep, objp) = (void *)caller;
3003*4882a593Smuzhiyun
3004*4882a593Smuzhiyun if (cachep->flags & SLAB_RED_ZONE) {
3005*4882a593Smuzhiyun if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
3006*4882a593Smuzhiyun *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
3007*4882a593Smuzhiyun slab_error(cachep, "double free, or memory outside object was overwritten");
3008*4882a593Smuzhiyun pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n",
3009*4882a593Smuzhiyun objp, *dbg_redzone1(cachep, objp),
3010*4882a593Smuzhiyun *dbg_redzone2(cachep, objp));
3011*4882a593Smuzhiyun }
3012*4882a593Smuzhiyun *dbg_redzone1(cachep, objp) = RED_ACTIVE;
3013*4882a593Smuzhiyun *dbg_redzone2(cachep, objp) = RED_ACTIVE;
3014*4882a593Smuzhiyun }
3015*4882a593Smuzhiyun
3016*4882a593Smuzhiyun objp += obj_offset(cachep);
3017*4882a593Smuzhiyun if (cachep->ctor && cachep->flags & SLAB_POISON)
3018*4882a593Smuzhiyun cachep->ctor(objp);
3019*4882a593Smuzhiyun if ((unsigned long)objp & (arch_slab_minalign() - 1)) {
3020*4882a593Smuzhiyun pr_err("0x%px: not aligned to arch_slab_minalign()=%u\n", objp,
3021*4882a593Smuzhiyun arch_slab_minalign());
3022*4882a593Smuzhiyun }
3023*4882a593Smuzhiyun return objp;
3024*4882a593Smuzhiyun }
3025*4882a593Smuzhiyun #else
3026*4882a593Smuzhiyun #define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
3027*4882a593Smuzhiyun #endif
3028*4882a593Smuzhiyun
3029*4882a593Smuzhiyun static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3030*4882a593Smuzhiyun {
3031*4882a593Smuzhiyun void *objp;
3032*4882a593Smuzhiyun struct array_cache *ac;
3033*4882a593Smuzhiyun
3034*4882a593Smuzhiyun check_irq_off();
3035*4882a593Smuzhiyun
3036*4882a593Smuzhiyun ac = cpu_cache_get(cachep);
3037*4882a593Smuzhiyun if (likely(ac->avail)) {
3038*4882a593Smuzhiyun ac->touched = 1;
3039*4882a593Smuzhiyun objp = ac->entry[--ac->avail];
3040*4882a593Smuzhiyun
3041*4882a593Smuzhiyun STATS_INC_ALLOCHIT(cachep);
3042*4882a593Smuzhiyun goto out;
3043*4882a593Smuzhiyun }
3044*4882a593Smuzhiyun
3045*4882a593Smuzhiyun STATS_INC_ALLOCMISS(cachep);
3046*4882a593Smuzhiyun objp = cache_alloc_refill(cachep, flags);
3047*4882a593Smuzhiyun /*
3048*4882a593Smuzhiyun * the 'ac' may be updated by cache_alloc_refill(),
3049*4882a593Smuzhiyun * and kmemleak_erase() requires its correct value.
3050*4882a593Smuzhiyun */
3051*4882a593Smuzhiyun ac = cpu_cache_get(cachep);
3052*4882a593Smuzhiyun
3053*4882a593Smuzhiyun out:
3054*4882a593Smuzhiyun /*
3055*4882a593Smuzhiyun * To avoid a false negative, if an object that is in one of the
3056*4882a593Smuzhiyun * per-CPU caches is leaked, we need to make sure kmemleak doesn't
3057*4882a593Smuzhiyun * treat the array pointers as a reference to the object.
3058*4882a593Smuzhiyun */
3059*4882a593Smuzhiyun if (objp)
3060*4882a593Smuzhiyun kmemleak_erase(&ac->entry[ac->avail]);
3061*4882a593Smuzhiyun return objp;
3062*4882a593Smuzhiyun }
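
/*
 * Sketch (not compiled): stripped of statistics and refill handling, the
 * per-CPU array_cache used above is just a LIFO stack of object pointers;
 * refill and flush only run when that stack is empty or full.  The helper
 * names below are hypothetical.
 */
#if 0
static void *example_ac_pop(struct array_cache *ac)
{
	return ac->avail ? ac->entry[--ac->avail] : NULL;
}

static bool example_ac_push(struct array_cache *ac, void *objp)
{
	if (ac->avail >= ac->limit)
		return false;	/* caller must flush a batch first */
	ac->entry[ac->avail++] = objp;
	return true;
}
#endif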
3063*4882a593Smuzhiyun
3064*4882a593Smuzhiyun #ifdef CONFIG_NUMA
3065*4882a593Smuzhiyun /*
3066*4882a593Smuzhiyun * Try allocating on another node if PFA_SPREAD_SLAB or a mempolicy is set.
3067*4882a593Smuzhiyun *
3068*4882a593Smuzhiyun * If we are in_interrupt, then process context, including cpusets and
3069*4882a593Smuzhiyun * mempolicy, may not apply and should not be used for allocation policy.
3070*4882a593Smuzhiyun */
3071*4882a593Smuzhiyun static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3072*4882a593Smuzhiyun {
3073*4882a593Smuzhiyun int nid_alloc, nid_here;
3074*4882a593Smuzhiyun
3075*4882a593Smuzhiyun if (in_interrupt() || (flags & __GFP_THISNODE))
3076*4882a593Smuzhiyun return NULL;
3077*4882a593Smuzhiyun nid_alloc = nid_here = numa_mem_id();
3078*4882a593Smuzhiyun if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3079*4882a593Smuzhiyun nid_alloc = cpuset_slab_spread_node();
3080*4882a593Smuzhiyun else if (current->mempolicy)
3081*4882a593Smuzhiyun nid_alloc = mempolicy_slab_node();
3082*4882a593Smuzhiyun if (nid_alloc != nid_here)
3083*4882a593Smuzhiyun return ____cache_alloc_node(cachep, flags, nid_alloc);
3084*4882a593Smuzhiyun return NULL;
3085*4882a593Smuzhiyun }
3086*4882a593Smuzhiyun
3087*4882a593Smuzhiyun /*
3088*4882a593Smuzhiyun * Fallback function if there was no memory available and no objects on a
3089*4882a593Smuzhiyun * certain node and fallback is permitted. First we scan all the
3090*4882a593Smuzhiyun * available nodes for available objects. If that fails then we
3091*4882a593Smuzhiyun * perform an allocation without specifying a node. This allows the page
3092*4882a593Smuzhiyun * allocator to do its reclaim / fallback magic. We then insert the
3093*4882a593Smuzhiyun * slab into the proper nodelist and then allocate from it.
3094*4882a593Smuzhiyun */
3095*4882a593Smuzhiyun static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3096*4882a593Smuzhiyun {
3097*4882a593Smuzhiyun struct zonelist *zonelist;
3098*4882a593Smuzhiyun struct zoneref *z;
3099*4882a593Smuzhiyun struct zone *zone;
3100*4882a593Smuzhiyun enum zone_type highest_zoneidx = gfp_zone(flags);
3101*4882a593Smuzhiyun void *obj = NULL;
3102*4882a593Smuzhiyun struct page *page;
3103*4882a593Smuzhiyun int nid;
3104*4882a593Smuzhiyun unsigned int cpuset_mems_cookie;
3105*4882a593Smuzhiyun
3106*4882a593Smuzhiyun if (flags & __GFP_THISNODE)
3107*4882a593Smuzhiyun return NULL;
3108*4882a593Smuzhiyun
3109*4882a593Smuzhiyun retry_cpuset:
3110*4882a593Smuzhiyun cpuset_mems_cookie = read_mems_allowed_begin();
3111*4882a593Smuzhiyun zonelist = node_zonelist(mempolicy_slab_node(), flags);
3112*4882a593Smuzhiyun
3113*4882a593Smuzhiyun retry:
3114*4882a593Smuzhiyun /*
3115*4882a593Smuzhiyun * Look through allowed nodes for objects available
3116*4882a593Smuzhiyun * from existing per node queues.
3117*4882a593Smuzhiyun */
3118*4882a593Smuzhiyun for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
3119*4882a593Smuzhiyun nid = zone_to_nid(zone);
3120*4882a593Smuzhiyun
3121*4882a593Smuzhiyun if (cpuset_zone_allowed(zone, flags) &&
3122*4882a593Smuzhiyun get_node(cache, nid) &&
3123*4882a593Smuzhiyun get_node(cache, nid)->free_objects) {
3124*4882a593Smuzhiyun obj = ____cache_alloc_node(cache,
3125*4882a593Smuzhiyun gfp_exact_node(flags), nid);
3126*4882a593Smuzhiyun if (obj)
3127*4882a593Smuzhiyun break;
3128*4882a593Smuzhiyun }
3129*4882a593Smuzhiyun }
3130*4882a593Smuzhiyun
3131*4882a593Smuzhiyun if (!obj) {
3132*4882a593Smuzhiyun /*
3133*4882a593Smuzhiyun * This allocation will be performed within the constraints
3134*4882a593Smuzhiyun * of the current cpuset / memory policy requirements.
3135*4882a593Smuzhiyun * We may trigger various forms of reclaim on the allowed
3136*4882a593Smuzhiyun * set and go into memory reserves if necessary.
3137*4882a593Smuzhiyun */
3138*4882a593Smuzhiyun page = cache_grow_begin(cache, flags, numa_mem_id());
3139*4882a593Smuzhiyun cache_grow_end(cache, page);
3140*4882a593Smuzhiyun if (page) {
3141*4882a593Smuzhiyun nid = page_to_nid(page);
3142*4882a593Smuzhiyun obj = ____cache_alloc_node(cache,
3143*4882a593Smuzhiyun gfp_exact_node(flags), nid);
3144*4882a593Smuzhiyun
3145*4882a593Smuzhiyun /*
3146*4882a593Smuzhiyun * Another processor may allocate the objects in
3147*4882a593Smuzhiyun * the slab since we are not holding any locks.
3148*4882a593Smuzhiyun */
3149*4882a593Smuzhiyun if (!obj)
3150*4882a593Smuzhiyun goto retry;
3151*4882a593Smuzhiyun }
3152*4882a593Smuzhiyun }
3153*4882a593Smuzhiyun
3154*4882a593Smuzhiyun if (unlikely(!obj && read_mems_allowed_retry(cpuset_mems_cookie)))
3155*4882a593Smuzhiyun goto retry_cpuset;
3156*4882a593Smuzhiyun return obj;
3157*4882a593Smuzhiyun }
3158*4882a593Smuzhiyun
3159*4882a593Smuzhiyun /*
3160*4882a593Smuzhiyun * An interface to enable slab creation on nodeid
3161*4882a593Smuzhiyun */
3162*4882a593Smuzhiyun static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3163*4882a593Smuzhiyun int nodeid)
3164*4882a593Smuzhiyun {
3165*4882a593Smuzhiyun struct page *page;
3166*4882a593Smuzhiyun struct kmem_cache_node *n;
3167*4882a593Smuzhiyun void *obj = NULL;
3168*4882a593Smuzhiyun void *list = NULL;
3169*4882a593Smuzhiyun
3170*4882a593Smuzhiyun VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES);
3171*4882a593Smuzhiyun n = get_node(cachep, nodeid);
3172*4882a593Smuzhiyun BUG_ON(!n);
3173*4882a593Smuzhiyun
3174*4882a593Smuzhiyun check_irq_off();
3175*4882a593Smuzhiyun spin_lock(&n->list_lock);
3176*4882a593Smuzhiyun page = get_first_slab(n, false);
3177*4882a593Smuzhiyun if (!page)
3178*4882a593Smuzhiyun goto must_grow;
3179*4882a593Smuzhiyun
3180*4882a593Smuzhiyun check_spinlock_acquired_node(cachep, nodeid);
3181*4882a593Smuzhiyun
3182*4882a593Smuzhiyun STATS_INC_NODEALLOCS(cachep);
3183*4882a593Smuzhiyun STATS_INC_ACTIVE(cachep);
3184*4882a593Smuzhiyun STATS_SET_HIGH(cachep);
3185*4882a593Smuzhiyun
3186*4882a593Smuzhiyun BUG_ON(page->active == cachep->num);
3187*4882a593Smuzhiyun
3188*4882a593Smuzhiyun obj = slab_get_obj(cachep, page);
3189*4882a593Smuzhiyun n->free_objects--;
3190*4882a593Smuzhiyun
3191*4882a593Smuzhiyun fixup_slab_list(cachep, n, page, &list);
3192*4882a593Smuzhiyun
3193*4882a593Smuzhiyun spin_unlock(&n->list_lock);
3194*4882a593Smuzhiyun fixup_objfreelist_debug(cachep, &list);
3195*4882a593Smuzhiyun return obj;
3196*4882a593Smuzhiyun
3197*4882a593Smuzhiyun must_grow:
3198*4882a593Smuzhiyun spin_unlock(&n->list_lock);
3199*4882a593Smuzhiyun page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
3200*4882a593Smuzhiyun if (page) {
3201*4882a593Smuzhiyun /* This slab isn't counted yet so don't update free_objects */
3202*4882a593Smuzhiyun obj = slab_get_obj(cachep, page);
3203*4882a593Smuzhiyun }
3204*4882a593Smuzhiyun cache_grow_end(cachep, page);
3205*4882a593Smuzhiyun
3206*4882a593Smuzhiyun return obj ? obj : fallback_alloc(cachep, flags);
3207*4882a593Smuzhiyun }
3208*4882a593Smuzhiyun
3209*4882a593Smuzhiyun static __always_inline void *
3210*4882a593Smuzhiyun slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_size,
3211*4882a593Smuzhiyun unsigned long caller)
3212*4882a593Smuzhiyun {
3213*4882a593Smuzhiyun unsigned long save_flags;
3214*4882a593Smuzhiyun void *ptr;
3215*4882a593Smuzhiyun int slab_node = numa_mem_id();
3216*4882a593Smuzhiyun struct obj_cgroup *objcg = NULL;
3217*4882a593Smuzhiyun bool init = false;
3218*4882a593Smuzhiyun
3219*4882a593Smuzhiyun flags &= gfp_allowed_mask;
3220*4882a593Smuzhiyun cachep = slab_pre_alloc_hook(cachep, &objcg, 1, flags);
3221*4882a593Smuzhiyun if (unlikely(!cachep))
3222*4882a593Smuzhiyun return NULL;
3223*4882a593Smuzhiyun
3224*4882a593Smuzhiyun ptr = kfence_alloc(cachep, orig_size, flags);
3225*4882a593Smuzhiyun if (unlikely(ptr))
3226*4882a593Smuzhiyun goto out_hooks;
3227*4882a593Smuzhiyun
3228*4882a593Smuzhiyun cache_alloc_debugcheck_before(cachep, flags);
3229*4882a593Smuzhiyun local_irq_save(save_flags);
3230*4882a593Smuzhiyun
3231*4882a593Smuzhiyun if (nodeid == NUMA_NO_NODE)
3232*4882a593Smuzhiyun nodeid = slab_node;
3233*4882a593Smuzhiyun
3234*4882a593Smuzhiyun if (unlikely(!get_node(cachep, nodeid))) {
3235*4882a593Smuzhiyun /* Node not bootstrapped yet */
3236*4882a593Smuzhiyun ptr = fallback_alloc(cachep, flags);
3237*4882a593Smuzhiyun goto out;
3238*4882a593Smuzhiyun }
3239*4882a593Smuzhiyun
3240*4882a593Smuzhiyun if (nodeid == slab_node) {
3241*4882a593Smuzhiyun /*
3242*4882a593Smuzhiyun * Use the locally cached objects if possible.
3243*4882a593Smuzhiyun * However ____cache_alloc does not allow fallback
3244*4882a593Smuzhiyun * to other nodes. It may fail while we still have
3245*4882a593Smuzhiyun * objects on other nodes available.
3246*4882a593Smuzhiyun */
3247*4882a593Smuzhiyun ptr = ____cache_alloc(cachep, flags);
3248*4882a593Smuzhiyun if (ptr)
3249*4882a593Smuzhiyun goto out;
3250*4882a593Smuzhiyun }
3251*4882a593Smuzhiyun /* ____cache_alloc_node can fall back to other nodes */
3252*4882a593Smuzhiyun ptr = ____cache_alloc_node(cachep, flags, nodeid);
3253*4882a593Smuzhiyun out:
3254*4882a593Smuzhiyun local_irq_restore(save_flags);
3255*4882a593Smuzhiyun ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3256*4882a593Smuzhiyun init = slab_want_init_on_alloc(flags, cachep);
3257*4882a593Smuzhiyun
3258*4882a593Smuzhiyun out_hooks:
3259*4882a593Smuzhiyun slab_post_alloc_hook(cachep, objcg, flags, 1, &ptr, init);
3260*4882a593Smuzhiyun return ptr;
3261*4882a593Smuzhiyun }
3262*4882a593Smuzhiyun
3263*4882a593Smuzhiyun static __always_inline void *
3264*4882a593Smuzhiyun __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3265*4882a593Smuzhiyun {
3266*4882a593Smuzhiyun void *objp;
3267*4882a593Smuzhiyun
3268*4882a593Smuzhiyun if (current->mempolicy || cpuset_do_slab_mem_spread()) {
3269*4882a593Smuzhiyun objp = alternate_node_alloc(cache, flags);
3270*4882a593Smuzhiyun if (objp)
3271*4882a593Smuzhiyun goto out;
3272*4882a593Smuzhiyun }
3273*4882a593Smuzhiyun objp = ____cache_alloc(cache, flags);
3274*4882a593Smuzhiyun
3275*4882a593Smuzhiyun /*
3276*4882a593Smuzhiyun * We may just have run out of memory on the local node.
3277*4882a593Smuzhiyun * ____cache_alloc_node() knows how to locate memory on other nodes
3278*4882a593Smuzhiyun */
3279*4882a593Smuzhiyun if (!objp)
3280*4882a593Smuzhiyun objp = ____cache_alloc_node(cache, flags, numa_mem_id());
3281*4882a593Smuzhiyun
3282*4882a593Smuzhiyun out:
3283*4882a593Smuzhiyun return objp;
3284*4882a593Smuzhiyun }
3285*4882a593Smuzhiyun #else
3286*4882a593Smuzhiyun
3287*4882a593Smuzhiyun static __always_inline void *
3288*4882a593Smuzhiyun __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3289*4882a593Smuzhiyun {
3290*4882a593Smuzhiyun return ____cache_alloc(cachep, flags);
3291*4882a593Smuzhiyun }
3292*4882a593Smuzhiyun
3293*4882a593Smuzhiyun #endif /* CONFIG_NUMA */
3294*4882a593Smuzhiyun
3295*4882a593Smuzhiyun static __always_inline void *
3296*4882a593Smuzhiyun slab_alloc(struct kmem_cache *cachep, gfp_t flags, size_t orig_size, unsigned long caller)
3297*4882a593Smuzhiyun {
3298*4882a593Smuzhiyun unsigned long save_flags;
3299*4882a593Smuzhiyun void *objp;
3300*4882a593Smuzhiyun struct obj_cgroup *objcg = NULL;
3301*4882a593Smuzhiyun bool init = false;
3302*4882a593Smuzhiyun
3303*4882a593Smuzhiyun flags &= gfp_allowed_mask;
3304*4882a593Smuzhiyun cachep = slab_pre_alloc_hook(cachep, &objcg, 1, flags);
3305*4882a593Smuzhiyun if (unlikely(!cachep))
3306*4882a593Smuzhiyun return NULL;
3307*4882a593Smuzhiyun
3308*4882a593Smuzhiyun objp = kfence_alloc(cachep, orig_size, flags);
3309*4882a593Smuzhiyun if (unlikely(objp))
3310*4882a593Smuzhiyun goto out;
3311*4882a593Smuzhiyun
3312*4882a593Smuzhiyun cache_alloc_debugcheck_before(cachep, flags);
3313*4882a593Smuzhiyun local_irq_save(save_flags);
3314*4882a593Smuzhiyun objp = __do_cache_alloc(cachep, flags);
3315*4882a593Smuzhiyun local_irq_restore(save_flags);
3316*4882a593Smuzhiyun objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3317*4882a593Smuzhiyun prefetchw(objp);
3318*4882a593Smuzhiyun init = slab_want_init_on_alloc(flags, cachep);
3319*4882a593Smuzhiyun
3320*4882a593Smuzhiyun out:
3321*4882a593Smuzhiyun slab_post_alloc_hook(cachep, objcg, flags, 1, &objp, init);
3322*4882a593Smuzhiyun return objp;
3323*4882a593Smuzhiyun }
3324*4882a593Smuzhiyun
3325*4882a593Smuzhiyun /*
3326*4882a593Smuzhiyun * Caller needs to acquire correct kmem_cache_node's list_lock
3327*4882a593Smuzhiyun * @list: List of detached free slabs, to be freed by the caller
3328*4882a593Smuzhiyun */
3329*4882a593Smuzhiyun static void free_block(struct kmem_cache *cachep, void **objpp,
3330*4882a593Smuzhiyun int nr_objects, int node, struct list_head *list)
3331*4882a593Smuzhiyun {
3332*4882a593Smuzhiyun int i;
3333*4882a593Smuzhiyun struct kmem_cache_node *n = get_node(cachep, node);
3334*4882a593Smuzhiyun struct page *page;
3335*4882a593Smuzhiyun
3336*4882a593Smuzhiyun n->free_objects += nr_objects;
3337*4882a593Smuzhiyun
3338*4882a593Smuzhiyun for (i = 0; i < nr_objects; i++) {
3339*4882a593Smuzhiyun void *objp;
3340*4882a593Smuzhiyun struct page *page;
3341*4882a593Smuzhiyun
3342*4882a593Smuzhiyun objp = objpp[i];
3343*4882a593Smuzhiyun
3344*4882a593Smuzhiyun page = virt_to_head_page(objp);
3345*4882a593Smuzhiyun list_del(&page->slab_list);
3346*4882a593Smuzhiyun check_spinlock_acquired_node(cachep, node);
3347*4882a593Smuzhiyun slab_put_obj(cachep, page, objp);
3348*4882a593Smuzhiyun STATS_DEC_ACTIVE(cachep);
3349*4882a593Smuzhiyun
3350*4882a593Smuzhiyun /* fixup slab chains */
3351*4882a593Smuzhiyun if (page->active == 0) {
3352*4882a593Smuzhiyun list_add(&page->slab_list, &n->slabs_free);
3353*4882a593Smuzhiyun n->free_slabs++;
3354*4882a593Smuzhiyun } else {
3355*4882a593Smuzhiyun /* Unconditionally move a slab to the end of the
3356*4882a593Smuzhiyun * partial list on free - maximum time for the
3357*4882a593Smuzhiyun * other objects to be freed, too.
3358*4882a593Smuzhiyun */
3359*4882a593Smuzhiyun list_add_tail(&page->slab_list, &n->slabs_partial);
3360*4882a593Smuzhiyun }
3361*4882a593Smuzhiyun }
3362*4882a593Smuzhiyun
3363*4882a593Smuzhiyun while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) {
3364*4882a593Smuzhiyun n->free_objects -= cachep->num;
3365*4882a593Smuzhiyun
3366*4882a593Smuzhiyun page = list_last_entry(&n->slabs_free, struct page, slab_list);
3367*4882a593Smuzhiyun list_move(&page->slab_list, list);
3368*4882a593Smuzhiyun n->free_slabs--;
3369*4882a593Smuzhiyun n->total_slabs--;
3370*4882a593Smuzhiyun }
3371*4882a593Smuzhiyun }
3372*4882a593Smuzhiyun
3373*4882a593Smuzhiyun static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3374*4882a593Smuzhiyun {
3375*4882a593Smuzhiyun int batchcount;
3376*4882a593Smuzhiyun struct kmem_cache_node *n;
3377*4882a593Smuzhiyun int node = numa_mem_id();
3378*4882a593Smuzhiyun LIST_HEAD(list);
3379*4882a593Smuzhiyun
3380*4882a593Smuzhiyun batchcount = ac->batchcount;
3381*4882a593Smuzhiyun
3382*4882a593Smuzhiyun check_irq_off();
3383*4882a593Smuzhiyun n = get_node(cachep, node);
3384*4882a593Smuzhiyun spin_lock(&n->list_lock);
3385*4882a593Smuzhiyun if (n->shared) {
3386*4882a593Smuzhiyun struct array_cache *shared_array = n->shared;
3387*4882a593Smuzhiyun int max = shared_array->limit - shared_array->avail;
3388*4882a593Smuzhiyun if (max) {
3389*4882a593Smuzhiyun if (batchcount > max)
3390*4882a593Smuzhiyun batchcount = max;
3391*4882a593Smuzhiyun memcpy(&(shared_array->entry[shared_array->avail]),
3392*4882a593Smuzhiyun ac->entry, sizeof(void *) * batchcount);
3393*4882a593Smuzhiyun shared_array->avail += batchcount;
3394*4882a593Smuzhiyun goto free_done;
3395*4882a593Smuzhiyun }
3396*4882a593Smuzhiyun }
3397*4882a593Smuzhiyun
3398*4882a593Smuzhiyun free_block(cachep, ac->entry, batchcount, node, &list);
3399*4882a593Smuzhiyun free_done:
3400*4882a593Smuzhiyun #if STATS
3401*4882a593Smuzhiyun {
3402*4882a593Smuzhiyun int i = 0;
3403*4882a593Smuzhiyun struct page *page;
3404*4882a593Smuzhiyun
3405*4882a593Smuzhiyun list_for_each_entry(page, &n->slabs_free, slab_list) {
3406*4882a593Smuzhiyun BUG_ON(page->active);
3407*4882a593Smuzhiyun
3408*4882a593Smuzhiyun i++;
3409*4882a593Smuzhiyun }
3410*4882a593Smuzhiyun STATS_SET_FREEABLE(cachep, i);
3411*4882a593Smuzhiyun }
3412*4882a593Smuzhiyun #endif
3413*4882a593Smuzhiyun spin_unlock(&n->list_lock);
3414*4882a593Smuzhiyun ac->avail -= batchcount;
3415*4882a593Smuzhiyun memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3416*4882a593Smuzhiyun slabs_destroy(cachep, &list);
3417*4882a593Smuzhiyun }
3418*4882a593Smuzhiyun
3419*4882a593Smuzhiyun /*
3420*4882a593Smuzhiyun * Release an obj back to its cache. If the obj has a constructed state, it must
3421*4882a593Smuzhiyun * be in this state _before_ it is released. Called with disabled ints.
3422*4882a593Smuzhiyun */
3423*4882a593Smuzhiyun static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp,
3424*4882a593Smuzhiyun unsigned long caller)
3425*4882a593Smuzhiyun {
3426*4882a593Smuzhiyun bool init;
3427*4882a593Smuzhiyun
3428*4882a593Smuzhiyun if (is_kfence_address(objp)) {
3429*4882a593Smuzhiyun kmemleak_free_recursive(objp, cachep->flags);
3430*4882a593Smuzhiyun memcg_slab_free_hook(cachep, &objp, 1);
3431*4882a593Smuzhiyun __kfence_free(objp);
3432*4882a593Smuzhiyun return;
3433*4882a593Smuzhiyun }
3434*4882a593Smuzhiyun
3435*4882a593Smuzhiyun /*
3436*4882a593Smuzhiyun * As memory initialization might be integrated into KASAN,
3437*4882a593Smuzhiyun * kasan_slab_free and initialization memset must be
3438*4882a593Smuzhiyun * kept together to avoid discrepancies in behavior.
3439*4882a593Smuzhiyun */
3440*4882a593Smuzhiyun init = slab_want_init_on_free(cachep);
3441*4882a593Smuzhiyun if (init && !kasan_has_integrated_init())
3442*4882a593Smuzhiyun memset(objp, 0, cachep->object_size);
3443*4882a593Smuzhiyun /* KASAN might put objp into memory quarantine, delaying its reuse. */
3444*4882a593Smuzhiyun if (kasan_slab_free(cachep, objp, init))
3445*4882a593Smuzhiyun return;
3446*4882a593Smuzhiyun
3447*4882a593Smuzhiyun /* Use KCSAN to help debug racy use-after-free. */
3448*4882a593Smuzhiyun if (!(cachep->flags & SLAB_TYPESAFE_BY_RCU))
3449*4882a593Smuzhiyun __kcsan_check_access(objp, cachep->object_size,
3450*4882a593Smuzhiyun KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
3451*4882a593Smuzhiyun
3452*4882a593Smuzhiyun ___cache_free(cachep, objp, caller);
3453*4882a593Smuzhiyun }
3454*4882a593Smuzhiyun
3455*4882a593Smuzhiyun void ___cache_free(struct kmem_cache *cachep, void *objp,
3456*4882a593Smuzhiyun unsigned long caller)
3457*4882a593Smuzhiyun {
3458*4882a593Smuzhiyun struct array_cache *ac = cpu_cache_get(cachep);
3459*4882a593Smuzhiyun
3460*4882a593Smuzhiyun check_irq_off();
3461*4882a593Smuzhiyun kmemleak_free_recursive(objp, cachep->flags);
3462*4882a593Smuzhiyun objp = cache_free_debugcheck(cachep, objp, caller);
3463*4882a593Smuzhiyun memcg_slab_free_hook(cachep, &objp, 1);
3464*4882a593Smuzhiyun
3465*4882a593Smuzhiyun /*
3466*4882a593Smuzhiyun * Skip calling cache_free_alien() when the platform is not NUMA.
3467*4882a593Smuzhiyun * This avoids the cache misses incurred while accessing the slab page
3468*4882a593Smuzhiyun * (a per-page memory reference) to get the nodeid. Instead, use a global
3469*4882a593Smuzhiyun * variable to skip the call, which is most likely to be present in
3470*4882a593Smuzhiyun * the cache.
3471*4882a593Smuzhiyun */
3472*4882a593Smuzhiyun if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
3473*4882a593Smuzhiyun return;
3474*4882a593Smuzhiyun
3475*4882a593Smuzhiyun if (ac->avail < ac->limit) {
3476*4882a593Smuzhiyun STATS_INC_FREEHIT(cachep);
3477*4882a593Smuzhiyun } else {
3478*4882a593Smuzhiyun STATS_INC_FREEMISS(cachep);
3479*4882a593Smuzhiyun cache_flusharray(cachep, ac);
3480*4882a593Smuzhiyun }
3481*4882a593Smuzhiyun
3482*4882a593Smuzhiyun if (sk_memalloc_socks()) {
3483*4882a593Smuzhiyun struct page *page = virt_to_head_page(objp);
3484*4882a593Smuzhiyun
3485*4882a593Smuzhiyun if (unlikely(PageSlabPfmemalloc(page))) {
3486*4882a593Smuzhiyun cache_free_pfmemalloc(cachep, page, objp);
3487*4882a593Smuzhiyun return;
3488*4882a593Smuzhiyun }
3489*4882a593Smuzhiyun }
3490*4882a593Smuzhiyun
3491*4882a593Smuzhiyun __free_one(ac, objp);
3492*4882a593Smuzhiyun }
3493*4882a593Smuzhiyun
3494*4882a593Smuzhiyun /**
3495*4882a593Smuzhiyun * kmem_cache_alloc - Allocate an object
3496*4882a593Smuzhiyun * @cachep: The cache to allocate from.
3497*4882a593Smuzhiyun * @flags: See kmalloc().
3498*4882a593Smuzhiyun *
3499*4882a593Smuzhiyun * Allocate an object from this cache. The flags are only relevant
3500*4882a593Smuzhiyun * if the cache has no available objects.
3501*4882a593Smuzhiyun *
3502*4882a593Smuzhiyun * Return: pointer to the new object or %NULL in case of error
3503*4882a593Smuzhiyun */
3504*4882a593Smuzhiyun void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3505*4882a593Smuzhiyun {
3506*4882a593Smuzhiyun void *ret = slab_alloc(cachep, flags, cachep->object_size, _RET_IP_);
3507*4882a593Smuzhiyun
3508*4882a593Smuzhiyun trace_kmem_cache_alloc(_RET_IP_, ret,
3509*4882a593Smuzhiyun cachep->object_size, cachep->size, flags);
3510*4882a593Smuzhiyun
3511*4882a593Smuzhiyun return ret;
3512*4882a593Smuzhiyun }
3513*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc);
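/*
 * Illustrative usage sketch (the struct "foo" and foo_cache below are
 * hypothetical, not part of this file): kmem_cache_alloc() is normally
 * paired with a cache created by kmem_cache_create() and released with
 * kmem_cache_free()/kmem_cache_destroy().
 *
 *	static struct kmem_cache *foo_cache;
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				      SLAB_HWCACHE_ALIGN, NULL);
 *	if (!foo_cache)
 *		return -ENOMEM;
 *
 *	obj = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *	if (!obj)
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free(foo_cache, obj);
 *	kmem_cache_destroy(foo_cache);
 */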
3514*4882a593Smuzhiyun
3515*4882a593Smuzhiyun static __always_inline void
3516*4882a593Smuzhiyun cache_alloc_debugcheck_after_bulk(struct kmem_cache *s, gfp_t flags,
3517*4882a593Smuzhiyun size_t size, void **p, unsigned long caller)
3518*4882a593Smuzhiyun {
3519*4882a593Smuzhiyun size_t i;
3520*4882a593Smuzhiyun
3521*4882a593Smuzhiyun for (i = 0; i < size; i++)
3522*4882a593Smuzhiyun p[i] = cache_alloc_debugcheck_after(s, flags, p[i], caller);
3523*4882a593Smuzhiyun }
3524*4882a593Smuzhiyun
3525*4882a593Smuzhiyun int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3526*4882a593Smuzhiyun void **p)
3527*4882a593Smuzhiyun {
3528*4882a593Smuzhiyun size_t i;
3529*4882a593Smuzhiyun struct obj_cgroup *objcg = NULL;
3530*4882a593Smuzhiyun
3531*4882a593Smuzhiyun s = slab_pre_alloc_hook(s, &objcg, size, flags);
3532*4882a593Smuzhiyun if (!s)
3533*4882a593Smuzhiyun return 0;
3534*4882a593Smuzhiyun
3535*4882a593Smuzhiyun cache_alloc_debugcheck_before(s, flags);
3536*4882a593Smuzhiyun
3537*4882a593Smuzhiyun local_irq_disable();
3538*4882a593Smuzhiyun for (i = 0; i < size; i++) {
3539*4882a593Smuzhiyun void *objp = kfence_alloc(s, s->object_size, flags) ?: __do_cache_alloc(s, flags);
3540*4882a593Smuzhiyun
3541*4882a593Smuzhiyun if (unlikely(!objp))
3542*4882a593Smuzhiyun goto error;
3543*4882a593Smuzhiyun p[i] = objp;
3544*4882a593Smuzhiyun }
3545*4882a593Smuzhiyun local_irq_enable();
3546*4882a593Smuzhiyun
3547*4882a593Smuzhiyun cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_);
3548*4882a593Smuzhiyun
3549*4882a593Smuzhiyun /*
3550*4882a593Smuzhiyun * memcg and kmem_cache debug support and memory initialization.
3551*4882a593Smuzhiyun * Done outside of the IRQ disabled section.
3552*4882a593Smuzhiyun */
3553*4882a593Smuzhiyun slab_post_alloc_hook(s, objcg, flags, size, p,
3554*4882a593Smuzhiyun slab_want_init_on_alloc(flags, s));
3555*4882a593Smuzhiyun /* FIXME: Trace call missing. Christoph would like a bulk variant */
3556*4882a593Smuzhiyun return size;
3557*4882a593Smuzhiyun error:
3558*4882a593Smuzhiyun local_irq_enable();
3559*4882a593Smuzhiyun cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
3560*4882a593Smuzhiyun slab_post_alloc_hook(s, objcg, flags, i, p, false);
3561*4882a593Smuzhiyun __kmem_cache_free_bulk(s, i, p);
3562*4882a593Smuzhiyun return 0;
3563*4882a593Smuzhiyun }
3564*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc_bulk);
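/*
 * Minimal bulk-API sketch (foo_cache and the array size are illustrative):
 * in this implementation the call either fills the whole array and returns
 * size, or frees what it got and returns 0. The batch is released again
 * with kmem_cache_free_bulk().
 *
 *	void *objs[16];
 *
 *	if (!kmem_cache_alloc_bulk(foo_cache, GFP_KERNEL, ARRAY_SIZE(objs), objs))
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free_bulk(foo_cache, ARRAY_SIZE(objs), objs);
 */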
3565*4882a593Smuzhiyun
3566*4882a593Smuzhiyun #ifdef CONFIG_TRACING
3567*4882a593Smuzhiyun void *
3568*4882a593Smuzhiyun kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
3569*4882a593Smuzhiyun {
3570*4882a593Smuzhiyun void *ret;
3571*4882a593Smuzhiyun
3572*4882a593Smuzhiyun ret = slab_alloc(cachep, flags, size, _RET_IP_);
3573*4882a593Smuzhiyun
3574*4882a593Smuzhiyun ret = kasan_kmalloc(cachep, ret, size, flags);
3575*4882a593Smuzhiyun trace_kmalloc(_RET_IP_, ret,
3576*4882a593Smuzhiyun size, cachep->size, flags);
3577*4882a593Smuzhiyun return ret;
3578*4882a593Smuzhiyun }
3579*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc_trace);
3580*4882a593Smuzhiyun #endif
3581*4882a593Smuzhiyun
3582*4882a593Smuzhiyun #ifdef CONFIG_NUMA
3583*4882a593Smuzhiyun /**
3584*4882a593Smuzhiyun * kmem_cache_alloc_node - Allocate an object on the specified node
3585*4882a593Smuzhiyun * @cachep: The cache to allocate from.
3586*4882a593Smuzhiyun * @flags: See kmalloc().
3587*4882a593Smuzhiyun * @nodeid: node number of the target node.
3588*4882a593Smuzhiyun *
3589*4882a593Smuzhiyun * Identical to kmem_cache_alloc but it will allocate memory on the given
3590*4882a593Smuzhiyun * node, which can improve the performance for cpu bound structures.
3591*4882a593Smuzhiyun *
3592*4882a593Smuzhiyun * Fallback to another node is possible if __GFP_THISNODE is not set.
3593*4882a593Smuzhiyun *
3594*4882a593Smuzhiyun * Return: pointer to the new object or %NULL in case of error
3595*4882a593Smuzhiyun */
3596*4882a593Smuzhiyun void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3597*4882a593Smuzhiyun {
3598*4882a593Smuzhiyun void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
3599*4882a593Smuzhiyun
3600*4882a593Smuzhiyun trace_kmem_cache_alloc_node(_RET_IP_, ret,
3601*4882a593Smuzhiyun cachep->object_size, cachep->size,
3602*4882a593Smuzhiyun flags, nodeid);
3603*4882a593Smuzhiyun
3604*4882a593Smuzhiyun return ret;
3605*4882a593Smuzhiyun }
3606*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc_node);
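/*
 * Node-local allocation sketch (foo_cache is hypothetical; the node id is
 * typically derived from the CPU that will use the object):
 *
 *	obj = kmem_cache_alloc_node(foo_cache, GFP_KERNEL, cpu_to_node(cpu));
 *	if (!obj)
 *		return -ENOMEM;
 */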
3607*4882a593Smuzhiyun
3608*4882a593Smuzhiyun #ifdef CONFIG_TRACING
3609*4882a593Smuzhiyun void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
3610*4882a593Smuzhiyun gfp_t flags,
3611*4882a593Smuzhiyun int nodeid,
3612*4882a593Smuzhiyun size_t size)
3613*4882a593Smuzhiyun {
3614*4882a593Smuzhiyun void *ret;
3615*4882a593Smuzhiyun
3616*4882a593Smuzhiyun ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
3617*4882a593Smuzhiyun
3618*4882a593Smuzhiyun ret = kasan_kmalloc(cachep, ret, size, flags);
3619*4882a593Smuzhiyun trace_kmalloc_node(_RET_IP_, ret,
3620*4882a593Smuzhiyun size, cachep->size,
3621*4882a593Smuzhiyun flags, nodeid);
3622*4882a593Smuzhiyun return ret;
3623*4882a593Smuzhiyun }
3624*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
3625*4882a593Smuzhiyun #endif
3626*4882a593Smuzhiyun
3627*4882a593Smuzhiyun static __always_inline void *
3628*4882a593Smuzhiyun __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
3629*4882a593Smuzhiyun {
3630*4882a593Smuzhiyun struct kmem_cache *cachep;
3631*4882a593Smuzhiyun void *ret;
3632*4882a593Smuzhiyun
3633*4882a593Smuzhiyun if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3634*4882a593Smuzhiyun return NULL;
3635*4882a593Smuzhiyun cachep = kmalloc_slab(size, flags);
3636*4882a593Smuzhiyun if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3637*4882a593Smuzhiyun return cachep;
3638*4882a593Smuzhiyun ret = kmem_cache_alloc_node_trace(cachep, flags, node, size);
3639*4882a593Smuzhiyun ret = kasan_kmalloc(cachep, ret, size, flags);
3640*4882a593Smuzhiyun
3641*4882a593Smuzhiyun return ret;
3642*4882a593Smuzhiyun }
3643*4882a593Smuzhiyun
3644*4882a593Smuzhiyun void *__kmalloc_node(size_t size, gfp_t flags, int node)
3645*4882a593Smuzhiyun {
3646*4882a593Smuzhiyun return __do_kmalloc_node(size, flags, node, _RET_IP_);
3647*4882a593Smuzhiyun }
3648*4882a593Smuzhiyun EXPORT_SYMBOL(__kmalloc_node);
3649*4882a593Smuzhiyun
3650*4882a593Smuzhiyun void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
3651*4882a593Smuzhiyun int node, unsigned long caller)
3652*4882a593Smuzhiyun {
3653*4882a593Smuzhiyun return __do_kmalloc_node(size, flags, node, caller);
3654*4882a593Smuzhiyun }
3655*4882a593Smuzhiyun EXPORT_SYMBOL(__kmalloc_node_track_caller);
3656*4882a593Smuzhiyun #endif /* CONFIG_NUMA */
3657*4882a593Smuzhiyun
3658*4882a593Smuzhiyun /**
3659*4882a593Smuzhiyun * __do_kmalloc - allocate memory
3660*4882a593Smuzhiyun * @size: how many bytes of memory are required.
3661*4882a593Smuzhiyun * @flags: the type of memory to allocate (see kmalloc).
3662*4882a593Smuzhiyun * @caller: function caller for debug tracking of the caller
3663*4882a593Smuzhiyun *
3664*4882a593Smuzhiyun * Return: pointer to the allocated memory or %NULL in case of error
3665*4882a593Smuzhiyun */
3666*4882a593Smuzhiyun static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3667*4882a593Smuzhiyun unsigned long caller)
3668*4882a593Smuzhiyun {
3669*4882a593Smuzhiyun struct kmem_cache *cachep;
3670*4882a593Smuzhiyun void *ret;
3671*4882a593Smuzhiyun
3672*4882a593Smuzhiyun if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3673*4882a593Smuzhiyun return NULL;
3674*4882a593Smuzhiyun cachep = kmalloc_slab(size, flags);
3675*4882a593Smuzhiyun if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3676*4882a593Smuzhiyun return cachep;
3677*4882a593Smuzhiyun ret = slab_alloc(cachep, flags, size, caller);
3678*4882a593Smuzhiyun
3679*4882a593Smuzhiyun ret = kasan_kmalloc(cachep, ret, size, flags);
3680*4882a593Smuzhiyun trace_kmalloc(caller, ret,
3681*4882a593Smuzhiyun size, cachep->size, flags);
3682*4882a593Smuzhiyun
3683*4882a593Smuzhiyun return ret;
3684*4882a593Smuzhiyun }
3685*4882a593Smuzhiyun
3686*4882a593Smuzhiyun void *__kmalloc(size_t size, gfp_t flags)
3687*4882a593Smuzhiyun {
3688*4882a593Smuzhiyun return __do_kmalloc(size, flags, _RET_IP_);
3689*4882a593Smuzhiyun }
3690*4882a593Smuzhiyun EXPORT_SYMBOL(__kmalloc);
3691*4882a593Smuzhiyun
3692*4882a593Smuzhiyun void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
3693*4882a593Smuzhiyun {
3694*4882a593Smuzhiyun return __do_kmalloc(size, flags, caller);
3695*4882a593Smuzhiyun }
3696*4882a593Smuzhiyun EXPORT_SYMBOL(__kmalloc_track_caller);
3697*4882a593Smuzhiyun
3698*4882a593Smuzhiyun /**
3699*4882a593Smuzhiyun * kmem_cache_free - Deallocate an object
3700*4882a593Smuzhiyun * @cachep: The cache the allocation was from.
3701*4882a593Smuzhiyun * @objp: The previously allocated object.
3702*4882a593Smuzhiyun *
3703*4882a593Smuzhiyun * Free an object which was previously allocated from this
3704*4882a593Smuzhiyun * cache.
3705*4882a593Smuzhiyun */
3706*4882a593Smuzhiyun void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3707*4882a593Smuzhiyun {
3708*4882a593Smuzhiyun unsigned long flags;
3709*4882a593Smuzhiyun cachep = cache_from_obj(cachep, objp);
3710*4882a593Smuzhiyun if (!cachep)
3711*4882a593Smuzhiyun return;
3712*4882a593Smuzhiyun
3713*4882a593Smuzhiyun local_irq_save(flags);
3714*4882a593Smuzhiyun debug_check_no_locks_freed(objp, cachep->object_size);
3715*4882a593Smuzhiyun if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3716*4882a593Smuzhiyun debug_check_no_obj_freed(objp, cachep->object_size);
3717*4882a593Smuzhiyun __cache_free(cachep, objp, _RET_IP_);
3718*4882a593Smuzhiyun local_irq_restore(flags);
3719*4882a593Smuzhiyun
3720*4882a593Smuzhiyun trace_kmem_cache_free(_RET_IP_, objp);
3721*4882a593Smuzhiyun }
3722*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_free);
3723*4882a593Smuzhiyun
3724*4882a593Smuzhiyun void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
3725*4882a593Smuzhiyun {
3726*4882a593Smuzhiyun struct kmem_cache *s;
3727*4882a593Smuzhiyun size_t i;
3728*4882a593Smuzhiyun
3729*4882a593Smuzhiyun local_irq_disable();
3730*4882a593Smuzhiyun for (i = 0; i < size; i++) {
3731*4882a593Smuzhiyun void *objp = p[i];
3732*4882a593Smuzhiyun
3733*4882a593Smuzhiyun if (!orig_s) /* called via kfree_bulk */
3734*4882a593Smuzhiyun s = virt_to_cache(objp);
3735*4882a593Smuzhiyun else
3736*4882a593Smuzhiyun s = cache_from_obj(orig_s, objp);
3737*4882a593Smuzhiyun if (!s)
3738*4882a593Smuzhiyun continue;
3739*4882a593Smuzhiyun
3740*4882a593Smuzhiyun debug_check_no_locks_freed(objp, s->object_size);
3741*4882a593Smuzhiyun if (!(s->flags & SLAB_DEBUG_OBJECTS))
3742*4882a593Smuzhiyun debug_check_no_obj_freed(objp, s->object_size);
3743*4882a593Smuzhiyun
3744*4882a593Smuzhiyun __cache_free(s, objp, _RET_IP_);
3745*4882a593Smuzhiyun }
3746*4882a593Smuzhiyun local_irq_enable();
3747*4882a593Smuzhiyun
3748*4882a593Smuzhiyun /* FIXME: add tracing */
3749*4882a593Smuzhiyun }
3750*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_free_bulk);
3751*4882a593Smuzhiyun
3752*4882a593Smuzhiyun /**
3753*4882a593Smuzhiyun * kfree - free previously allocated memory
3754*4882a593Smuzhiyun * @objp: pointer returned by kmalloc.
3755*4882a593Smuzhiyun *
3756*4882a593Smuzhiyun * If @objp is NULL, no operation is performed.
3757*4882a593Smuzhiyun *
3758*4882a593Smuzhiyun * Don't free memory not originally allocated by kmalloc()
3759*4882a593Smuzhiyun * or you will run into trouble.
3760*4882a593Smuzhiyun */
3761*4882a593Smuzhiyun void kfree(const void *objp)
3762*4882a593Smuzhiyun {
3763*4882a593Smuzhiyun struct kmem_cache *c;
3764*4882a593Smuzhiyun unsigned long flags;
3765*4882a593Smuzhiyun
3766*4882a593Smuzhiyun trace_kfree(_RET_IP_, objp);
3767*4882a593Smuzhiyun
3768*4882a593Smuzhiyun if (unlikely(ZERO_OR_NULL_PTR(objp)))
3769*4882a593Smuzhiyun return;
3770*4882a593Smuzhiyun local_irq_save(flags);
3771*4882a593Smuzhiyun kfree_debugcheck(objp);
3772*4882a593Smuzhiyun c = virt_to_cache(objp);
3773*4882a593Smuzhiyun if (!c) {
3774*4882a593Smuzhiyun local_irq_restore(flags);
3775*4882a593Smuzhiyun return;
3776*4882a593Smuzhiyun }
3777*4882a593Smuzhiyun debug_check_no_locks_freed(objp, c->object_size);
3778*4882a593Smuzhiyun
3779*4882a593Smuzhiyun debug_check_no_obj_freed(objp, c->object_size);
3780*4882a593Smuzhiyun __cache_free(c, (void *)objp, _RET_IP_);
3781*4882a593Smuzhiyun local_irq_restore(flags);
3782*4882a593Smuzhiyun }
3783*4882a593Smuzhiyun EXPORT_SYMBOL(kfree);
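/*
 * kmalloc()/kfree() pairing sketch (size and names are illustrative): small
 * requests are served from the per-size kmalloc caches, and kfree() finds
 * the owning cache again via virt_to_cache() as above.
 *
 *	char *buf = kmalloc(128, GFP_KERNEL);
 *
 *	if (!buf)
 *		return -ENOMEM;
 *	...
 *	kfree(buf);
 */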
3784*4882a593Smuzhiyun
3785*4882a593Smuzhiyun /*
3786*4882a593Smuzhiyun * This initializes kmem_cache_node or resizes various caches for all nodes.
3787*4882a593Smuzhiyun */
3788*4882a593Smuzhiyun static int setup_kmem_cache_nodes(struct kmem_cache *cachep, gfp_t gfp)
3789*4882a593Smuzhiyun {
3790*4882a593Smuzhiyun int ret;
3791*4882a593Smuzhiyun int node;
3792*4882a593Smuzhiyun struct kmem_cache_node *n;
3793*4882a593Smuzhiyun
3794*4882a593Smuzhiyun for_each_online_node(node) {
3795*4882a593Smuzhiyun ret = setup_kmem_cache_node(cachep, node, gfp, true);
3796*4882a593Smuzhiyun if (ret)
3797*4882a593Smuzhiyun goto fail;
3798*4882a593Smuzhiyun
3799*4882a593Smuzhiyun }
3800*4882a593Smuzhiyun
3801*4882a593Smuzhiyun return 0;
3802*4882a593Smuzhiyun
3803*4882a593Smuzhiyun fail:
3804*4882a593Smuzhiyun if (!cachep->list.next) {
3805*4882a593Smuzhiyun /* Cache is not active yet. Roll back what we did */
3806*4882a593Smuzhiyun node--;
3807*4882a593Smuzhiyun while (node >= 0) {
3808*4882a593Smuzhiyun n = get_node(cachep, node);
3809*4882a593Smuzhiyun if (n) {
3810*4882a593Smuzhiyun kfree(n->shared);
3811*4882a593Smuzhiyun free_alien_cache(n->alien);
3812*4882a593Smuzhiyun kfree(n);
3813*4882a593Smuzhiyun cachep->node[node] = NULL;
3814*4882a593Smuzhiyun }
3815*4882a593Smuzhiyun node--;
3816*4882a593Smuzhiyun }
3817*4882a593Smuzhiyun }
3818*4882a593Smuzhiyun return -ENOMEM;
3819*4882a593Smuzhiyun }
3820*4882a593Smuzhiyun
3821*4882a593Smuzhiyun /* Always called with the slab_mutex held */
3822*4882a593Smuzhiyun static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3823*4882a593Smuzhiyun int batchcount, int shared, gfp_t gfp)
3824*4882a593Smuzhiyun {
3825*4882a593Smuzhiyun struct array_cache __percpu *cpu_cache, *prev;
3826*4882a593Smuzhiyun int cpu;
3827*4882a593Smuzhiyun
3828*4882a593Smuzhiyun cpu_cache = alloc_kmem_cache_cpus(cachep, limit, batchcount);
3829*4882a593Smuzhiyun if (!cpu_cache)
3830*4882a593Smuzhiyun return -ENOMEM;
3831*4882a593Smuzhiyun
3832*4882a593Smuzhiyun prev = cachep->cpu_cache;
3833*4882a593Smuzhiyun cachep->cpu_cache = cpu_cache;
3834*4882a593Smuzhiyun /*
3835*4882a593Smuzhiyun * Without a previous cpu_cache there's no need to synchronize remote
3836*4882a593Smuzhiyun * cpus, so skip the IPIs.
3837*4882a593Smuzhiyun */
3838*4882a593Smuzhiyun if (prev)
3839*4882a593Smuzhiyun kick_all_cpus_sync();
3840*4882a593Smuzhiyun
3841*4882a593Smuzhiyun check_irq_on();
3842*4882a593Smuzhiyun cachep->batchcount = batchcount;
3843*4882a593Smuzhiyun cachep->limit = limit;
3844*4882a593Smuzhiyun cachep->shared = shared;
3845*4882a593Smuzhiyun
3846*4882a593Smuzhiyun if (!prev)
3847*4882a593Smuzhiyun goto setup_node;
3848*4882a593Smuzhiyun
3849*4882a593Smuzhiyun for_each_online_cpu(cpu) {
3850*4882a593Smuzhiyun LIST_HEAD(list);
3851*4882a593Smuzhiyun int node;
3852*4882a593Smuzhiyun struct kmem_cache_node *n;
3853*4882a593Smuzhiyun struct array_cache *ac = per_cpu_ptr(prev, cpu);
3854*4882a593Smuzhiyun
3855*4882a593Smuzhiyun node = cpu_to_mem(cpu);
3856*4882a593Smuzhiyun n = get_node(cachep, node);
3857*4882a593Smuzhiyun spin_lock_irq(&n->list_lock);
3858*4882a593Smuzhiyun free_block(cachep, ac->entry, ac->avail, node, &list);
3859*4882a593Smuzhiyun spin_unlock_irq(&n->list_lock);
3860*4882a593Smuzhiyun slabs_destroy(cachep, &list);
3861*4882a593Smuzhiyun }
3862*4882a593Smuzhiyun free_percpu(prev);
3863*4882a593Smuzhiyun
3864*4882a593Smuzhiyun setup_node:
3865*4882a593Smuzhiyun return setup_kmem_cache_nodes(cachep, gfp);
3866*4882a593Smuzhiyun }
3867*4882a593Smuzhiyun
3868*4882a593Smuzhiyun /* Called with slab_mutex held always */
3869*4882a593Smuzhiyun static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
3870*4882a593Smuzhiyun {
3871*4882a593Smuzhiyun int err;
3872*4882a593Smuzhiyun int limit = 0;
3873*4882a593Smuzhiyun int shared = 0;
3874*4882a593Smuzhiyun int batchcount = 0;
3875*4882a593Smuzhiyun
3876*4882a593Smuzhiyun err = cache_random_seq_create(cachep, cachep->num, gfp);
3877*4882a593Smuzhiyun if (err)
3878*4882a593Smuzhiyun goto end;
3879*4882a593Smuzhiyun
3880*4882a593Smuzhiyun if (limit && shared && batchcount)
3881*4882a593Smuzhiyun goto skip_setup;
3882*4882a593Smuzhiyun /*
3883*4882a593Smuzhiyun * The head array serves three purposes:
3884*4882a593Smuzhiyun * - create a LIFO ordering, i.e. return objects that are cache-warm
3885*4882a593Smuzhiyun * - reduce the number of spinlock operations.
3886*4882a593Smuzhiyun * - reduce the number of linked list operations on the slab and
3887*4882a593Smuzhiyun * bufctl chains: array operations are cheaper.
3888*4882a593Smuzhiyun * The numbers are guessed; we should auto-tune them as described by
3889*4882a593Smuzhiyun * Bonwick.
3890*4882a593Smuzhiyun */
3891*4882a593Smuzhiyun if (cachep->size > 131072)
3892*4882a593Smuzhiyun limit = 1;
3893*4882a593Smuzhiyun else if (cachep->size > PAGE_SIZE)
3894*4882a593Smuzhiyun limit = 8;
3895*4882a593Smuzhiyun else if (cachep->size > 1024)
3896*4882a593Smuzhiyun limit = 24;
3897*4882a593Smuzhiyun else if (cachep->size > 256)
3898*4882a593Smuzhiyun limit = 54;
3899*4882a593Smuzhiyun else
3900*4882a593Smuzhiyun limit = 120;
3901*4882a593Smuzhiyun
3902*4882a593Smuzhiyun /*
3903*4882a593Smuzhiyun * CPU bound tasks (e.g. network routing) can exhibit cpu bound
3904*4882a593Smuzhiyun * allocation behaviour: Most allocs on one cpu, most free operations
3905*4882a593Smuzhiyun * on another cpu. For these cases, an efficient object passing between
3906*4882a593Smuzhiyun * cpus is necessary. This is provided by a shared array. The array
3907*4882a593Smuzhiyun * replaces Bonwick's magazine layer.
3908*4882a593Smuzhiyun * On uniprocessor, it's functionally equivalent (but less efficient)
3909*4882a593Smuzhiyun * to a larger limit. Thus disabled by default.
3910*4882a593Smuzhiyun */
3911*4882a593Smuzhiyun shared = 0;
3912*4882a593Smuzhiyun if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
3913*4882a593Smuzhiyun shared = 8;
3914*4882a593Smuzhiyun
3915*4882a593Smuzhiyun #if DEBUG
3916*4882a593Smuzhiyun /*
3917*4882a593Smuzhiyun * With debugging enabled, a large batchcount leads to excessively long
3918*4882a593Smuzhiyun * periods with local interrupts disabled. Limit the batchcount.
3919*4882a593Smuzhiyun */
3920*4882a593Smuzhiyun if (limit > 32)
3921*4882a593Smuzhiyun limit = 32;
3922*4882a593Smuzhiyun #endif
3923*4882a593Smuzhiyun batchcount = (limit + 1) / 2;
3924*4882a593Smuzhiyun skip_setup:
3925*4882a593Smuzhiyun err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
3926*4882a593Smuzhiyun end:
3927*4882a593Smuzhiyun if (err)
3928*4882a593Smuzhiyun pr_err("enable_cpucache failed for %s, error %d\n",
3929*4882a593Smuzhiyun cachep->name, -err);
3930*4882a593Smuzhiyun return err;
3931*4882a593Smuzhiyun }
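/*
 * Worked example of the defaults chosen above (without DEBUG): for a cache
 * with 512-byte objects, limit = 54 (256 < size <= 1024), shared = 8 on a
 * multi-CPU system since size <= PAGE_SIZE, and batchcount = (54 + 1) / 2 = 27.
 */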
3932*4882a593Smuzhiyun
3933*4882a593Smuzhiyun /*
3934*4882a593Smuzhiyun * Drain an array if it contains any elements taking the node lock only if
3935*4882a593Smuzhiyun * necessary. Note that the node listlock also protects the array_cache
3936*4882a593Smuzhiyun * if drain_array() is used on the shared array.
3937*4882a593Smuzhiyun */
3938*4882a593Smuzhiyun static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
3939*4882a593Smuzhiyun struct array_cache *ac, int node)
3940*4882a593Smuzhiyun {
3941*4882a593Smuzhiyun LIST_HEAD(list);
3942*4882a593Smuzhiyun
3943*4882a593Smuzhiyun /* ac from n->shared can be freed if we don't hold the slab_mutex. */
3944*4882a593Smuzhiyun check_mutex_acquired();
3945*4882a593Smuzhiyun
3946*4882a593Smuzhiyun if (!ac || !ac->avail)
3947*4882a593Smuzhiyun return;
3948*4882a593Smuzhiyun
3949*4882a593Smuzhiyun if (ac->touched) {
3950*4882a593Smuzhiyun ac->touched = 0;
3951*4882a593Smuzhiyun return;
3952*4882a593Smuzhiyun }
3953*4882a593Smuzhiyun
3954*4882a593Smuzhiyun spin_lock_irq(&n->list_lock);
3955*4882a593Smuzhiyun drain_array_locked(cachep, ac, node, false, &list);
3956*4882a593Smuzhiyun spin_unlock_irq(&n->list_lock);
3957*4882a593Smuzhiyun
3958*4882a593Smuzhiyun slabs_destroy(cachep, &list);
3959*4882a593Smuzhiyun }
3960*4882a593Smuzhiyun
3961*4882a593Smuzhiyun /**
3962*4882a593Smuzhiyun * cache_reap - Reclaim memory from caches.
3963*4882a593Smuzhiyun * @w: work descriptor
3964*4882a593Smuzhiyun *
3965*4882a593Smuzhiyun * Called from workqueue/eventd every few seconds.
3966*4882a593Smuzhiyun * Purpose:
3967*4882a593Smuzhiyun * - clear the per-cpu caches for this CPU.
3968*4882a593Smuzhiyun * - return freeable pages to the main free memory pool.
3969*4882a593Smuzhiyun *
3970*4882a593Smuzhiyun * If we cannot acquire the cache chain mutex then just give up - we'll try
3971*4882a593Smuzhiyun * again on the next iteration.
3972*4882a593Smuzhiyun */
3973*4882a593Smuzhiyun static void cache_reap(struct work_struct *w)
3974*4882a593Smuzhiyun {
3975*4882a593Smuzhiyun struct kmem_cache *searchp;
3976*4882a593Smuzhiyun struct kmem_cache_node *n;
3977*4882a593Smuzhiyun int node = numa_mem_id();
3978*4882a593Smuzhiyun struct delayed_work *work = to_delayed_work(w);
3979*4882a593Smuzhiyun
3980*4882a593Smuzhiyun if (!mutex_trylock(&slab_mutex))
3981*4882a593Smuzhiyun /* Give up. Setup the next iteration. */
3982*4882a593Smuzhiyun goto out;
3983*4882a593Smuzhiyun
3984*4882a593Smuzhiyun list_for_each_entry(searchp, &slab_caches, list) {
3985*4882a593Smuzhiyun check_irq_on();
3986*4882a593Smuzhiyun
3987*4882a593Smuzhiyun /*
3988*4882a593Smuzhiyun * We only take the node lock if absolutely necessary and we
3989*4882a593Smuzhiyun * have established with reasonable certainty that
3990*4882a593Smuzhiyun * we can do some work if the lock was obtained.
3991*4882a593Smuzhiyun */
3992*4882a593Smuzhiyun n = get_node(searchp, node);
3993*4882a593Smuzhiyun
3994*4882a593Smuzhiyun reap_alien(searchp, n);
3995*4882a593Smuzhiyun
3996*4882a593Smuzhiyun drain_array(searchp, n, cpu_cache_get(searchp), node);
3997*4882a593Smuzhiyun
3998*4882a593Smuzhiyun /*
3999*4882a593Smuzhiyun * These are racy checks but it does not matter
4000*4882a593Smuzhiyun * if we skip one check or scan twice.
4001*4882a593Smuzhiyun */
4002*4882a593Smuzhiyun if (time_after(n->next_reap, jiffies))
4003*4882a593Smuzhiyun goto next;
4004*4882a593Smuzhiyun
4005*4882a593Smuzhiyun n->next_reap = jiffies + REAPTIMEOUT_NODE;
4006*4882a593Smuzhiyun
4007*4882a593Smuzhiyun drain_array(searchp, n, n->shared, node);
4008*4882a593Smuzhiyun
4009*4882a593Smuzhiyun if (n->free_touched)
4010*4882a593Smuzhiyun n->free_touched = 0;
4011*4882a593Smuzhiyun else {
4012*4882a593Smuzhiyun int freed;
4013*4882a593Smuzhiyun
4014*4882a593Smuzhiyun freed = drain_freelist(searchp, n, (n->free_limit +
4015*4882a593Smuzhiyun 5 * searchp->num - 1) / (5 * searchp->num));
4016*4882a593Smuzhiyun STATS_ADD_REAPED(searchp, freed);
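/*
 * Worked example of the fraction drained above (illustrative numbers):
 * with free_limit = 120 and 8 objects per slab, the slab count passed in
 * is (120 + 40 - 1) / 40 = 3 free slabs per pass, i.e. roughly a fifth of
 * the node's free limit each time cache_reap() runs.
 */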
4017*4882a593Smuzhiyun }
4018*4882a593Smuzhiyun next:
4019*4882a593Smuzhiyun cond_resched();
4020*4882a593Smuzhiyun }
4021*4882a593Smuzhiyun check_irq_on();
4022*4882a593Smuzhiyun mutex_unlock(&slab_mutex);
4023*4882a593Smuzhiyun next_reap_node();
4024*4882a593Smuzhiyun out:
4025*4882a593Smuzhiyun /* Set up the next iteration */
4026*4882a593Smuzhiyun schedule_delayed_work_on(smp_processor_id(), work,
4027*4882a593Smuzhiyun round_jiffies_relative(REAPTIMEOUT_AC));
4028*4882a593Smuzhiyun }
4029*4882a593Smuzhiyun
4030*4882a593Smuzhiyun void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
4031*4882a593Smuzhiyun {
4032*4882a593Smuzhiyun unsigned long active_objs, num_objs, active_slabs;
4033*4882a593Smuzhiyun unsigned long total_slabs = 0, free_objs = 0, shared_avail = 0;
4034*4882a593Smuzhiyun unsigned long free_slabs = 0;
4035*4882a593Smuzhiyun int node;
4036*4882a593Smuzhiyun struct kmem_cache_node *n;
4037*4882a593Smuzhiyun
4038*4882a593Smuzhiyun for_each_kmem_cache_node(cachep, node, n) {
4039*4882a593Smuzhiyun check_irq_on();
4040*4882a593Smuzhiyun spin_lock_irq(&n->list_lock);
4041*4882a593Smuzhiyun
4042*4882a593Smuzhiyun total_slabs += n->total_slabs;
4043*4882a593Smuzhiyun free_slabs += n->free_slabs;
4044*4882a593Smuzhiyun free_objs += n->free_objects;
4045*4882a593Smuzhiyun
4046*4882a593Smuzhiyun if (n->shared)
4047*4882a593Smuzhiyun shared_avail += n->shared->avail;
4048*4882a593Smuzhiyun
4049*4882a593Smuzhiyun spin_unlock_irq(&n->list_lock);
4050*4882a593Smuzhiyun }
4051*4882a593Smuzhiyun num_objs = total_slabs * cachep->num;
4052*4882a593Smuzhiyun active_slabs = total_slabs - free_slabs;
4053*4882a593Smuzhiyun active_objs = num_objs - free_objs;
4054*4882a593Smuzhiyun
4055*4882a593Smuzhiyun sinfo->active_objs = active_objs;
4056*4882a593Smuzhiyun sinfo->num_objs = num_objs;
4057*4882a593Smuzhiyun sinfo->active_slabs = active_slabs;
4058*4882a593Smuzhiyun sinfo->num_slabs = total_slabs;
4059*4882a593Smuzhiyun sinfo->shared_avail = shared_avail;
4060*4882a593Smuzhiyun sinfo->limit = cachep->limit;
4061*4882a593Smuzhiyun sinfo->batchcount = cachep->batchcount;
4062*4882a593Smuzhiyun sinfo->shared = cachep->shared;
4063*4882a593Smuzhiyun sinfo->objects_per_slab = cachep->num;
4064*4882a593Smuzhiyun sinfo->cache_order = cachep->gfporder;
4065*4882a593Smuzhiyun }
4066*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(get_slabinfo);
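/*
 * The fields filled in above feed the per-cache line of /proc/slabinfo,
 * which the common slab code prints roughly as:
 *
 *	name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>
 *	    : tunables <limit> <batchcount> <sharedfactor>
 *	    : slabdata <active_slabs> <num_slabs> <sharedavail>
 */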
4067*4882a593Smuzhiyun
4068*4882a593Smuzhiyun void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep)
4069*4882a593Smuzhiyun {
4070*4882a593Smuzhiyun #if STATS
4071*4882a593Smuzhiyun { /* node stats */
4072*4882a593Smuzhiyun unsigned long high = cachep->high_mark;
4073*4882a593Smuzhiyun unsigned long allocs = cachep->num_allocations;
4074*4882a593Smuzhiyun unsigned long grown = cachep->grown;
4075*4882a593Smuzhiyun unsigned long reaped = cachep->reaped;
4076*4882a593Smuzhiyun unsigned long errors = cachep->errors;
4077*4882a593Smuzhiyun unsigned long max_freeable = cachep->max_freeable;
4078*4882a593Smuzhiyun unsigned long node_allocs = cachep->node_allocs;
4079*4882a593Smuzhiyun unsigned long node_frees = cachep->node_frees;
4080*4882a593Smuzhiyun unsigned long overflows = cachep->node_overflow;
4081*4882a593Smuzhiyun
4082*4882a593Smuzhiyun seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu %4lu %4lu %4lu %4lu %4lu",
4083*4882a593Smuzhiyun allocs, high, grown,
4084*4882a593Smuzhiyun reaped, errors, max_freeable, node_allocs,
4085*4882a593Smuzhiyun node_frees, overflows);
4086*4882a593Smuzhiyun }
4087*4882a593Smuzhiyun /* cpu stats */
4088*4882a593Smuzhiyun {
4089*4882a593Smuzhiyun unsigned long allochit = atomic_read(&cachep->allochit);
4090*4882a593Smuzhiyun unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4091*4882a593Smuzhiyun unsigned long freehit = atomic_read(&cachep->freehit);
4092*4882a593Smuzhiyun unsigned long freemiss = atomic_read(&cachep->freemiss);
4093*4882a593Smuzhiyun
4094*4882a593Smuzhiyun seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4095*4882a593Smuzhiyun allochit, allocmiss, freehit, freemiss);
4096*4882a593Smuzhiyun }
4097*4882a593Smuzhiyun #endif
4098*4882a593Smuzhiyun }
4099*4882a593Smuzhiyun
4100*4882a593Smuzhiyun #define MAX_SLABINFO_WRITE 128
4101*4882a593Smuzhiyun /**
4102*4882a593Smuzhiyun * slabinfo_write - Tuning for the slab allocator
4103*4882a593Smuzhiyun * @file: unused
4104*4882a593Smuzhiyun * @buffer: user buffer
4105*4882a593Smuzhiyun * @count: data length
4106*4882a593Smuzhiyun * @ppos: unused
4107*4882a593Smuzhiyun *
4108*4882a593Smuzhiyun * Return: %0 on success, negative error code otherwise.
4109*4882a593Smuzhiyun */
4110*4882a593Smuzhiyun ssize_t slabinfo_write(struct file *file, const char __user *buffer,
4111*4882a593Smuzhiyun size_t count, loff_t *ppos)
4112*4882a593Smuzhiyun {
4113*4882a593Smuzhiyun char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4114*4882a593Smuzhiyun int limit, batchcount, shared, res;
4115*4882a593Smuzhiyun struct kmem_cache *cachep;
4116*4882a593Smuzhiyun
4117*4882a593Smuzhiyun if (count > MAX_SLABINFO_WRITE)
4118*4882a593Smuzhiyun return -EINVAL;
4119*4882a593Smuzhiyun if (copy_from_user(&kbuf, buffer, count))
4120*4882a593Smuzhiyun return -EFAULT;
4121*4882a593Smuzhiyun kbuf[MAX_SLABINFO_WRITE] = '\0';
4122*4882a593Smuzhiyun
4123*4882a593Smuzhiyun tmp = strchr(kbuf, ' ');
4124*4882a593Smuzhiyun if (!tmp)
4125*4882a593Smuzhiyun return -EINVAL;
4126*4882a593Smuzhiyun *tmp = '\0';
4127*4882a593Smuzhiyun tmp++;
4128*4882a593Smuzhiyun if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4129*4882a593Smuzhiyun return -EINVAL;
4130*4882a593Smuzhiyun
4131*4882a593Smuzhiyun /* Find the cache in the chain of caches. */
4132*4882a593Smuzhiyun mutex_lock(&slab_mutex);
4133*4882a593Smuzhiyun res = -EINVAL;
4134*4882a593Smuzhiyun list_for_each_entry(cachep, &slab_caches, list) {
4135*4882a593Smuzhiyun if (!strcmp(cachep->name, kbuf)) {
4136*4882a593Smuzhiyun if (limit < 1 || batchcount < 1 ||
4137*4882a593Smuzhiyun batchcount > limit || shared < 0) {
4138*4882a593Smuzhiyun res = 0;
4139*4882a593Smuzhiyun } else {
4140*4882a593Smuzhiyun res = do_tune_cpucache(cachep, limit,
4141*4882a593Smuzhiyun batchcount, shared,
4142*4882a593Smuzhiyun GFP_KERNEL);
4143*4882a593Smuzhiyun }
4144*4882a593Smuzhiyun break;
4145*4882a593Smuzhiyun }
4146*4882a593Smuzhiyun }
4147*4882a593Smuzhiyun mutex_unlock(&slab_mutex);
4148*4882a593Smuzhiyun if (res >= 0)
4149*4882a593Smuzhiyun res = count;
4150*4882a593Smuzhiyun return res;
4151*4882a593Smuzhiyun }
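/*
 * Tuning sketch: the expected input is "<cache name> <limit> <batchcount>
 * <shared>", with batchcount not exceeding limit. The cache name and values
 * below are illustrative:
 *
 *	echo "dentry 120 60 8" > /proc/slabinfo
 */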
4152*4882a593Smuzhiyun
4153*4882a593Smuzhiyun #ifdef CONFIG_HARDENED_USERCOPY
4154*4882a593Smuzhiyun /*
4155*4882a593Smuzhiyun * Rejects incorrectly sized objects and objects that are to be copied
4156*4882a593Smuzhiyun * to/from userspace but do not fall entirely within the containing slab
4157*4882a593Smuzhiyun * cache's usercopy region.
4158*4882a593Smuzhiyun *
4159*4882a593Smuzhiyun * Returns nothing if the check passes; otherwise the offending copy is
4160*4882a593Smuzhiyun * either reported via usercopy_warn() or rejected via usercopy_abort().
4161*4882a593Smuzhiyun */
4162*4882a593Smuzhiyun void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
4163*4882a593Smuzhiyun bool to_user)
4164*4882a593Smuzhiyun {
4165*4882a593Smuzhiyun struct kmem_cache *cachep;
4166*4882a593Smuzhiyun unsigned int objnr;
4167*4882a593Smuzhiyun unsigned long offset;
4168*4882a593Smuzhiyun
4169*4882a593Smuzhiyun ptr = kasan_reset_tag(ptr);
4170*4882a593Smuzhiyun
4171*4882a593Smuzhiyun /* Find and validate object. */
4172*4882a593Smuzhiyun cachep = page->slab_cache;
4173*4882a593Smuzhiyun objnr = obj_to_index(cachep, page, (void *)ptr);
4174*4882a593Smuzhiyun BUG_ON(objnr >= cachep->num);
4175*4882a593Smuzhiyun
4176*4882a593Smuzhiyun /* Find offset within object. */
4177*4882a593Smuzhiyun if (is_kfence_address(ptr))
4178*4882a593Smuzhiyun offset = ptr - kfence_object_start(ptr);
4179*4882a593Smuzhiyun else
4180*4882a593Smuzhiyun offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
4181*4882a593Smuzhiyun
4182*4882a593Smuzhiyun /* Allow address range falling entirely within usercopy region. */
4183*4882a593Smuzhiyun if (offset >= cachep->useroffset &&
4184*4882a593Smuzhiyun offset - cachep->useroffset <= cachep->usersize &&
4185*4882a593Smuzhiyun n <= cachep->useroffset - offset + cachep->usersize)
4186*4882a593Smuzhiyun return;
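/*
 * Worked example of the range check above (illustrative numbers): with
 * useroffset = 32 and usersize = 64, a copy of n = 48 bytes at offset = 40
 * passes (40 >= 32, 40 - 32 = 8 <= 64, and 48 <= 32 - 40 + 64 = 56), while
 * n = 60 at the same offset does not.
 */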
4187*4882a593Smuzhiyun
4188*4882a593Smuzhiyun /*
4189*4882a593Smuzhiyun * If the copy is still within the allocated object, produce
4190*4882a593Smuzhiyun * a warning instead of rejecting the copy. This is intended
4191*4882a593Smuzhiyun * to be a temporary method to find any missing usercopy
4192*4882a593Smuzhiyun * whitelists.
4193*4882a593Smuzhiyun */
4194*4882a593Smuzhiyun if (usercopy_fallback &&
4195*4882a593Smuzhiyun offset <= cachep->object_size &&
4196*4882a593Smuzhiyun n <= cachep->object_size - offset) {
4197*4882a593Smuzhiyun usercopy_warn("SLAB object", cachep->name, to_user, offset, n);
4198*4882a593Smuzhiyun return;
4199*4882a593Smuzhiyun }
4200*4882a593Smuzhiyun
4201*4882a593Smuzhiyun usercopy_abort("SLAB object", cachep->name, to_user, offset, n);
4202*4882a593Smuzhiyun }
4203*4882a593Smuzhiyun #endif /* CONFIG_HARDENED_USERCOPY */
4204*4882a593Smuzhiyun
4205*4882a593Smuzhiyun /**
4206*4882a593Smuzhiyun * __ksize -- Uninstrumented ksize.
4207*4882a593Smuzhiyun * @objp: pointer to the object
4208*4882a593Smuzhiyun *
4209*4882a593Smuzhiyun * Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
4210*4882a593Smuzhiyun * safety checks as ksize() with KASAN instrumentation enabled.
4211*4882a593Smuzhiyun *
4212*4882a593Smuzhiyun * Return: size of the actual memory used by @objp in bytes
4213*4882a593Smuzhiyun */
4214*4882a593Smuzhiyun size_t __ksize(const void *objp)
4215*4882a593Smuzhiyun {
4216*4882a593Smuzhiyun struct kmem_cache *c;
4217*4882a593Smuzhiyun size_t size;
4218*4882a593Smuzhiyun
4219*4882a593Smuzhiyun BUG_ON(!objp);
4220*4882a593Smuzhiyun if (unlikely(objp == ZERO_SIZE_PTR))
4221*4882a593Smuzhiyun return 0;
4222*4882a593Smuzhiyun
4223*4882a593Smuzhiyun c = virt_to_cache(objp);
4224*4882a593Smuzhiyun size = c ? c->object_size : 0;
4225*4882a593Smuzhiyun
4226*4882a593Smuzhiyun return size;
4227*4882a593Smuzhiyun }
4228*4882a593Smuzhiyun EXPORT_SYMBOL(__ksize);
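/*
 * Example of what __ksize()/ksize() report (sizes are illustrative): a
 * kmalloc(100, GFP_KERNEL) request is satisfied from the kmalloc-128 cache,
 * so the reported size is 128 rather than the requested 100.
 */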