// SPDX-License-Identifier: GPL-2.0
/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks or atomic operations
 * and only uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter
 * (C) 2011 Linux Foundation, Christoph Lameter
 */

#include <linux/mm.h>
#include <linux/swap.h> /* struct reclaim_state */
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/swab.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kasan.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/kfence.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
#include <linux/memcontrol.h>
#include <linux/random.h>

#include <linux/debugfs.h>
#include <trace/events/kmem.h>
#include <trace/hooks/mm.h>

#include "internal.h"

/*
 * Lock order:
 *   1. slab_mutex (Global Mutex)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 *   slab_mutex
 *
 *   The role of the slab_mutex is to protect the list of all the slabs
 *   and to synchronize major metadata changes to slab cache structures.
 *
 *   The slab_lock is only used for debugging and on arches that do not
 *   have the ability to do a cmpxchg_double. It only protects:
 *	A. page->freelist	-> List of free objects in a page
 *	B. page->inuse		-> Number of objects in use
 *	C. page->objects	-> Number of objects in page
 *	D. page->frozen		-> frozen state
 *
 *   If a slab is frozen then it is exempt from list management. It is not
 *   on any list except the per cpu partial list. The processor that froze
 *   the slab is the one who can perform list operations on the page. Other
 *   processors may put objects onto the freelist but the processor that
 *   froze the slab is the only one that can retrieve the objects from the
 *   page's freelist.
 *
 *   The list_lock protects the partial and full list on each node and
 *   the partial slab counter. If taken then no new slabs may be added to or
 *   removed from the lists, nor may the number of partial slabs be modified.
 *   (Note that the total number of slabs is an atomic value that may be
 *   modified without taking the list lock).
 *
 *   The list_lock is a centralized lock and thus we avoid taking it as
 *   much as possible. As long as SLUB does not have to handle partial
 *   slabs, operations can continue without any centralized lock. F.e.
 *   allocating a long series of objects that fill up slabs does not require
 *   the list lock.
 *   Interrupts are disabled during allocation and deallocation in order to
 *   make the slab allocator safe to use in the context of an irq. In addition
 *   interrupts are disabled to ensure that the processor does not change
 *   while handling per_cpu slabs, due to kernel preemption.
 *
 * SLUB assigns one slab for allocation to each processor.
 * Allocations only occur from these slabs called cpu slabs.
 *
 * Slabs with free elements are kept on a partial list and during regular
 * operations no list for full slabs is used. If an object in a full slab is
 * freed then the slab will show up again on the partial lists.
 * We track full slabs for debugging purposes though because otherwise we
 * cannot scan all objects.
 *
 * Slabs are freed when they become empty. Teardown and setup is
 * minimal so we rely on the page allocator's per cpu caches for
 * fast frees and allocs.
 *
 * page->frozen		The slab is frozen and exempt from list processing.
 *			This means that the slab is dedicated to a purpose
 *			such as satisfying allocations for a specific
 *			processor. Objects may be freed in the slab while
 *			it is frozen but slab_free will then skip the usual
 *			list operations. It is up to the processor holding
 *			the slab to integrate the slab into the slab lists
 *			when the slab is no longer needed.
 *
 *			One use of this flag is to mark slabs that are
 *			used for allocations. Then such a slab becomes a cpu
 *			slab. The cpu slab may be equipped with an additional
 *			freelist that allows lockless access to
 *			free objects in addition to the regular freelist
 *			that requires the slab lock.
 *
 * SLAB_DEBUG_FLAGS	Slab requires special handling due to debug
 *			options set. This moves slab handling out of
 *			the fast path and disables lockless freelists.
 */
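
/*
 * Illustrative sketch only (not kernel code): the nesting implied by the
 * lock order above. A hypothetical walk over a cache's node lists that
 * needs both outer locks would take them outermost-first:
 *
 *	mutex_lock(&slab_mutex);
 *	spin_lock_irqsave(&n->list_lock, flags);
 *	... inspect n->partial / n->full ...
 *	spin_unlock_irqrestore(&n->list_lock, flags);
 *	mutex_unlock(&slab_mutex);
 *
 * slab_lock(page), where it is used at all, nests inside node->list_lock.
 */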

#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG_ON
DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
#else
DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
#endif

static inline bool kmem_cache_debug(struct kmem_cache *s)
{
	return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
}

void *fixup_red_left(struct kmem_cache *s, void *p)
{
	if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
		p += s->red_left_pad;

	return p;
}

static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
	return !kmem_cache_debug(s);
#else
	return false;
#endif
}

/*
 * Issues still to be resolved:
 *
 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
 *
 * - Variable sizing of the per node arrays
 */

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

/* Enable to log cmpxchg failures */
#undef SLUB_DEBUG_CMPXCHG

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * These debug flags cannot use CMPXCHG because there might be consistency
 * issues when checking or reading debug information
 */
#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
				SLAB_TRACE)


/*
 * Debugging flags that require metadata to be stored in the slab. These get
 * disabled when slub_debug=O is used and a cache's min order increases with
 * metadata.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */

/* Internal SLUB flags */
/* Poison object */
#define __OBJECT_POISON		((slab_flags_t __force)0x80000000U)
/* Use cmpxchg_double */
#define __CMPXCHG_DOUBLE	((slab_flags_t __force)0x40000000U)

#ifdef CONFIG_SLUB_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
#endif

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
static void debugfs_slab_add(struct kmem_cache *);
#else
static inline void debugfs_slab_add(struct kmem_cache *s) { }
#endif

static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	/*
	 * The rmw is racy on a preemptible kernel but this is acceptable, so
	 * avoid this_cpu_add()'s irq-disable overhead.
	 */
	raw_cpu_inc(s->cpu_slab->stat[si]);
#endif
}

/********************************************************************
 *			Core slab cache functions
 *******************************************************************/

/*
 * Returns freelist pointer (ptr). With hardening, this is obfuscated
 * with an XOR of the address where the pointer is held and a per-cache
 * random number.
 */
static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
				 unsigned long ptr_addr)
{
#ifdef CONFIG_SLAB_FREELIST_HARDENED
	/*
	 * When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged.
	 * Normally, this doesn't cause any issues, as both set_freepointer()
	 * and get_freepointer() are called with a pointer with the same tag.
	 * However, there are some issues with CONFIG_SLUB_DEBUG code. For
	 * example, when __free_slab() iterates over objects in a cache, it
	 * passes untagged pointers to check_object(). check_object() in turn
	 * calls get_freepointer() with an untagged pointer, which causes the
	 * freepointer to be restored incorrectly.
	 */
	return (void *)((unsigned long)ptr ^ s->random ^
			swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
#else
	return ptr;
#endif
}
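
/*
 * For illustration only (hypothetical values): with hardening enabled the
 * stored value is  stored = ptr ^ s->random ^ swab(slot address),  so
 * applying freelist_ptr() a second time with the same ptr_addr recovers the
 * original pointer:
 *
 *	void *stored  = freelist_ptr(s, ptr, addr);	// obfuscate
 *	void *decoded = freelist_ptr(s, stored, addr);	// decoded == ptr
 *
 * This XOR round trip is why a single helper serves both set_freepointer()
 * and freelist_dereference() below.
 */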

/* Returns the freelist pointer recorded at location ptr_addr. */
static inline void *freelist_dereference(const struct kmem_cache *s,
					 void *ptr_addr)
{
	return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
			    (unsigned long)ptr_addr);
}

static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	object = kasan_reset_tag(object);
	return freelist_dereference(s, object + s->offset);
}

static void prefetch_freepointer(const struct kmem_cache *s, void *object)
{
	prefetch(object + s->offset);
}

static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
	unsigned long freepointer_addr;
	void *p;

	if (!debug_pagealloc_enabled_static())
		return get_freepointer(s, object);

	object = kasan_reset_tag(object);
	freepointer_addr = (unsigned long)object + s->offset;
	copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
	return freelist_ptr(s, p, freepointer_addr);
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	unsigned long freeptr_addr = (unsigned long)object + s->offset;

#ifdef CONFIG_SLAB_FREELIST_HARDENED
	BUG_ON(object == fp); /* naive detection of double free or corruption */
#endif

	freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
	*(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
}

/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = fixup_red_left(__s, __addr); \
		__p < (__addr) + (__objects) * (__s)->size; \
		__p += (__s)->size)
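
/*
 * Usage sketch for the macro above (mirrors how the debug code later in this
 * file walks a slab; variable names are illustrative):
 *
 *	void *addr = page_address(page);
 *	void *p;
 *
 *	for_each_object(p, s, addr, page->objects)
 *		... inspect the object at p ...
 *
 * The caller must hold whatever lock keeps the page stable, see the
 * comment above get_map().
 */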

static inline unsigned int order_objects(unsigned int order, unsigned int size)
{
	return ((unsigned int)PAGE_SIZE << order) / size;
}

static inline struct kmem_cache_order_objects oo_make(unsigned int order,
						      unsigned int size)
{
	struct kmem_cache_order_objects x = {
		(order << OO_SHIFT) + order_objects(order, size)
	};

	return x;
}

static inline unsigned int oo_order(struct kmem_cache_order_objects x)
{
	return x.x >> OO_SHIFT;
}

static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
{
	return x.x & OO_MASK;
}
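
/*
 * Worked example of the encoding above, assuming PAGE_SIZE == 4096 and an
 * object size of 256 bytes: an order-1 slab spans 8192 bytes, so
 * order_objects(1, 256) == 32 and oo_make(1, 256) packs the pair as
 * (1 << 16) + 32 == 0x10020. oo_order() then returns 1 and oo_objects()
 * returns 32. MAX_OBJS_PER_PAGE (32767) caps the object count because
 * page->objects is a 15-bit field.
 */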

/*
 * Per slab locking using the pagelock
 */
static __always_inline void slab_lock(struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);
	bit_spin_lock(PG_locked, &page->flags);
}

static __always_inline void slab_unlock(struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);
	__bit_spin_unlock(PG_locked, &page->flags);
}

/* Interrupts must be disabled (for the fallback code to work right) */
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
		const char *n)
{
	VM_BUG_ON(!irqs_disabled());
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (s->flags & __CMPXCHG_DOUBLE) {
		if (cmpxchg_double(&page->freelist, &page->counters,
				   freelist_old, counters_old,
				   freelist_new, counters_new))
			return true;
	} else
#endif
	{
		slab_lock(page);
		if (page->freelist == freelist_old &&
		    page->counters == counters_old) {
			page->freelist = freelist_new;
			page->counters = counters_new;
			slab_unlock(page);
			return true;
		}
		slab_unlock(page);
	}

	cpu_relax();
	stat(s, CMPXCHG_DOUBLE_FAIL);

#ifdef SLUB_DEBUG_CMPXCHG
	pr_info("%s %s: cmpxchg double redo ", n, s->name);
#endif

	return false;
}

static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
		const char *n)
{
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (s->flags & __CMPXCHG_DOUBLE) {
		if (cmpxchg_double(&page->freelist, &page->counters,
				   freelist_old, counters_old,
				   freelist_new, counters_new))
			return true;
	} else
#endif
	{
		unsigned long flags;

		local_irq_save(flags);
		slab_lock(page);
		if (page->freelist == freelist_old &&
		    page->counters == counters_old) {
			page->freelist = freelist_new;
			page->counters = counters_new;
			slab_unlock(page);
			local_irq_restore(flags);
			return true;
		}
		slab_unlock(page);
		local_irq_restore(flags);
	}

	cpu_relax();
	stat(s, CMPXCHG_DOUBLE_FAIL);

#ifdef SLUB_DEBUG_CMPXCHG
	pr_info("%s %s: cmpxchg double redo ", n, s->name);
#endif

	return false;
}
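
/*
 * Callers use these helpers in an optimistic retry loop. A sketch of the
 * common pattern (names are illustrative, not a function in this file):
 *
 *	do {
 *		old_freelist = page->freelist;
 *		old_counters = page->counters;
 *		// compute new_freelist/new_counters, e.g. pop an object
 *		// or set the frozen bit
 *	} while (!cmpxchg_double_slab(s, page,
 *				      old_freelist, old_counters,
 *				      new_freelist, new_counters,
 *				      "some caller"));
 *
 * On architectures without cmpxchg_double the same semantics are emulated
 * under slab_lock(), which is why the fallback compares both words before
 * writing either.
 */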

#ifdef CONFIG_SLUB_DEBUG
static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
static DEFINE_SPINLOCK(object_map_lock);

static void __fill_map(unsigned long *obj_map, struct kmem_cache *s,
		       struct page *page)
{
	void *addr = page_address(page);
	void *p;

	bitmap_zero(obj_map, page->objects);

	for (p = page->freelist; p; p = get_freepointer(s, p))
		set_bit(__obj_to_index(s, addr, p), obj_map);
}

/*
 * Determine a map of objects in use on a page.
 *
 * Node listlock must be held to guarantee that the page does
 * not vanish from under us.
 */
static unsigned long *get_map(struct kmem_cache *s, struct page *page)
	__acquires(&object_map_lock)
{
	VM_BUG_ON(!irqs_disabled());

	spin_lock(&object_map_lock);

	__fill_map(object_map, s, page);

	return object_map;
}

static void put_map(unsigned long *map) __releases(&object_map_lock)
{
	VM_BUG_ON(map != object_map);
	spin_unlock(&object_map_lock);
}
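
/*
 * Typical use of the single global map (sketch; mirrors the debug/validate
 * code elsewhere in this file). Bits are set for objects currently on the
 * freelist, so a clear bit means "allocated":
 *
 *	void *addr = page_address(page);
 *	unsigned long *map = get_map(s, page);
 *
 *	for_each_object(p, s, addr, page->objects)
 *		if (!test_bit(__obj_to_index(s, addr, p), map))
 *			... p is an allocated object ...
 *
 *	put_map(map);
 *
 * Because object_map is shared and protected by a spinlock, holders must
 * not sleep between get_map() and put_map().
 */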

static inline unsigned int size_from_object(struct kmem_cache *s)
{
	if (s->flags & SLAB_RED_ZONE)
		return s->size - s->red_left_pad;

	return s->size;
}

static inline void *restore_red_left(struct kmem_cache *s, void *p)
{
	if (s->flags & SLAB_RED_ZONE)
		p -= s->red_left_pad;

	return p;
}

/*
 * Debug settings:
 */
#if defined(CONFIG_SLUB_DEBUG_ON)
slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
#else
slab_flags_t slub_debug;
#endif

static char *slub_debug_string;
static int disable_higher_order_debug;

/*
 * slub is about to manipulate internal object metadata. This memory lies
 * outside the range of the allocated object, so accessing it would normally
 * be reported by kasan as a bounds error. metadata_access_enable() is used
 * to tell kasan that these accesses are OK.
 */
static inline void metadata_access_enable(void)
{
	kasan_disable_current();
}

static inline void metadata_access_disable(void)
{
	kasan_enable_current();
}

/*
 * Object debugging
 */

/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
				      struct page *page, void *object)
{
	void *base;

	if (!object)
		return 1;

	base = page_address(page);
	object = kasan_reset_tag(object);
	object = restore_red_left(s, object);
	if (object < base || object >= base + page->objects * s->size ||
	    (object - base) % s->size) {
		return 0;
	}

	return 1;
}

static void print_section(char *level, char *text, u8 *addr,
			  unsigned int length)
{
	metadata_access_enable();
	print_hex_dump(level, text, DUMP_PREFIX_ADDRESS,
		       16, 1, kasan_reset_tag((void *)addr), length, 1);
	metadata_access_disable();
}

/*
 * See comment in calculate_sizes().
 */
static inline bool freeptr_outside_object(struct kmem_cache *s)
{
	return s->offset >= s->inuse;
}

/*
 * Return the offset of the end of the info block, which is inuse plus the
 * free pointer if the free pointer does not overlap with the object.
 */
static inline unsigned int get_info_end(struct kmem_cache *s)
{
	if (freeptr_outside_object(s))
		return s->inuse + sizeof(void *);
	else
		return s->inuse;
}

static struct track *get_track(struct kmem_cache *s, void *object,
			       enum track_item alloc)
{
	struct track *p;

	p = object + get_info_end(s);

	return kasan_reset_tag(p + alloc);
}

/*
 * Loop through all the slab objects in a page and hand the track structure
 * of each object to the callback fn, which extracts whatever it needs into
 * its private data. The return value is the number of track structures that
 * were processed.
 */
unsigned long get_each_object_track(struct kmem_cache *s,
		struct page *page, enum track_item alloc,
		int (*fn)(const struct kmem_cache *, const void *,
			  const struct track *, void *), void *private)
{
	void *p;
	struct track *t;
	int ret;
	unsigned long num_track = 0;

	if (!slub_debug || !(s->flags & SLAB_STORE_USER))
		return 0;

	slab_lock(page);
	for_each_object(p, s, page_address(page), page->objects) {
		t = get_track(s, p, alloc);
		metadata_access_enable();
		ret = fn(s, p, t, private);
		metadata_access_disable();
		if (ret < 0)
			break;
		num_track += 1;
	}
	slab_unlock(page);
	return num_track;
}
EXPORT_SYMBOL_GPL(get_each_object_track);
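
/*
 * Example use of the exported helper above (sketch; the callback name and
 * the counter it fills are hypothetical, not defined in this file):
 *
 *	static int count_tracks(const struct kmem_cache *s, const void *obj,
 *				const struct track *t, void *private)
 *	{
 *		unsigned long *count = private;
 *
 *		if (t->addr)
 *			(*count)++;
 *		return 0;	// a negative return stops the walk
 *	}
 *
 *	unsigned long count = 0;
 *	get_each_object_track(s, page, TRACK_ALLOC, count_tracks, &count);
 */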

static void set_track(struct kmem_cache *s, void *object,
			enum track_item alloc, unsigned long addr)
{
	struct track *p = get_track(s, object, alloc);

	if (addr) {
#ifdef CONFIG_STACKTRACE
		unsigned int nr_entries;

		metadata_access_enable();
		nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
					      TRACK_ADDRS_COUNT, 3);
		metadata_access_disable();

		if (nr_entries < TRACK_ADDRS_COUNT)
			p->addrs[nr_entries] = 0;
		trace_android_vh_save_track_hash(alloc == TRACK_ALLOC,
						 (unsigned long)p);
#endif
		p->addr = addr;
		p->cpu = smp_processor_id();
		p->pid = current->pid;
		p->when = jiffies;
	} else {
		memset(p, 0, sizeof(struct track));
	}
}

static void init_tracking(struct kmem_cache *s, void *object)
{
	if (!(s->flags & SLAB_STORE_USER))
		return;

	set_track(s, object, TRACK_FREE, 0UL);
	set_track(s, object, TRACK_ALLOC, 0UL);
}

static void print_track(const char *s, struct track *t, unsigned long pr_time)
{
	if (!t->addr)
		return;

	pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
	       s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
#ifdef CONFIG_STACKTRACE
	{
		int i;
		for (i = 0; i < TRACK_ADDRS_COUNT; i++)
			if (t->addrs[i])
				pr_err("\t%pS\n", (void *)t->addrs[i]);
			else
				break;
	}
#endif
}

void print_tracking(struct kmem_cache *s, void *object)
{
	unsigned long pr_time = jiffies;
	if (!(s->flags & SLAB_STORE_USER))
		return;

	print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
	print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
}

static void print_page_info(struct page *page)
{
	pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
	       page, page->objects, page->inuse, page->freelist, page->flags);

}

static void slab_bug(struct kmem_cache *s, char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	pr_err("=============================================================================\n");
	pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
	pr_err("-----------------------------------------------------------------------------\n\n");
	va_end(args);
}

static void slab_fix(struct kmem_cache *s, char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	pr_err("FIX %s: %pV\n", s->name, &vaf);
	va_end(args);
}

static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
			       void **freelist, void *nextfree)
{
	if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
	    !check_valid_pointer(s, page, nextfree) && freelist) {
		object_err(s, page, *freelist, "Freechain corrupt");
		*freelist = NULL;
		slab_fix(s, "Isolate corrupted freechain");
		return true;
	}

	return false;
}

static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
{
	unsigned int off;	/* Offset of last byte */
	u8 *addr = page_address(page);

	print_tracking(s, p);

	print_page_info(page);

	pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
	       p, p - addr, get_freepointer(s, p));

	if (s->flags & SLAB_RED_ZONE)
		print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
			      s->red_left_pad);
	else if (p > addr + 16)
		print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);

	print_section(KERN_ERR, "Object ", p,
		      min_t(unsigned int, s->object_size, PAGE_SIZE));
	if (s->flags & SLAB_RED_ZONE)
		print_section(KERN_ERR, "Redzone ", p + s->object_size,
			      s->inuse - s->object_size);

	off = get_info_end(s);

	if (s->flags & SLAB_STORE_USER)
		off += 2 * sizeof(struct track);

	off += kasan_metadata_size(s);

	if (off != size_from_object(s))
		/* Beginning of the filler is the free pointer */
		print_section(KERN_ERR, "Padding ", p + off,
			      size_from_object(s) - off);

	dump_stack();
}

void object_err(struct kmem_cache *s, struct page *page,
		u8 *object, char *reason)
{
	slab_bug(s, "%s", reason);
	print_trailer(s, page, object);
	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}

static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
				    const char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	slab_bug(s, "%s", buf);
	print_page_info(page);
	dump_stack();
	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}

static void init_object(struct kmem_cache *s, void *object, u8 val)
{
	u8 *p = kasan_reset_tag(object);

	if (s->flags & SLAB_RED_ZONE)
		memset(p - s->red_left_pad, val, s->red_left_pad);

	if (s->flags & __OBJECT_POISON) {
		memset(p, POISON_FREE, s->object_size - 1);
		p[s->object_size - 1] = POISON_END;
	}

	if (s->flags & SLAB_RED_ZONE)
		memset(p + s->object_size, val, s->inuse - s->object_size);
}

static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
			  void *from, void *to)
{
	slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
	memset(from, data, to - from);
}

static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
				  u8 *object, char *what,
				  u8 *start, unsigned int value, unsigned int bytes)
{
	u8 *fault;
	u8 *end;
	u8 *addr = page_address(page);

	metadata_access_enable();
	fault = memchr_inv(kasan_reset_tag(start), value, bytes);
	metadata_access_disable();
	if (!fault)
		return 1;

	end = start + bytes;
	while (end > fault && end[-1] == value)
		end--;

	slab_bug(s, "%s overwritten", what);
	pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
	       fault, end - 1, fault - addr,
	       fault[0], value);
	print_trailer(s, page, object);
	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);

	restore_bytes(s, what, value, fault, end);
	return 0;
}

/*
 * Object layout:
 *
 * object address
 *	Bytes of the object to be managed.
 *	If the freepointer may overlay the object then the free
 *	pointer is at the middle of the object.
 *
 *	Poisoning uses 0x6b (POISON_FREE) and the last byte is
 *	0xa5 (POISON_END)
 *
 * object + s->object_size
 *	Padding to reach word boundary. This is also used for Redzoning.
 *	Padding is extended by another word if Redzoning is enabled and
 *	object_size == inuse.
 *
 *	We fill with 0xbb (RED_INACTIVE) for inactive objects and with
 *	0xcc (RED_ACTIVE) for objects in use.
 *
 * object + s->inuse
 *	Meta data starts here.
 *
 *	A. Free pointer (if we cannot overwrite object on free)
 *	B. Tracking data for SLAB_STORE_USER
 *	C. Padding to reach required alignment boundary or at minimum
 *		one word if debugging is on to be able to detect writes
 *		before the word boundary.
 *
 *	Padding is done using 0x5a (POISON_INUSE)
 *
 * object + s->size
 *	Nothing is used beyond s->size.
 *
 * If slabcaches are merged then the object_size and inuse boundaries are mostly
 * ignored. And therefore no slab options that rely on these boundaries
 * may be used with merged slabcaches.
 */
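
/*
 * Worked example of the layout above (hypothetical numbers): a debug cache
 * with object_size = 24, 8-byte words and SLAB_RED_ZONE | SLAB_STORE_USER
 * set might end up with
 *
 *	left redzone	(red_left_pad bytes before the object address)
 *	object		  0 .. 23	(poisoned while free)
 *	right redzone	 24 .. 31	(so inuse = 32)
 *	free pointer	 32 .. 39	(outside the object, see get_info_end())
 *	2 * struct track 40 .. ...	(alloc and free tracking)
 *	padding up to s->size		(filled with POISON_INUSE)
 *
 * The exact offsets are decided by calculate_sizes(); this is only meant to
 * show how object_size, inuse and size relate.
 */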
878*4882a593Smuzhiyun
check_pad_bytes(struct kmem_cache * s,struct page * page,u8 * p)879*4882a593Smuzhiyun static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
880*4882a593Smuzhiyun {
881*4882a593Smuzhiyun unsigned long off = get_info_end(s); /* The end of info */
882*4882a593Smuzhiyun
883*4882a593Smuzhiyun if (s->flags & SLAB_STORE_USER)
884*4882a593Smuzhiyun /* We also have user information there */
885*4882a593Smuzhiyun off += 2 * sizeof(struct track);
886*4882a593Smuzhiyun
887*4882a593Smuzhiyun off += kasan_metadata_size(s);
888*4882a593Smuzhiyun
889*4882a593Smuzhiyun if (size_from_object(s) == off)
890*4882a593Smuzhiyun return 1;
891*4882a593Smuzhiyun
892*4882a593Smuzhiyun return check_bytes_and_report(s, page, p, "Object padding",
893*4882a593Smuzhiyun p + off, POISON_INUSE, size_from_object(s) - off);
894*4882a593Smuzhiyun }
895*4882a593Smuzhiyun
896*4882a593Smuzhiyun /* Check the pad bytes at the end of a slab page */
slab_pad_check(struct kmem_cache * s,struct page * page)897*4882a593Smuzhiyun static int slab_pad_check(struct kmem_cache *s, struct page *page)
898*4882a593Smuzhiyun {
899*4882a593Smuzhiyun u8 *start;
900*4882a593Smuzhiyun u8 *fault;
901*4882a593Smuzhiyun u8 *end;
902*4882a593Smuzhiyun u8 *pad;
903*4882a593Smuzhiyun int length;
904*4882a593Smuzhiyun int remainder;
905*4882a593Smuzhiyun
906*4882a593Smuzhiyun if (!(s->flags & SLAB_POISON))
907*4882a593Smuzhiyun return 1;
908*4882a593Smuzhiyun
909*4882a593Smuzhiyun start = page_address(page);
910*4882a593Smuzhiyun length = page_size(page);
911*4882a593Smuzhiyun end = start + length;
912*4882a593Smuzhiyun remainder = length % s->size;
913*4882a593Smuzhiyun if (!remainder)
914*4882a593Smuzhiyun return 1;
915*4882a593Smuzhiyun
916*4882a593Smuzhiyun pad = end - remainder;
917*4882a593Smuzhiyun metadata_access_enable();
918*4882a593Smuzhiyun fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
919*4882a593Smuzhiyun metadata_access_disable();
920*4882a593Smuzhiyun if (!fault)
921*4882a593Smuzhiyun return 1;
922*4882a593Smuzhiyun while (end > fault && end[-1] == POISON_INUSE)
923*4882a593Smuzhiyun end--;
924*4882a593Smuzhiyun
925*4882a593Smuzhiyun slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
926*4882a593Smuzhiyun fault, end - 1, fault - start);
927*4882a593Smuzhiyun print_section(KERN_ERR, "Padding ", pad, remainder);
928*4882a593Smuzhiyun
929*4882a593Smuzhiyun restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
930*4882a593Smuzhiyun return 0;
931*4882a593Smuzhiyun }
932*4882a593Smuzhiyun
check_object(struct kmem_cache * s,struct page * page,void * object,u8 val)933*4882a593Smuzhiyun static int check_object(struct kmem_cache *s, struct page *page,
934*4882a593Smuzhiyun void *object, u8 val)
935*4882a593Smuzhiyun {
936*4882a593Smuzhiyun u8 *p = object;
937*4882a593Smuzhiyun u8 *endobject = object + s->object_size;
938*4882a593Smuzhiyun
939*4882a593Smuzhiyun if (s->flags & SLAB_RED_ZONE) {
940*4882a593Smuzhiyun if (!check_bytes_and_report(s, page, object, "Left Redzone",
941*4882a593Smuzhiyun object - s->red_left_pad, val, s->red_left_pad))
942*4882a593Smuzhiyun return 0;
943*4882a593Smuzhiyun
944*4882a593Smuzhiyun if (!check_bytes_and_report(s, page, object, "Right Redzone",
945*4882a593Smuzhiyun endobject, val, s->inuse - s->object_size))
946*4882a593Smuzhiyun return 0;
947*4882a593Smuzhiyun } else {
948*4882a593Smuzhiyun if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
949*4882a593Smuzhiyun check_bytes_and_report(s, page, p, "Alignment padding",
950*4882a593Smuzhiyun endobject, POISON_INUSE,
951*4882a593Smuzhiyun s->inuse - s->object_size);
952*4882a593Smuzhiyun }
953*4882a593Smuzhiyun }
954*4882a593Smuzhiyun
955*4882a593Smuzhiyun if (s->flags & SLAB_POISON) {
956*4882a593Smuzhiyun if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
957*4882a593Smuzhiyun (!check_bytes_and_report(s, page, p, "Poison", p,
958*4882a593Smuzhiyun POISON_FREE, s->object_size - 1) ||
959*4882a593Smuzhiyun !check_bytes_and_report(s, page, p, "End Poison",
960*4882a593Smuzhiyun p + s->object_size - 1, POISON_END, 1)))
961*4882a593Smuzhiyun return 0;
962*4882a593Smuzhiyun /*
963*4882a593Smuzhiyun * check_pad_bytes cleans up on its own.
964*4882a593Smuzhiyun */
965*4882a593Smuzhiyun check_pad_bytes(s, page, p);
966*4882a593Smuzhiyun }
967*4882a593Smuzhiyun
968*4882a593Smuzhiyun if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
969*4882a593Smuzhiyun /*
970*4882a593Smuzhiyun * Object and freepointer overlap. Cannot check
971*4882a593Smuzhiyun * freepointer while object is allocated.
972*4882a593Smuzhiyun */
973*4882a593Smuzhiyun return 1;
974*4882a593Smuzhiyun
975*4882a593Smuzhiyun /* Check free pointer validity */
976*4882a593Smuzhiyun if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
977*4882a593Smuzhiyun object_err(s, page, p, "Freepointer corrupt");
978*4882a593Smuzhiyun /*
979*4882a593Smuzhiyun * No choice but to zap it and thus lose the remainder
980*4882a593Smuzhiyun * of the free objects in this slab. May cause
981*4882a593Smuzhiyun * another error because the object count is now wrong.
982*4882a593Smuzhiyun */
983*4882a593Smuzhiyun set_freepointer(s, p, NULL);
984*4882a593Smuzhiyun return 0;
985*4882a593Smuzhiyun }
986*4882a593Smuzhiyun return 1;
987*4882a593Smuzhiyun }
988*4882a593Smuzhiyun
check_slab(struct kmem_cache * s,struct page * page)989*4882a593Smuzhiyun static int check_slab(struct kmem_cache *s, struct page *page)
990*4882a593Smuzhiyun {
991*4882a593Smuzhiyun int maxobj;
992*4882a593Smuzhiyun
993*4882a593Smuzhiyun VM_BUG_ON(!irqs_disabled());
994*4882a593Smuzhiyun
995*4882a593Smuzhiyun if (!PageSlab(page)) {
996*4882a593Smuzhiyun slab_err(s, page, "Not a valid slab page");
997*4882a593Smuzhiyun return 0;
998*4882a593Smuzhiyun }
999*4882a593Smuzhiyun
1000*4882a593Smuzhiyun maxobj = order_objects(compound_order(page), s->size);
1001*4882a593Smuzhiyun if (page->objects > maxobj) {
1002*4882a593Smuzhiyun slab_err(s, page, "objects %u > max %u",
1003*4882a593Smuzhiyun page->objects, maxobj);
1004*4882a593Smuzhiyun return 0;
1005*4882a593Smuzhiyun }
1006*4882a593Smuzhiyun if (page->inuse > page->objects) {
1007*4882a593Smuzhiyun slab_err(s, page, "inuse %u > max %u",
1008*4882a593Smuzhiyun page->inuse, page->objects);
1009*4882a593Smuzhiyun return 0;
1010*4882a593Smuzhiyun }
1011*4882a593Smuzhiyun /* Slab_pad_check fixes things up after itself */
1012*4882a593Smuzhiyun slab_pad_check(s, page);
1013*4882a593Smuzhiyun return 1;
1014*4882a593Smuzhiyun }
1015*4882a593Smuzhiyun
1016*4882a593Smuzhiyun /*
1017*4882a593Smuzhiyun * Determine if a certain object on a page is on the freelist. Must hold the
1018*4882a593Smuzhiyun * slab lock to guarantee that the chains are in a consistent state.
1019*4882a593Smuzhiyun */
on_freelist(struct kmem_cache * s,struct page * page,void * search)1020*4882a593Smuzhiyun static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
1021*4882a593Smuzhiyun {
1022*4882a593Smuzhiyun int nr = 0;
1023*4882a593Smuzhiyun void *fp;
1024*4882a593Smuzhiyun void *object = NULL;
1025*4882a593Smuzhiyun int max_objects;
1026*4882a593Smuzhiyun
1027*4882a593Smuzhiyun fp = page->freelist;
1028*4882a593Smuzhiyun while (fp && nr <= page->objects) {
1029*4882a593Smuzhiyun if (fp == search)
1030*4882a593Smuzhiyun return 1;
1031*4882a593Smuzhiyun if (!check_valid_pointer(s, page, fp)) {
1032*4882a593Smuzhiyun if (object) {
1033*4882a593Smuzhiyun object_err(s, page, object,
1034*4882a593Smuzhiyun "Freechain corrupt");
1035*4882a593Smuzhiyun set_freepointer(s, object, NULL);
1036*4882a593Smuzhiyun } else {
1037*4882a593Smuzhiyun slab_err(s, page, "Freepointer corrupt");
1038*4882a593Smuzhiyun page->freelist = NULL;
1039*4882a593Smuzhiyun page->inuse = page->objects;
1040*4882a593Smuzhiyun slab_fix(s, "Freelist cleared");
1041*4882a593Smuzhiyun return 0;
1042*4882a593Smuzhiyun }
1043*4882a593Smuzhiyun break;
1044*4882a593Smuzhiyun }
1045*4882a593Smuzhiyun object = fp;
1046*4882a593Smuzhiyun fp = get_freepointer(s, object);
1047*4882a593Smuzhiyun nr++;
1048*4882a593Smuzhiyun }
1049*4882a593Smuzhiyun
1050*4882a593Smuzhiyun max_objects = order_objects(compound_order(page), s->size);
1051*4882a593Smuzhiyun if (max_objects > MAX_OBJS_PER_PAGE)
1052*4882a593Smuzhiyun max_objects = MAX_OBJS_PER_PAGE;
1053*4882a593Smuzhiyun
1054*4882a593Smuzhiyun if (page->objects != max_objects) {
1055*4882a593Smuzhiyun slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
1056*4882a593Smuzhiyun page->objects, max_objects);
1057*4882a593Smuzhiyun page->objects = max_objects;
1058*4882a593Smuzhiyun slab_fix(s, "Number of objects adjusted.");
1059*4882a593Smuzhiyun }
1060*4882a593Smuzhiyun if (page->inuse != page->objects - nr) {
1061*4882a593Smuzhiyun slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
1062*4882a593Smuzhiyun page->inuse, page->objects - nr);
1063*4882a593Smuzhiyun page->inuse = page->objects - nr;
1064*4882a593Smuzhiyun slab_fix(s, "Object count adjusted.");
1065*4882a593Smuzhiyun }
1066*4882a593Smuzhiyun return search == NULL;
1067*4882a593Smuzhiyun }
1068*4882a593Smuzhiyun
trace(struct kmem_cache * s,struct page * page,void * object,int alloc)1069*4882a593Smuzhiyun static void trace(struct kmem_cache *s, struct page *page, void *object,
1070*4882a593Smuzhiyun int alloc)
1071*4882a593Smuzhiyun {
1072*4882a593Smuzhiyun if (s->flags & SLAB_TRACE) {
1073*4882a593Smuzhiyun pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
1074*4882a593Smuzhiyun s->name,
1075*4882a593Smuzhiyun alloc ? "alloc" : "free",
1076*4882a593Smuzhiyun object, page->inuse,
1077*4882a593Smuzhiyun page->freelist);
1078*4882a593Smuzhiyun
1079*4882a593Smuzhiyun if (!alloc)
1080*4882a593Smuzhiyun print_section(KERN_INFO, "Object ", (void *)object,
1081*4882a593Smuzhiyun s->object_size);
1082*4882a593Smuzhiyun
1083*4882a593Smuzhiyun dump_stack();
1084*4882a593Smuzhiyun }
1085*4882a593Smuzhiyun }
1086*4882a593Smuzhiyun
1087*4882a593Smuzhiyun /*
1088*4882a593Smuzhiyun * Tracking of fully allocated slabs for debugging purposes.
1089*4882a593Smuzhiyun */
add_full(struct kmem_cache * s,struct kmem_cache_node * n,struct page * page)1090*4882a593Smuzhiyun static void add_full(struct kmem_cache *s,
1091*4882a593Smuzhiyun struct kmem_cache_node *n, struct page *page)
1092*4882a593Smuzhiyun {
1093*4882a593Smuzhiyun if (!(s->flags & SLAB_STORE_USER))
1094*4882a593Smuzhiyun return;
1095*4882a593Smuzhiyun
1096*4882a593Smuzhiyun lockdep_assert_held(&n->list_lock);
1097*4882a593Smuzhiyun list_add(&page->slab_list, &n->full);
1098*4882a593Smuzhiyun }
1099*4882a593Smuzhiyun
remove_full(struct kmem_cache * s,struct kmem_cache_node * n,struct page * page)1100*4882a593Smuzhiyun static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1101*4882a593Smuzhiyun {
1102*4882a593Smuzhiyun if (!(s->flags & SLAB_STORE_USER))
1103*4882a593Smuzhiyun return;
1104*4882a593Smuzhiyun
1105*4882a593Smuzhiyun lockdep_assert_held(&n->list_lock);
1106*4882a593Smuzhiyun list_del(&page->slab_list);
1107*4882a593Smuzhiyun }
1108*4882a593Smuzhiyun
1109*4882a593Smuzhiyun /* Tracking of the number of slabs for debugging purposes */
slabs_node(struct kmem_cache * s,int node)1110*4882a593Smuzhiyun static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1111*4882a593Smuzhiyun {
1112*4882a593Smuzhiyun struct kmem_cache_node *n = get_node(s, node);
1113*4882a593Smuzhiyun
1114*4882a593Smuzhiyun return atomic_long_read(&n->nr_slabs);
1115*4882a593Smuzhiyun }
1116*4882a593Smuzhiyun
1117*4882a593Smuzhiyun static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1118*4882a593Smuzhiyun {
1119*4882a593Smuzhiyun return atomic_long_read(&n->nr_slabs);
1120*4882a593Smuzhiyun }
1121*4882a593Smuzhiyun
1122*4882a593Smuzhiyun static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1123*4882a593Smuzhiyun {
1124*4882a593Smuzhiyun struct kmem_cache_node *n = get_node(s, node);
1125*4882a593Smuzhiyun
1126*4882a593Smuzhiyun /*
1127*4882a593Smuzhiyun * May be called early in order to allocate a slab for the
1128*4882a593Smuzhiyun * kmem_cache_node structure. Solve the chicken-egg
1129*4882a593Smuzhiyun * dilemma by deferring the increment of the count during
1130*4882a593Smuzhiyun * bootstrap (see early_kmem_cache_node_alloc).
1131*4882a593Smuzhiyun */
1132*4882a593Smuzhiyun if (likely(n)) {
1133*4882a593Smuzhiyun atomic_long_inc(&n->nr_slabs);
1134*4882a593Smuzhiyun atomic_long_add(objects, &n->total_objects);
1135*4882a593Smuzhiyun }
1136*4882a593Smuzhiyun }
1137*4882a593Smuzhiyun static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1138*4882a593Smuzhiyun {
1139*4882a593Smuzhiyun struct kmem_cache_node *n = get_node(s, node);
1140*4882a593Smuzhiyun
1141*4882a593Smuzhiyun atomic_long_dec(&n->nr_slabs);
1142*4882a593Smuzhiyun atomic_long_sub(objects, &n->total_objects);
1143*4882a593Smuzhiyun }
1144*4882a593Smuzhiyun
1145*4882a593Smuzhiyun /* Object debug checks for alloc/free paths */
1146*4882a593Smuzhiyun static void setup_object_debug(struct kmem_cache *s, struct page *page,
1147*4882a593Smuzhiyun void *object)
1148*4882a593Smuzhiyun {
1149*4882a593Smuzhiyun if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
1150*4882a593Smuzhiyun return;
1151*4882a593Smuzhiyun
1152*4882a593Smuzhiyun init_object(s, object, SLUB_RED_INACTIVE);
1153*4882a593Smuzhiyun init_tracking(s, object);
1154*4882a593Smuzhiyun }
1155*4882a593Smuzhiyun
1156*4882a593Smuzhiyun static
1157*4882a593Smuzhiyun void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
1158*4882a593Smuzhiyun {
1159*4882a593Smuzhiyun if (!kmem_cache_debug_flags(s, SLAB_POISON))
1160*4882a593Smuzhiyun return;
1161*4882a593Smuzhiyun
1162*4882a593Smuzhiyun metadata_access_enable();
1163*4882a593Smuzhiyun memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page));
1164*4882a593Smuzhiyun metadata_access_disable();
1165*4882a593Smuzhiyun }
1166*4882a593Smuzhiyun
1167*4882a593Smuzhiyun static inline int alloc_consistency_checks(struct kmem_cache *s,
1168*4882a593Smuzhiyun struct page *page, void *object)
1169*4882a593Smuzhiyun {
1170*4882a593Smuzhiyun if (!check_slab(s, page))
1171*4882a593Smuzhiyun return 0;
1172*4882a593Smuzhiyun
1173*4882a593Smuzhiyun if (!check_valid_pointer(s, page, object)) {
1174*4882a593Smuzhiyun object_err(s, page, object, "Freelist Pointer check fails");
1175*4882a593Smuzhiyun return 0;
1176*4882a593Smuzhiyun }
1177*4882a593Smuzhiyun
1178*4882a593Smuzhiyun if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1179*4882a593Smuzhiyun return 0;
1180*4882a593Smuzhiyun
1181*4882a593Smuzhiyun return 1;
1182*4882a593Smuzhiyun }
1183*4882a593Smuzhiyun
1184*4882a593Smuzhiyun static noinline int alloc_debug_processing(struct kmem_cache *s,
1185*4882a593Smuzhiyun struct page *page,
1186*4882a593Smuzhiyun void *object, unsigned long addr)
1187*4882a593Smuzhiyun {
1188*4882a593Smuzhiyun if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1189*4882a593Smuzhiyun if (!alloc_consistency_checks(s, page, object))
1190*4882a593Smuzhiyun goto bad;
1191*4882a593Smuzhiyun }
1192*4882a593Smuzhiyun
1193*4882a593Smuzhiyun /* Success. Perform special debug activities for allocs */
1194*4882a593Smuzhiyun if (s->flags & SLAB_STORE_USER)
1195*4882a593Smuzhiyun set_track(s, object, TRACK_ALLOC, addr);
1196*4882a593Smuzhiyun trace(s, page, object, 1);
1197*4882a593Smuzhiyun init_object(s, object, SLUB_RED_ACTIVE);
1198*4882a593Smuzhiyun return 1;
1199*4882a593Smuzhiyun
1200*4882a593Smuzhiyun bad:
1201*4882a593Smuzhiyun if (PageSlab(page)) {
1202*4882a593Smuzhiyun /*
1203*4882a593Smuzhiyun * If this is a slab page then let's do the best we can
1204*4882a593Smuzhiyun * to avoid issues in the future. Marking all objects
1205*4882a593Smuzhiyun * as used avoids touching the remaining objects.
1206*4882a593Smuzhiyun */
1207*4882a593Smuzhiyun slab_fix(s, "Marking all objects used");
1208*4882a593Smuzhiyun page->inuse = page->objects;
1209*4882a593Smuzhiyun page->freelist = NULL;
1210*4882a593Smuzhiyun }
1211*4882a593Smuzhiyun return 0;
1212*4882a593Smuzhiyun }
1213*4882a593Smuzhiyun
1214*4882a593Smuzhiyun static inline int free_consistency_checks(struct kmem_cache *s,
1215*4882a593Smuzhiyun struct page *page, void *object, unsigned long addr)
1216*4882a593Smuzhiyun {
1217*4882a593Smuzhiyun if (!check_valid_pointer(s, page, object)) {
1218*4882a593Smuzhiyun slab_err(s, page, "Invalid object pointer 0x%p", object);
1219*4882a593Smuzhiyun return 0;
1220*4882a593Smuzhiyun }
1221*4882a593Smuzhiyun
1222*4882a593Smuzhiyun if (on_freelist(s, page, object)) {
1223*4882a593Smuzhiyun object_err(s, page, object, "Object already free");
1224*4882a593Smuzhiyun return 0;
1225*4882a593Smuzhiyun }
1226*4882a593Smuzhiyun
1227*4882a593Smuzhiyun if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1228*4882a593Smuzhiyun return 0;
1229*4882a593Smuzhiyun
1230*4882a593Smuzhiyun if (unlikely(s != page->slab_cache)) {
1231*4882a593Smuzhiyun if (!PageSlab(page)) {
1232*4882a593Smuzhiyun slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1233*4882a593Smuzhiyun object);
1234*4882a593Smuzhiyun } else if (!page->slab_cache) {
1235*4882a593Smuzhiyun pr_err("SLUB <none>: no slab for object 0x%p.\n",
1236*4882a593Smuzhiyun object);
1237*4882a593Smuzhiyun dump_stack();
1238*4882a593Smuzhiyun } else
1239*4882a593Smuzhiyun object_err(s, page, object,
1240*4882a593Smuzhiyun "page slab pointer corrupt.");
1241*4882a593Smuzhiyun return 0;
1242*4882a593Smuzhiyun }
1243*4882a593Smuzhiyun return 1;
1244*4882a593Smuzhiyun }
1245*4882a593Smuzhiyun
1246*4882a593Smuzhiyun /* Supports checking bulk free of a constructed freelist */
1247*4882a593Smuzhiyun static noinline int free_debug_processing(
1248*4882a593Smuzhiyun struct kmem_cache *s, struct page *page,
1249*4882a593Smuzhiyun void *head, void *tail, int bulk_cnt,
1250*4882a593Smuzhiyun unsigned long addr)
1251*4882a593Smuzhiyun {
1252*4882a593Smuzhiyun struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1253*4882a593Smuzhiyun void *object = head;
1254*4882a593Smuzhiyun int cnt = 0;
1255*4882a593Smuzhiyun unsigned long flags;
1256*4882a593Smuzhiyun int ret = 0;
1257*4882a593Smuzhiyun
1258*4882a593Smuzhiyun spin_lock_irqsave(&n->list_lock, flags);
1259*4882a593Smuzhiyun slab_lock(page);
1260*4882a593Smuzhiyun
1261*4882a593Smuzhiyun if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1262*4882a593Smuzhiyun if (!check_slab(s, page))
1263*4882a593Smuzhiyun goto out;
1264*4882a593Smuzhiyun }
1265*4882a593Smuzhiyun
1266*4882a593Smuzhiyun next_object:
1267*4882a593Smuzhiyun cnt++;
1268*4882a593Smuzhiyun
1269*4882a593Smuzhiyun if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1270*4882a593Smuzhiyun if (!free_consistency_checks(s, page, object, addr))
1271*4882a593Smuzhiyun goto out;
1272*4882a593Smuzhiyun }
1273*4882a593Smuzhiyun
1274*4882a593Smuzhiyun if (s->flags & SLAB_STORE_USER)
1275*4882a593Smuzhiyun set_track(s, object, TRACK_FREE, addr);
1276*4882a593Smuzhiyun trace(s, page, object, 0);
1277*4882a593Smuzhiyun /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
1278*4882a593Smuzhiyun init_object(s, object, SLUB_RED_INACTIVE);
1279*4882a593Smuzhiyun
1280*4882a593Smuzhiyun /* Reached end of constructed freelist yet? */
1281*4882a593Smuzhiyun if (object != tail) {
1282*4882a593Smuzhiyun object = get_freepointer(s, object);
1283*4882a593Smuzhiyun goto next_object;
1284*4882a593Smuzhiyun }
1285*4882a593Smuzhiyun ret = 1;
1286*4882a593Smuzhiyun
1287*4882a593Smuzhiyun out:
1288*4882a593Smuzhiyun if (cnt != bulk_cnt)
1289*4882a593Smuzhiyun slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1290*4882a593Smuzhiyun bulk_cnt, cnt);
1291*4882a593Smuzhiyun
1292*4882a593Smuzhiyun slab_unlock(page);
1293*4882a593Smuzhiyun spin_unlock_irqrestore(&n->list_lock, flags);
1294*4882a593Smuzhiyun if (!ret)
1295*4882a593Smuzhiyun slab_fix(s, "Object at 0x%p not freed", object);
1296*4882a593Smuzhiyun return ret;
1297*4882a593Smuzhiyun }
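/*
 * Illustrative sketch, not part of the kernel build (kept under #if 0):
 * how a constructed freelist of the form head -> ... -> tail is walked
 * object by object, the way free_debug_processing() above does, and how
 * the walked count is checked against the caller-supplied bulk_cnt.
 * struct demo_obj and walk_bulk_list() are hypothetical names used only
 * for this example.
 */
#if 0
#include <stdio.h>

struct demo_obj {
	struct demo_obj *free_ptr;	/* stands in for get_freepointer() */
};

/* Walk head..tail and return how many objects were visited. */
static int walk_bulk_list(struct demo_obj *head, struct demo_obj *tail)
{
	struct demo_obj *object = head;
	int cnt = 0;

	for (;;) {
		cnt++;			/* per-object checks would go here */
		if (object == tail)	/* reached end of constructed freelist */
			break;
		object = object->free_ptr;
	}
	return cnt;
}

int main(void)
{
	struct demo_obj o[3] = {
		{ .free_ptr = &o[1] },
		{ .free_ptr = &o[2] },
		{ .free_ptr = NULL },
	};
	int bulk_cnt = 3;
	int cnt = walk_bulk_list(&o[0], &o[2]);

	if (cnt != bulk_cnt)
		printf("Bulk freelist count(%d) invalid(%d)\n", bulk_cnt, cnt);
	return 0;
}
#endif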
1298*4882a593Smuzhiyun
1299*4882a593Smuzhiyun /*
1300*4882a593Smuzhiyun * Parse a block of slub_debug options. Blocks are delimited by ';'
1301*4882a593Smuzhiyun *
1302*4882a593Smuzhiyun * @str: start of block
1303*4882a593Smuzhiyun * @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
1304*4882a593Smuzhiyun * @slabs: return start of list of slabs, or NULL when there's no list
1305*4882a593Smuzhiyun * @init: assume this is initial parsing and not per-kmem-create parsing
1306*4882a593Smuzhiyun *
1307*4882a593Smuzhiyun * returns the start of next block if there's any, or NULL
1308*4882a593Smuzhiyun */
1309*4882a593Smuzhiyun static char *
1310*4882a593Smuzhiyun parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
1311*4882a593Smuzhiyun {
1312*4882a593Smuzhiyun bool higher_order_disable = false;
1313*4882a593Smuzhiyun
1314*4882a593Smuzhiyun /* Skip any completely empty blocks */
1315*4882a593Smuzhiyun while (*str && *str == ';')
1316*4882a593Smuzhiyun str++;
1317*4882a593Smuzhiyun
1318*4882a593Smuzhiyun if (*str == ',') {
1319*4882a593Smuzhiyun /*
1320*4882a593Smuzhiyun * No options but restriction on slabs. This means full
1321*4882a593Smuzhiyun * debugging for slabs matching a pattern.
1322*4882a593Smuzhiyun */
1323*4882a593Smuzhiyun *flags = DEBUG_DEFAULT_FLAGS;
1324*4882a593Smuzhiyun goto check_slabs;
1325*4882a593Smuzhiyun }
1326*4882a593Smuzhiyun *flags = 0;
1327*4882a593Smuzhiyun
1328*4882a593Smuzhiyun /* Determine which debug features should be switched on */
1329*4882a593Smuzhiyun for (; *str && *str != ',' && *str != ';'; str++) {
1330*4882a593Smuzhiyun switch (tolower(*str)) {
1331*4882a593Smuzhiyun case '-':
1332*4882a593Smuzhiyun *flags = 0;
1333*4882a593Smuzhiyun break;
1334*4882a593Smuzhiyun case 'f':
1335*4882a593Smuzhiyun *flags |= SLAB_CONSISTENCY_CHECKS;
1336*4882a593Smuzhiyun break;
1337*4882a593Smuzhiyun case 'z':
1338*4882a593Smuzhiyun *flags |= SLAB_RED_ZONE;
1339*4882a593Smuzhiyun break;
1340*4882a593Smuzhiyun case 'p':
1341*4882a593Smuzhiyun *flags |= SLAB_POISON;
1342*4882a593Smuzhiyun break;
1343*4882a593Smuzhiyun case 'u':
1344*4882a593Smuzhiyun *flags |= SLAB_STORE_USER;
1345*4882a593Smuzhiyun break;
1346*4882a593Smuzhiyun case 't':
1347*4882a593Smuzhiyun *flags |= SLAB_TRACE;
1348*4882a593Smuzhiyun break;
1349*4882a593Smuzhiyun case 'a':
1350*4882a593Smuzhiyun *flags |= SLAB_FAILSLAB;
1351*4882a593Smuzhiyun break;
1352*4882a593Smuzhiyun case 'o':
1353*4882a593Smuzhiyun /*
1354*4882a593Smuzhiyun * Avoid enabling debugging on caches if their minimum
1355*4882a593Smuzhiyun * order would increase as a result.
1356*4882a593Smuzhiyun */
1357*4882a593Smuzhiyun higher_order_disable = true;
1358*4882a593Smuzhiyun break;
1359*4882a593Smuzhiyun default:
1360*4882a593Smuzhiyun if (init)
1361*4882a593Smuzhiyun pr_err("slub_debug option '%c' unknown. skipped\n", *str);
1362*4882a593Smuzhiyun }
1363*4882a593Smuzhiyun }
1364*4882a593Smuzhiyun check_slabs:
1365*4882a593Smuzhiyun if (*str == ',')
1366*4882a593Smuzhiyun *slabs = ++str;
1367*4882a593Smuzhiyun else
1368*4882a593Smuzhiyun *slabs = NULL;
1369*4882a593Smuzhiyun
1370*4882a593Smuzhiyun /* Skip over the slab list */
1371*4882a593Smuzhiyun while (*str && *str != ';')
1372*4882a593Smuzhiyun str++;
1373*4882a593Smuzhiyun
1374*4882a593Smuzhiyun /* Skip any completely empty blocks */
1375*4882a593Smuzhiyun while (*str && *str == ';')
1376*4882a593Smuzhiyun str++;
1377*4882a593Smuzhiyun
1378*4882a593Smuzhiyun if (init && higher_order_disable)
1379*4882a593Smuzhiyun disable_higher_order_debug = 1;
1380*4882a593Smuzhiyun
1381*4882a593Smuzhiyun if (*str)
1382*4882a593Smuzhiyun return str;
1383*4882a593Smuzhiyun else
1384*4882a593Smuzhiyun return NULL;
1385*4882a593Smuzhiyun }
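/*
 * Illustrative sketch, not part of the kernel build (kept under #if 0):
 * how a boot string such as "slub_debug=FZ;P,kmalloc-64;,dentry" splits
 * into the ';'-delimited blocks that parse_slub_debug_flags() above
 * returns one at a time.  demo_parse_block() and the DEMO_* flag bits
 * are hypothetical and only mirror the letter-to-flag mapping; the real
 * parser substitutes DEBUG_DEFAULT_FLAGS for a block that starts with a
 * ',' (no option letters).
 */
#if 0
#include <stdio.h>
#include <ctype.h>

#define DEMO_CONSISTENCY	0x1	/* 'f' */
#define DEMO_RED_ZONE		0x2	/* 'z' */
#define DEMO_POISON		0x4	/* 'p' */

/* Parse one block, print its flags and slab list, return the next block. */
static const char *demo_parse_block(const char *str)
{
	unsigned int flags = 0;
	const char *slabs = NULL;

	for (; *str && *str != ',' && *str != ';'; str++) {
		switch (tolower(*str)) {
		case 'f': flags |= DEMO_CONSISTENCY; break;
		case 'z': flags |= DEMO_RED_ZONE; break;
		case 'p': flags |= DEMO_POISON; break;
		}
	}
	if (*str == ',')
		slabs = ++str;
	while (*str && *str != ';')	/* skip over the slab list */
		str++;
	printf("flags=0x%x slabs=%.*s\n", flags,
	       (int)(str - (slabs ? slabs : str)), slabs ? slabs : "");
	return *str ? str + 1 : NULL;
}

int main(void)
{
	const char *next = "FZ;P,kmalloc-64;,dentry";

	while (next)
		next = demo_parse_block(next);
	return 0;
}
#endif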
1386*4882a593Smuzhiyun
1387*4882a593Smuzhiyun static int __init setup_slub_debug(char *str)
1388*4882a593Smuzhiyun {
1389*4882a593Smuzhiyun slab_flags_t flags;
1390*4882a593Smuzhiyun slab_flags_t global_flags;
1391*4882a593Smuzhiyun char *saved_str;
1392*4882a593Smuzhiyun char *slab_list;
1393*4882a593Smuzhiyun bool global_slub_debug_changed = false;
1394*4882a593Smuzhiyun bool slab_list_specified = false;
1395*4882a593Smuzhiyun
1396*4882a593Smuzhiyun global_flags = DEBUG_DEFAULT_FLAGS;
1397*4882a593Smuzhiyun if (*str++ != '=' || !*str)
1398*4882a593Smuzhiyun /*
1399*4882a593Smuzhiyun * No options specified. Switch on full debugging.
1400*4882a593Smuzhiyun */
1401*4882a593Smuzhiyun goto out;
1402*4882a593Smuzhiyun
1403*4882a593Smuzhiyun saved_str = str;
1404*4882a593Smuzhiyun while (str) {
1405*4882a593Smuzhiyun str = parse_slub_debug_flags(str, &flags, &slab_list, true);
1406*4882a593Smuzhiyun
1407*4882a593Smuzhiyun if (!slab_list) {
1408*4882a593Smuzhiyun global_flags = flags;
1409*4882a593Smuzhiyun global_slub_debug_changed = true;
1410*4882a593Smuzhiyun } else {
1411*4882a593Smuzhiyun slab_list_specified = true;
1412*4882a593Smuzhiyun }
1413*4882a593Smuzhiyun }
1414*4882a593Smuzhiyun
1415*4882a593Smuzhiyun /*
1416*4882a593Smuzhiyun * For backwards compatibility, a single list of flags with list of
1417*4882a593Smuzhiyun * slabs means debugging is only changed for those slabs, so the global
1418*4882a593Smuzhiyun * slub_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending
1419*4882a593Smuzhiyun * on CONFIG_SLUB_DEBUG_ON). We can extend that to multiple lists as
1420*4882a593Smuzhiyun * long as there is no option specifying flags without a slab list.
1421*4882a593Smuzhiyun */
1422*4882a593Smuzhiyun if (slab_list_specified) {
1423*4882a593Smuzhiyun if (!global_slub_debug_changed)
1424*4882a593Smuzhiyun global_flags = slub_debug;
1425*4882a593Smuzhiyun slub_debug_string = saved_str;
1426*4882a593Smuzhiyun }
1427*4882a593Smuzhiyun out:
1428*4882a593Smuzhiyun slub_debug = global_flags;
1429*4882a593Smuzhiyun if (slub_debug != 0 || slub_debug_string)
1430*4882a593Smuzhiyun static_branch_enable(&slub_debug_enabled);
1431*4882a593Smuzhiyun if ((static_branch_unlikely(&init_on_alloc) ||
1432*4882a593Smuzhiyun static_branch_unlikely(&init_on_free)) &&
1433*4882a593Smuzhiyun (slub_debug & SLAB_POISON))
1434*4882a593Smuzhiyun pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
1435*4882a593Smuzhiyun return 1;
1436*4882a593Smuzhiyun }
1437*4882a593Smuzhiyun
1438*4882a593Smuzhiyun __setup("slub_debug", setup_slub_debug);
1439*4882a593Smuzhiyun
1440*4882a593Smuzhiyun /*
1441*4882a593Smuzhiyun * kmem_cache_flags - apply debugging options to the cache
1442*4882a593Smuzhiyun * @object_size: the size of an object without meta data
1443*4882a593Smuzhiyun * @flags: flags to set
1444*4882a593Smuzhiyun * @name: name of the cache
1445*4882a593Smuzhiyun *
1446*4882a593Smuzhiyun * Debug option(s) are applied to @flags. In addition to the debug
1447*4882a593Smuzhiyun * option(s), if a slab name (or multiple) is specified i.e.
1448*4882a593Smuzhiyun * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
1449*4882a593Smuzhiyun * then only the select slabs will receive the debug option(s).
1450*4882a593Smuzhiyun */
1451*4882a593Smuzhiyun slab_flags_t kmem_cache_flags(unsigned int object_size,
1452*4882a593Smuzhiyun slab_flags_t flags, const char *name)
1453*4882a593Smuzhiyun {
1454*4882a593Smuzhiyun char *iter;
1455*4882a593Smuzhiyun size_t len;
1456*4882a593Smuzhiyun char *next_block;
1457*4882a593Smuzhiyun slab_flags_t block_flags;
1458*4882a593Smuzhiyun
1459*4882a593Smuzhiyun len = strlen(name);
1460*4882a593Smuzhiyun next_block = slub_debug_string;
1461*4882a593Smuzhiyun /* Go through all blocks of debug options, see if any matches our slab's name */
1462*4882a593Smuzhiyun while (next_block) {
1463*4882a593Smuzhiyun next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
1464*4882a593Smuzhiyun if (!iter)
1465*4882a593Smuzhiyun continue;
1466*4882a593Smuzhiyun /* Found a block that has a slab list, search it */
1467*4882a593Smuzhiyun while (*iter) {
1468*4882a593Smuzhiyun char *end, *glob;
1469*4882a593Smuzhiyun size_t cmplen;
1470*4882a593Smuzhiyun
1471*4882a593Smuzhiyun end = strchrnul(iter, ',');
1472*4882a593Smuzhiyun if (next_block && next_block < end)
1473*4882a593Smuzhiyun end = next_block - 1;
1474*4882a593Smuzhiyun
1475*4882a593Smuzhiyun glob = strnchr(iter, end - iter, '*');
1476*4882a593Smuzhiyun if (glob)
1477*4882a593Smuzhiyun cmplen = glob - iter;
1478*4882a593Smuzhiyun else
1479*4882a593Smuzhiyun cmplen = max_t(size_t, len, (end - iter));
1480*4882a593Smuzhiyun
1481*4882a593Smuzhiyun if (!strncmp(name, iter, cmplen)) {
1482*4882a593Smuzhiyun flags |= block_flags;
1483*4882a593Smuzhiyun return flags;
1484*4882a593Smuzhiyun }
1485*4882a593Smuzhiyun
1486*4882a593Smuzhiyun if (!*end || *end == ';')
1487*4882a593Smuzhiyun break;
1488*4882a593Smuzhiyun iter = end + 1;
1489*4882a593Smuzhiyun }
1490*4882a593Smuzhiyun }
1491*4882a593Smuzhiyun
1492*4882a593Smuzhiyun return flags | slub_debug;
1493*4882a593Smuzhiyun }
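/*
 * Illustrative sketch, not part of the kernel build (kept under #if 0):
 * the name match in kmem_cache_flags() above treats a '*' in a slab
 * list entry as a prefix glob (cmplen = glob - iter).  demo_match() is
 * a hypothetical stand-in that reproduces only that comparison.
 */
#if 0
#include <stdio.h>
#include <string.h>

static int demo_match(const char *name, const char *pattern)
{
	const char *glob = strchr(pattern, '*');
	size_t len = strlen(name);
	size_t cmplen;

	if (glob)
		cmplen = glob - pattern;	/* compare the prefix only */
	else
		cmplen = len > strlen(pattern) ? len : strlen(pattern);

	return !strncmp(name, pattern, cmplen);
}

int main(void)
{
	printf("%d\n", demo_match("kmalloc-64", "kmalloc-*"));	/* 1 */
	printf("%d\n", demo_match("dentry", "kmalloc-*"));	/* 0 */
	printf("%d\n", demo_match("dentry", "dentry"));		/* 1 */
	return 0;
}
#endif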
1494*4882a593Smuzhiyun #else /* !CONFIG_SLUB_DEBUG */
1495*4882a593Smuzhiyun static inline void setup_object_debug(struct kmem_cache *s,
1496*4882a593Smuzhiyun struct page *page, void *object) {}
1497*4882a593Smuzhiyun static inline
1498*4882a593Smuzhiyun void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
1499*4882a593Smuzhiyun
1500*4882a593Smuzhiyun static inline int alloc_debug_processing(struct kmem_cache *s,
1501*4882a593Smuzhiyun struct page *page, void *object, unsigned long addr) { return 0; }
1502*4882a593Smuzhiyun
1503*4882a593Smuzhiyun static inline int free_debug_processing(
1504*4882a593Smuzhiyun struct kmem_cache *s, struct page *page,
1505*4882a593Smuzhiyun void *head, void *tail, int bulk_cnt,
1506*4882a593Smuzhiyun unsigned long addr) { return 0; }
1507*4882a593Smuzhiyun
1508*4882a593Smuzhiyun static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1509*4882a593Smuzhiyun { return 1; }
1510*4882a593Smuzhiyun static inline int check_object(struct kmem_cache *s, struct page *page,
1511*4882a593Smuzhiyun void *object, u8 val) { return 1; }
1512*4882a593Smuzhiyun static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1513*4882a593Smuzhiyun struct page *page) {}
1514*4882a593Smuzhiyun static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1515*4882a593Smuzhiyun struct page *page) {}
1516*4882a593Smuzhiyun slab_flags_t kmem_cache_flags(unsigned int object_size,
1517*4882a593Smuzhiyun slab_flags_t flags, const char *name)
1518*4882a593Smuzhiyun {
1519*4882a593Smuzhiyun return flags;
1520*4882a593Smuzhiyun }
1521*4882a593Smuzhiyun #define slub_debug 0
1522*4882a593Smuzhiyun
1523*4882a593Smuzhiyun #define disable_higher_order_debug 0
1524*4882a593Smuzhiyun
1525*4882a593Smuzhiyun static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1526*4882a593Smuzhiyun { return 0; }
1527*4882a593Smuzhiyun static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1528*4882a593Smuzhiyun { return 0; }
1529*4882a593Smuzhiyun static inline void inc_slabs_node(struct kmem_cache *s, int node,
1530*4882a593Smuzhiyun int objects) {}
1531*4882a593Smuzhiyun static inline void dec_slabs_node(struct kmem_cache *s, int node,
1532*4882a593Smuzhiyun int objects) {}
1533*4882a593Smuzhiyun
1534*4882a593Smuzhiyun static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
1535*4882a593Smuzhiyun void **freelist, void *nextfree)
1536*4882a593Smuzhiyun {
1537*4882a593Smuzhiyun return false;
1538*4882a593Smuzhiyun }
1539*4882a593Smuzhiyun #endif /* CONFIG_SLUB_DEBUG */
1540*4882a593Smuzhiyun
1541*4882a593Smuzhiyun /*
1542*4882a593Smuzhiyun * Hooks for other subsystems that check memory allocations. In a typical
1543*4882a593Smuzhiyun * production configuration these hooks all should produce no code at all.
1544*4882a593Smuzhiyun */
1545*4882a593Smuzhiyun static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1546*4882a593Smuzhiyun {
1547*4882a593Smuzhiyun ptr = kasan_kmalloc_large(ptr, size, flags);
1548*4882a593Smuzhiyun /* As ptr might get tagged, call kmemleak hook after KASAN. */
1549*4882a593Smuzhiyun kmemleak_alloc(ptr, size, 1, flags);
1550*4882a593Smuzhiyun return ptr;
1551*4882a593Smuzhiyun }
1552*4882a593Smuzhiyun
1553*4882a593Smuzhiyun static __always_inline void kfree_hook(void *x)
1554*4882a593Smuzhiyun {
1555*4882a593Smuzhiyun kmemleak_free(x);
1556*4882a593Smuzhiyun kasan_kfree_large(x);
1557*4882a593Smuzhiyun }
1558*4882a593Smuzhiyun
1559*4882a593Smuzhiyun static __always_inline bool slab_free_hook(struct kmem_cache *s,
1560*4882a593Smuzhiyun void *x, bool init)
1561*4882a593Smuzhiyun {
1562*4882a593Smuzhiyun kmemleak_free_recursive(x, s->flags);
1563*4882a593Smuzhiyun
1564*4882a593Smuzhiyun /*
1565*4882a593Smuzhiyun * Trouble is that we may no longer disable interrupts in the fast path.
1566*4882a593Smuzhiyun * So in order to make the debug calls that expect irqs to be
1567*4882a593Smuzhiyun * disabled, we need to disable interrupts temporarily.
1568*4882a593Smuzhiyun */
1569*4882a593Smuzhiyun #ifdef CONFIG_LOCKDEP
1570*4882a593Smuzhiyun {
1571*4882a593Smuzhiyun unsigned long flags;
1572*4882a593Smuzhiyun
1573*4882a593Smuzhiyun local_irq_save(flags);
1574*4882a593Smuzhiyun debug_check_no_locks_freed(x, s->object_size);
1575*4882a593Smuzhiyun local_irq_restore(flags);
1576*4882a593Smuzhiyun }
1577*4882a593Smuzhiyun #endif
1578*4882a593Smuzhiyun if (!(s->flags & SLAB_DEBUG_OBJECTS))
1579*4882a593Smuzhiyun debug_check_no_obj_freed(x, s->object_size);
1580*4882a593Smuzhiyun
1581*4882a593Smuzhiyun /* Use KCSAN to help debug racy use-after-free. */
1582*4882a593Smuzhiyun if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
1583*4882a593Smuzhiyun __kcsan_check_access(x, s->object_size,
1584*4882a593Smuzhiyun KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
1585*4882a593Smuzhiyun
1586*4882a593Smuzhiyun /*
1587*4882a593Smuzhiyun * As memory initialization might be integrated into KASAN,
1588*4882a593Smuzhiyun * kasan_slab_free and initialization memset's must be
1589*4882a593Smuzhiyun * kept together to avoid discrepancies in behavior.
1590*4882a593Smuzhiyun *
1591*4882a593Smuzhiyun * The initialization memset's clear the object and the metadata,
1592*4882a593Smuzhiyun * but don't touch the SLAB redzone.
1593*4882a593Smuzhiyun */
1594*4882a593Smuzhiyun if (init) {
1595*4882a593Smuzhiyun int rsize;
1596*4882a593Smuzhiyun
1597*4882a593Smuzhiyun if (!kasan_has_integrated_init())
1598*4882a593Smuzhiyun memset(kasan_reset_tag(x), 0, s->object_size);
1599*4882a593Smuzhiyun rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0;
1600*4882a593Smuzhiyun memset((char *)kasan_reset_tag(x) + s->inuse, 0,
1601*4882a593Smuzhiyun s->size - s->inuse - rsize);
1602*4882a593Smuzhiyun }
1603*4882a593Smuzhiyun /* KASAN might put x into memory quarantine, delaying its reuse. */
1604*4882a593Smuzhiyun return kasan_slab_free(s, x, init);
1605*4882a593Smuzhiyun }
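/*
 * Worked example (hypothetical numbers, restating the arithmetic above):
 * for a cache with object_size = 64, inuse = 80, size = 128 and
 * red_left_pad = 16 with SLAB_RED_ZONE set, the init path clears the
 * object bytes [0, 64) (unless KASAN's integrated init already does so)
 * and then size - inuse - rsize = 128 - 80 - 16 = 32 metadata bytes
 * starting at offset 80, i.e. [80, 112), leaving the redzone untouched
 * as noted above.
 */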
1606*4882a593Smuzhiyun
1607*4882a593Smuzhiyun static inline bool slab_free_freelist_hook(struct kmem_cache *s,
1608*4882a593Smuzhiyun void **head, void **tail,
1609*4882a593Smuzhiyun int *cnt)
1610*4882a593Smuzhiyun {
1611*4882a593Smuzhiyun
1612*4882a593Smuzhiyun void *object;
1613*4882a593Smuzhiyun void *next = *head;
1614*4882a593Smuzhiyun void *old_tail = *tail ? *tail : *head;
1615*4882a593Smuzhiyun
1616*4882a593Smuzhiyun if (is_kfence_address(next)) {
1617*4882a593Smuzhiyun slab_free_hook(s, next, false);
1618*4882a593Smuzhiyun return true;
1619*4882a593Smuzhiyun }
1620*4882a593Smuzhiyun
1621*4882a593Smuzhiyun /* Head and tail of the reconstructed freelist */
1622*4882a593Smuzhiyun *head = NULL;
1623*4882a593Smuzhiyun *tail = NULL;
1624*4882a593Smuzhiyun
1625*4882a593Smuzhiyun do {
1626*4882a593Smuzhiyun object = next;
1627*4882a593Smuzhiyun next = get_freepointer(s, object);
1628*4882a593Smuzhiyun
1629*4882a593Smuzhiyun /* If object's reuse doesn't have to be delayed */
1630*4882a593Smuzhiyun if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
1631*4882a593Smuzhiyun /* Move object to the new freelist */
1632*4882a593Smuzhiyun set_freepointer(s, object, *head);
1633*4882a593Smuzhiyun *head = object;
1634*4882a593Smuzhiyun if (!*tail)
1635*4882a593Smuzhiyun *tail = object;
1636*4882a593Smuzhiyun } else {
1637*4882a593Smuzhiyun /*
1638*4882a593Smuzhiyun * Adjust the reconstructed freelist depth
1639*4882a593Smuzhiyun * accordingly if object's reuse is delayed.
1640*4882a593Smuzhiyun */
1641*4882a593Smuzhiyun --(*cnt);
1642*4882a593Smuzhiyun }
1643*4882a593Smuzhiyun } while (object != old_tail);
1644*4882a593Smuzhiyun
1645*4882a593Smuzhiyun if (*head == *tail)
1646*4882a593Smuzhiyun *tail = NULL;
1647*4882a593Smuzhiyun
1648*4882a593Smuzhiyun return *head != NULL;
1649*4882a593Smuzhiyun }
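/*
 * Illustrative sketch, not part of the kernel build (kept under #if 0):
 * slab_free_freelist_hook() above rebuilds the detached freelist while
 * dropping every object whose reuse must be delayed, decrementing the
 * count once per dropped object.  struct demo_obj, keep() and rebuild()
 * are hypothetical names; "quarantined" stands in for the case where
 * slab_free_hook() returns true.
 */
#if 0
#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct demo_obj {
	int id;
	bool quarantined;		/* reuse must be delayed */
	struct demo_obj *free_ptr;
};

static bool keep(struct demo_obj *o)
{
	return !o->quarantined;
}

/* Rebuild next..old_tail, returning the new head (or NULL). */
static struct demo_obj *rebuild(struct demo_obj *next,
				struct demo_obj *old_tail, int *cnt)
{
	struct demo_obj *head = NULL, *object;

	do {
		object = next;
		next = object->free_ptr;
		if (keep(object)) {
			object->free_ptr = head;	/* push onto new list */
			head = object;
		} else {
			--(*cnt);			/* adjust freelist depth */
		}
	} while (object != old_tail);
	return head;
}

int main(void)
{
	struct demo_obj o[3] = {
		{ .id = 0, .quarantined = false, .free_ptr = &o[1] },
		{ .id = 1, .quarantined = true,  .free_ptr = &o[2] },
		{ .id = 2, .quarantined = false, .free_ptr = NULL  },
	};
	int cnt = 3;
	struct demo_obj *head = rebuild(&o[0], &o[2], &cnt);

	for (; head; head = head->free_ptr)
		printf("kept object %d\n", head->id);	/* prints 2, then 0 */
	printf("cnt=%d\n", cnt);			/* cnt=2 */
	return 0;
}
#endif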
1650*4882a593Smuzhiyun
1651*4882a593Smuzhiyun static void *setup_object(struct kmem_cache *s, struct page *page,
1652*4882a593Smuzhiyun void *object)
1653*4882a593Smuzhiyun {
1654*4882a593Smuzhiyun setup_object_debug(s, page, object);
1655*4882a593Smuzhiyun object = kasan_init_slab_obj(s, object);
1656*4882a593Smuzhiyun if (unlikely(s->ctor)) {
1657*4882a593Smuzhiyun kasan_unpoison_object_data(s, object);
1658*4882a593Smuzhiyun s->ctor(object);
1659*4882a593Smuzhiyun kasan_poison_object_data(s, object);
1660*4882a593Smuzhiyun }
1661*4882a593Smuzhiyun return object;
1662*4882a593Smuzhiyun }
1663*4882a593Smuzhiyun
1664*4882a593Smuzhiyun /*
1665*4882a593Smuzhiyun * Slab allocation and freeing
1666*4882a593Smuzhiyun */
1667*4882a593Smuzhiyun static inline struct page *alloc_slab_page(struct kmem_cache *s,
1668*4882a593Smuzhiyun gfp_t flags, int node, struct kmem_cache_order_objects oo)
1669*4882a593Smuzhiyun {
1670*4882a593Smuzhiyun struct page *page;
1671*4882a593Smuzhiyun unsigned int order = oo_order(oo);
1672*4882a593Smuzhiyun
1673*4882a593Smuzhiyun if (node == NUMA_NO_NODE)
1674*4882a593Smuzhiyun page = alloc_pages(flags, order);
1675*4882a593Smuzhiyun else
1676*4882a593Smuzhiyun page = __alloc_pages_node(node, flags, order);
1677*4882a593Smuzhiyun
1678*4882a593Smuzhiyun if (page)
1679*4882a593Smuzhiyun account_slab_page(page, order, s);
1680*4882a593Smuzhiyun
1681*4882a593Smuzhiyun return page;
1682*4882a593Smuzhiyun }
1683*4882a593Smuzhiyun
1684*4882a593Smuzhiyun #ifdef CONFIG_SLAB_FREELIST_RANDOM
1685*4882a593Smuzhiyun /* Pre-initialize the random sequence cache */
1686*4882a593Smuzhiyun static int init_cache_random_seq(struct kmem_cache *s)
1687*4882a593Smuzhiyun {
1688*4882a593Smuzhiyun unsigned int count = oo_objects(s->oo);
1689*4882a593Smuzhiyun int err;
1690*4882a593Smuzhiyun
1691*4882a593Smuzhiyun /* Bailout if already initialised */
1692*4882a593Smuzhiyun if (s->random_seq)
1693*4882a593Smuzhiyun return 0;
1694*4882a593Smuzhiyun
1695*4882a593Smuzhiyun err = cache_random_seq_create(s, count, GFP_KERNEL);
1696*4882a593Smuzhiyun if (err) {
1697*4882a593Smuzhiyun pr_err("SLUB: Unable to initialize free list for %s\n",
1698*4882a593Smuzhiyun s->name);
1699*4882a593Smuzhiyun return err;
1700*4882a593Smuzhiyun }
1701*4882a593Smuzhiyun
1702*4882a593Smuzhiyun /* Transform to an offset on the set of pages */
1703*4882a593Smuzhiyun if (s->random_seq) {
1704*4882a593Smuzhiyun unsigned int i;
1705*4882a593Smuzhiyun
1706*4882a593Smuzhiyun for (i = 0; i < count; i++)
1707*4882a593Smuzhiyun s->random_seq[i] *= s->size;
1708*4882a593Smuzhiyun }
1709*4882a593Smuzhiyun return 0;
1710*4882a593Smuzhiyun }
1711*4882a593Smuzhiyun
1712*4882a593Smuzhiyun /* Initialize each random sequence freelist per cache */
1713*4882a593Smuzhiyun static void __init init_freelist_randomization(void)
1714*4882a593Smuzhiyun {
1715*4882a593Smuzhiyun struct kmem_cache *s;
1716*4882a593Smuzhiyun
1717*4882a593Smuzhiyun mutex_lock(&slab_mutex);
1718*4882a593Smuzhiyun
1719*4882a593Smuzhiyun list_for_each_entry(s, &slab_caches, list)
1720*4882a593Smuzhiyun init_cache_random_seq(s);
1721*4882a593Smuzhiyun
1722*4882a593Smuzhiyun mutex_unlock(&slab_mutex);
1723*4882a593Smuzhiyun }
1724*4882a593Smuzhiyun
1725*4882a593Smuzhiyun /* Get the next entry on the pre-computed freelist randomized */
1726*4882a593Smuzhiyun static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
1727*4882a593Smuzhiyun unsigned long *pos, void *start,
1728*4882a593Smuzhiyun unsigned long page_limit,
1729*4882a593Smuzhiyun unsigned long freelist_count)
1730*4882a593Smuzhiyun {
1731*4882a593Smuzhiyun unsigned int idx;
1732*4882a593Smuzhiyun
1733*4882a593Smuzhiyun /*
1734*4882a593Smuzhiyun * If the target page allocation failed, the number of objects on the
1735*4882a593Smuzhiyun * page might be smaller than the usual size defined by the cache.
1736*4882a593Smuzhiyun */
1737*4882a593Smuzhiyun do {
1738*4882a593Smuzhiyun idx = s->random_seq[*pos];
1739*4882a593Smuzhiyun *pos += 1;
1740*4882a593Smuzhiyun if (*pos >= freelist_count)
1741*4882a593Smuzhiyun *pos = 0;
1742*4882a593Smuzhiyun } while (unlikely(idx >= page_limit));
1743*4882a593Smuzhiyun
1744*4882a593Smuzhiyun return (char *)start + idx;
1745*4882a593Smuzhiyun }
1746*4882a593Smuzhiyun
1747*4882a593Smuzhiyun /* Shuffle the single linked freelist based on a random pre-computed sequence */
1748*4882a593Smuzhiyun static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1749*4882a593Smuzhiyun {
1750*4882a593Smuzhiyun void *start;
1751*4882a593Smuzhiyun void *cur;
1752*4882a593Smuzhiyun void *next;
1753*4882a593Smuzhiyun unsigned long idx, pos, page_limit, freelist_count;
1754*4882a593Smuzhiyun
1755*4882a593Smuzhiyun if (page->objects < 2 || !s->random_seq)
1756*4882a593Smuzhiyun return false;
1757*4882a593Smuzhiyun
1758*4882a593Smuzhiyun freelist_count = oo_objects(s->oo);
1759*4882a593Smuzhiyun pos = get_random_int() % freelist_count;
1760*4882a593Smuzhiyun
1761*4882a593Smuzhiyun page_limit = page->objects * s->size;
1762*4882a593Smuzhiyun start = fixup_red_left(s, page_address(page));
1763*4882a593Smuzhiyun
1764*4882a593Smuzhiyun /* First entry is used as the base of the freelist */
1765*4882a593Smuzhiyun cur = next_freelist_entry(s, page, &pos, start, page_limit,
1766*4882a593Smuzhiyun freelist_count);
1767*4882a593Smuzhiyun cur = setup_object(s, page, cur);
1768*4882a593Smuzhiyun page->freelist = cur;
1769*4882a593Smuzhiyun
1770*4882a593Smuzhiyun for (idx = 1; idx < page->objects; idx++) {
1771*4882a593Smuzhiyun next = next_freelist_entry(s, page, &pos, start, page_limit,
1772*4882a593Smuzhiyun freelist_count);
1773*4882a593Smuzhiyun next = setup_object(s, page, next);
1774*4882a593Smuzhiyun set_freepointer(s, cur, next);
1775*4882a593Smuzhiyun cur = next;
1776*4882a593Smuzhiyun }
1777*4882a593Smuzhiyun set_freepointer(s, cur, NULL);
1778*4882a593Smuzhiyun
1779*4882a593Smuzhiyun return true;
1780*4882a593Smuzhiyun }
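/*
 * Illustrative sketch, not part of the kernel build (kept under #if 0):
 * with CONFIG_SLAB_FREELIST_RANDOM, init_cache_random_seq() above turns
 * a random permutation of object indices into byte offsets (idx * size)
 * and shuffle_freelist() links the objects in that order.  The demo
 * chains a four-object "slab" the same way; all names and sizes are
 * hypothetical, and the free pointer is kept at offset 0 here whereas
 * SLUB stores it at s->offset.
 */
#if 0
#include <stdio.h>

#define OBJ_SIZE	32
#define NR_OBJS		4

int main(void)
{
	/* A precomputed permutation, already scaled to byte offsets. */
	unsigned int seq[NR_OBJS] = { 2 * OBJ_SIZE, 0 * OBJ_SIZE,
				      3 * OBJ_SIZE, 1 * OBJ_SIZE };
	/* Backing storage for NR_OBJS objects, suitably aligned. */
	static long long storage[NR_OBJS * OBJ_SIZE / sizeof(long long)];
	char *slab = (char *)storage;
	void **prev = NULL;
	unsigned int i;

	for (i = 0; i < NR_OBJS; i++) {
		void *obj = slab + seq[i];

		if (prev)
			*prev = obj;	/* set_freepointer(prev, obj) */
		prev = (void **)obj;
		printf("freelist entry %u at offset %u\n", i, seq[i]);
	}
	*prev = NULL;			/* terminate the freelist */
	return 0;
}
#endif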
1781*4882a593Smuzhiyun #else
1782*4882a593Smuzhiyun static inline int init_cache_random_seq(struct kmem_cache *s)
1783*4882a593Smuzhiyun {
1784*4882a593Smuzhiyun return 0;
1785*4882a593Smuzhiyun }
1786*4882a593Smuzhiyun static inline void init_freelist_randomization(void) { }
1787*4882a593Smuzhiyun static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1788*4882a593Smuzhiyun {
1789*4882a593Smuzhiyun return false;
1790*4882a593Smuzhiyun }
1791*4882a593Smuzhiyun #endif /* CONFIG_SLAB_FREELIST_RANDOM */
1792*4882a593Smuzhiyun
1793*4882a593Smuzhiyun static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1794*4882a593Smuzhiyun {
1795*4882a593Smuzhiyun struct page *page;
1796*4882a593Smuzhiyun struct kmem_cache_order_objects oo = s->oo;
1797*4882a593Smuzhiyun gfp_t alloc_gfp;
1798*4882a593Smuzhiyun void *start, *p, *next;
1799*4882a593Smuzhiyun int idx;
1800*4882a593Smuzhiyun bool shuffle;
1801*4882a593Smuzhiyun
1802*4882a593Smuzhiyun flags &= gfp_allowed_mask;
1803*4882a593Smuzhiyun
1804*4882a593Smuzhiyun if (gfpflags_allow_blocking(flags))
1805*4882a593Smuzhiyun local_irq_enable();
1806*4882a593Smuzhiyun
1807*4882a593Smuzhiyun flags |= s->allocflags;
1808*4882a593Smuzhiyun
1809*4882a593Smuzhiyun /*
1810*4882a593Smuzhiyun * Let the initial higher-order allocation fail under memory pressure
1811*4882a593Smuzhiyun * so we fall back to the minimum order allocation.
1812*4882a593Smuzhiyun */
1813*4882a593Smuzhiyun alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1814*4882a593Smuzhiyun if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1815*4882a593Smuzhiyun alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
1816*4882a593Smuzhiyun
1817*4882a593Smuzhiyun page = alloc_slab_page(s, alloc_gfp, node, oo);
1818*4882a593Smuzhiyun if (unlikely(!page)) {
1819*4882a593Smuzhiyun oo = s->min;
1820*4882a593Smuzhiyun alloc_gfp = flags;
1821*4882a593Smuzhiyun /*
1822*4882a593Smuzhiyun * Allocation may have failed due to fragmentation.
1823*4882a593Smuzhiyun * Try a lower order alloc if possible
1824*4882a593Smuzhiyun */
1825*4882a593Smuzhiyun page = alloc_slab_page(s, alloc_gfp, node, oo);
1826*4882a593Smuzhiyun if (unlikely(!page))
1827*4882a593Smuzhiyun goto out;
1828*4882a593Smuzhiyun stat(s, ORDER_FALLBACK);
1829*4882a593Smuzhiyun }
1830*4882a593Smuzhiyun
1831*4882a593Smuzhiyun page->objects = oo_objects(oo);
1832*4882a593Smuzhiyun
1833*4882a593Smuzhiyun page->slab_cache = s;
1834*4882a593Smuzhiyun __SetPageSlab(page);
1835*4882a593Smuzhiyun if (page_is_pfmemalloc(page))
1836*4882a593Smuzhiyun SetPageSlabPfmemalloc(page);
1837*4882a593Smuzhiyun
1838*4882a593Smuzhiyun kasan_poison_slab(page);
1839*4882a593Smuzhiyun
1840*4882a593Smuzhiyun start = page_address(page);
1841*4882a593Smuzhiyun
1842*4882a593Smuzhiyun setup_page_debug(s, page, start);
1843*4882a593Smuzhiyun
1844*4882a593Smuzhiyun shuffle = shuffle_freelist(s, page);
1845*4882a593Smuzhiyun
1846*4882a593Smuzhiyun if (!shuffle) {
1847*4882a593Smuzhiyun start = fixup_red_left(s, start);
1848*4882a593Smuzhiyun start = setup_object(s, page, start);
1849*4882a593Smuzhiyun page->freelist = start;
1850*4882a593Smuzhiyun for (idx = 0, p = start; idx < page->objects - 1; idx++) {
1851*4882a593Smuzhiyun next = p + s->size;
1852*4882a593Smuzhiyun next = setup_object(s, page, next);
1853*4882a593Smuzhiyun set_freepointer(s, p, next);
1854*4882a593Smuzhiyun p = next;
1855*4882a593Smuzhiyun }
1856*4882a593Smuzhiyun set_freepointer(s, p, NULL);
1857*4882a593Smuzhiyun }
1858*4882a593Smuzhiyun
1859*4882a593Smuzhiyun page->inuse = page->objects;
1860*4882a593Smuzhiyun page->frozen = 1;
1861*4882a593Smuzhiyun
1862*4882a593Smuzhiyun out:
1863*4882a593Smuzhiyun if (gfpflags_allow_blocking(flags))
1864*4882a593Smuzhiyun local_irq_disable();
1865*4882a593Smuzhiyun if (!page)
1866*4882a593Smuzhiyun return NULL;
1867*4882a593Smuzhiyun
1868*4882a593Smuzhiyun inc_slabs_node(s, page_to_nid(page), page->objects);
1869*4882a593Smuzhiyun
1870*4882a593Smuzhiyun return page;
1871*4882a593Smuzhiyun }
1872*4882a593Smuzhiyun
1873*4882a593Smuzhiyun static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1874*4882a593Smuzhiyun {
1875*4882a593Smuzhiyun if (unlikely(flags & GFP_SLAB_BUG_MASK))
1876*4882a593Smuzhiyun flags = kmalloc_fix_flags(flags);
1877*4882a593Smuzhiyun
1878*4882a593Smuzhiyun return allocate_slab(s,
1879*4882a593Smuzhiyun flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1880*4882a593Smuzhiyun }
1881*4882a593Smuzhiyun
1882*4882a593Smuzhiyun static void __free_slab(struct kmem_cache *s, struct page *page)
1883*4882a593Smuzhiyun {
1884*4882a593Smuzhiyun int order = compound_order(page);
1885*4882a593Smuzhiyun int pages = 1 << order;
1886*4882a593Smuzhiyun
1887*4882a593Smuzhiyun if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
1888*4882a593Smuzhiyun void *p;
1889*4882a593Smuzhiyun
1890*4882a593Smuzhiyun slab_pad_check(s, page);
1891*4882a593Smuzhiyun for_each_object(p, s, page_address(page),
1892*4882a593Smuzhiyun page->objects)
1893*4882a593Smuzhiyun check_object(s, page, p, SLUB_RED_INACTIVE);
1894*4882a593Smuzhiyun }
1895*4882a593Smuzhiyun
1896*4882a593Smuzhiyun __ClearPageSlabPfmemalloc(page);
1897*4882a593Smuzhiyun __ClearPageSlab(page);
1898*4882a593Smuzhiyun
1899*4882a593Smuzhiyun page->mapping = NULL;
1900*4882a593Smuzhiyun if (current->reclaim_state)
1901*4882a593Smuzhiyun current->reclaim_state->reclaimed_slab += pages;
1902*4882a593Smuzhiyun unaccount_slab_page(page, order, s);
1903*4882a593Smuzhiyun __free_pages(page, order);
1904*4882a593Smuzhiyun }
1905*4882a593Smuzhiyun
1906*4882a593Smuzhiyun static void rcu_free_slab(struct rcu_head *h)
1907*4882a593Smuzhiyun {
1908*4882a593Smuzhiyun struct page *page = container_of(h, struct page, rcu_head);
1909*4882a593Smuzhiyun
1910*4882a593Smuzhiyun __free_slab(page->slab_cache, page);
1911*4882a593Smuzhiyun }
1912*4882a593Smuzhiyun
1913*4882a593Smuzhiyun static void free_slab(struct kmem_cache *s, struct page *page)
1914*4882a593Smuzhiyun {
1915*4882a593Smuzhiyun if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
1916*4882a593Smuzhiyun call_rcu(&page->rcu_head, rcu_free_slab);
1917*4882a593Smuzhiyun } else
1918*4882a593Smuzhiyun __free_slab(s, page);
1919*4882a593Smuzhiyun }
1920*4882a593Smuzhiyun
1921*4882a593Smuzhiyun static void discard_slab(struct kmem_cache *s, struct page *page)
1922*4882a593Smuzhiyun {
1923*4882a593Smuzhiyun dec_slabs_node(s, page_to_nid(page), page->objects);
1924*4882a593Smuzhiyun free_slab(s, page);
1925*4882a593Smuzhiyun }
1926*4882a593Smuzhiyun
1927*4882a593Smuzhiyun /*
1928*4882a593Smuzhiyun * Management of partially allocated slabs.
1929*4882a593Smuzhiyun */
1930*4882a593Smuzhiyun static inline void
1931*4882a593Smuzhiyun __add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1932*4882a593Smuzhiyun {
1933*4882a593Smuzhiyun n->nr_partial++;
1934*4882a593Smuzhiyun if (tail == DEACTIVATE_TO_TAIL)
1935*4882a593Smuzhiyun list_add_tail(&page->slab_list, &n->partial);
1936*4882a593Smuzhiyun else
1937*4882a593Smuzhiyun list_add(&page->slab_list, &n->partial);
1938*4882a593Smuzhiyun }
1939*4882a593Smuzhiyun
1940*4882a593Smuzhiyun static inline void add_partial(struct kmem_cache_node *n,
1941*4882a593Smuzhiyun struct page *page, int tail)
1942*4882a593Smuzhiyun {
1943*4882a593Smuzhiyun lockdep_assert_held(&n->list_lock);
1944*4882a593Smuzhiyun __add_partial(n, page, tail);
1945*4882a593Smuzhiyun }
1946*4882a593Smuzhiyun
1947*4882a593Smuzhiyun static inline void remove_partial(struct kmem_cache_node *n,
1948*4882a593Smuzhiyun struct page *page)
1949*4882a593Smuzhiyun {
1950*4882a593Smuzhiyun lockdep_assert_held(&n->list_lock);
1951*4882a593Smuzhiyun list_del(&page->slab_list);
1952*4882a593Smuzhiyun n->nr_partial--;
1953*4882a593Smuzhiyun }
1954*4882a593Smuzhiyun
1955*4882a593Smuzhiyun /*
1956*4882a593Smuzhiyun * Remove slab from the partial list, freeze it and
1957*4882a593Smuzhiyun * return the pointer to the freelist.
1958*4882a593Smuzhiyun *
1959*4882a593Smuzhiyun * Returns a list of objects or NULL if it fails.
1960*4882a593Smuzhiyun */
1961*4882a593Smuzhiyun static inline void *acquire_slab(struct kmem_cache *s,
1962*4882a593Smuzhiyun struct kmem_cache_node *n, struct page *page,
1963*4882a593Smuzhiyun int mode, int *objects)
1964*4882a593Smuzhiyun {
1965*4882a593Smuzhiyun void *freelist;
1966*4882a593Smuzhiyun unsigned long counters;
1967*4882a593Smuzhiyun struct page new;
1968*4882a593Smuzhiyun
1969*4882a593Smuzhiyun lockdep_assert_held(&n->list_lock);
1970*4882a593Smuzhiyun
1971*4882a593Smuzhiyun /*
1972*4882a593Smuzhiyun * Zap the freelist and set the frozen bit.
1973*4882a593Smuzhiyun * The old freelist is the list of objects for the
1974*4882a593Smuzhiyun * per cpu allocation list.
1975*4882a593Smuzhiyun */
1976*4882a593Smuzhiyun freelist = page->freelist;
1977*4882a593Smuzhiyun counters = page->counters;
1978*4882a593Smuzhiyun new.counters = counters;
1979*4882a593Smuzhiyun *objects = new.objects - new.inuse;
1980*4882a593Smuzhiyun if (mode) {
1981*4882a593Smuzhiyun new.inuse = page->objects;
1982*4882a593Smuzhiyun new.freelist = NULL;
1983*4882a593Smuzhiyun } else {
1984*4882a593Smuzhiyun new.freelist = freelist;
1985*4882a593Smuzhiyun }
1986*4882a593Smuzhiyun
1987*4882a593Smuzhiyun VM_BUG_ON(new.frozen);
1988*4882a593Smuzhiyun new.frozen = 1;
1989*4882a593Smuzhiyun
1990*4882a593Smuzhiyun if (!__cmpxchg_double_slab(s, page,
1991*4882a593Smuzhiyun freelist, counters,
1992*4882a593Smuzhiyun new.freelist, new.counters,
1993*4882a593Smuzhiyun "acquire_slab"))
1994*4882a593Smuzhiyun return NULL;
1995*4882a593Smuzhiyun
1996*4882a593Smuzhiyun remove_partial(n, page);
1997*4882a593Smuzhiyun WARN_ON(!freelist);
1998*4882a593Smuzhiyun return freelist;
1999*4882a593Smuzhiyun }
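/*
 * Illustrative sketch, not part of the kernel build (kept under #if 0):
 * acquire_slab() above computes a new (freelist, counters) state before
 * the cmpxchg -- either taking the whole freelist for the cpu slab
 * (mode != 0: inuse = objects, freelist = NULL) or merely setting the
 * frozen bit.  struct demo_slab and compute_new_state() are hypothetical
 * names, and no atomicity is modelled here.
 */
#if 0
#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct demo_slab {
	void *freelist;
	unsigned int inuse;
	unsigned int objects;
	bool frozen;
};

static void compute_new_state(const struct demo_slab *old,
			      struct demo_slab *new, bool take_all)
{
	*new = *old;
	if (take_all) {			/* becomes the cpu slab */
		new->inuse = old->objects;
		new->freelist = NULL;
	}
	new->frozen = true;		/* exempt from list management */
}

int main(void)
{
	int dummy;
	struct demo_slab old = { .freelist = &dummy, .inuse = 5,
				 .objects = 8, .frozen = false };
	struct demo_slab new;

	compute_new_state(&old, &new, true);
	printf("inuse %u -> %u, frozen=%d, freelist %s\n",
	       old.inuse, new.inuse, new.frozen,
	       new.freelist ? "kept on page" : "handed to the cpu");
	return 0;
}
#endif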
2000*4882a593Smuzhiyun
2001*4882a593Smuzhiyun static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
2002*4882a593Smuzhiyun static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
2003*4882a593Smuzhiyun
2004*4882a593Smuzhiyun /*
2005*4882a593Smuzhiyun * Try to allocate a partial slab from a specific node.
2006*4882a593Smuzhiyun */
2007*4882a593Smuzhiyun static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
2008*4882a593Smuzhiyun struct kmem_cache_cpu *c, gfp_t flags)
2009*4882a593Smuzhiyun {
2010*4882a593Smuzhiyun struct page *page, *page2;
2011*4882a593Smuzhiyun void *object = NULL;
2012*4882a593Smuzhiyun unsigned int available = 0;
2013*4882a593Smuzhiyun int objects;
2014*4882a593Smuzhiyun
2015*4882a593Smuzhiyun /*
2016*4882a593Smuzhiyun * Racy check. If we mistakenly see no partial slabs then we
2017*4882a593Smuzhiyun * just allocate an empty slab. If we mistakenly try to get a
2018*4882a593Smuzhiyun * partial slab and there is none available then get_partial()
2019*4882a593Smuzhiyun * will return NULL.
2020*4882a593Smuzhiyun */
2021*4882a593Smuzhiyun if (!n || !n->nr_partial)
2022*4882a593Smuzhiyun return NULL;
2023*4882a593Smuzhiyun
2024*4882a593Smuzhiyun spin_lock(&n->list_lock);
2025*4882a593Smuzhiyun list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
2026*4882a593Smuzhiyun void *t;
2027*4882a593Smuzhiyun
2028*4882a593Smuzhiyun if (!pfmemalloc_match(page, flags))
2029*4882a593Smuzhiyun continue;
2030*4882a593Smuzhiyun
2031*4882a593Smuzhiyun t = acquire_slab(s, n, page, object == NULL, &objects);
2032*4882a593Smuzhiyun if (!t)
2033*4882a593Smuzhiyun break;
2034*4882a593Smuzhiyun
2035*4882a593Smuzhiyun available += objects;
2036*4882a593Smuzhiyun if (!object) {
2037*4882a593Smuzhiyun c->page = page;
2038*4882a593Smuzhiyun stat(s, ALLOC_FROM_PARTIAL);
2039*4882a593Smuzhiyun object = t;
2040*4882a593Smuzhiyun } else {
2041*4882a593Smuzhiyun put_cpu_partial(s, page, 0);
2042*4882a593Smuzhiyun stat(s, CPU_PARTIAL_NODE);
2043*4882a593Smuzhiyun }
2044*4882a593Smuzhiyun if (!kmem_cache_has_cpu_partial(s)
2045*4882a593Smuzhiyun || available > slub_cpu_partial(s) / 2)
2046*4882a593Smuzhiyun break;
2047*4882a593Smuzhiyun
2048*4882a593Smuzhiyun }
2049*4882a593Smuzhiyun spin_unlock(&n->list_lock);
2050*4882a593Smuzhiyun return object;
2051*4882a593Smuzhiyun }
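/*
 * Worked example (hypothetical numbers): if slub_cpu_partial(s) is 30,
 * the loop above keeps pulling partial slabs until the gathered free
 * objects exceed 30 / 2 = 15.  With slabs offering 6, 7 and 4 free
 * objects, the first becomes the cpu slab, the next two are parked on
 * the cpu partial list, and the loop stops once available = 17 > 15.
 */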
2052*4882a593Smuzhiyun
2053*4882a593Smuzhiyun /*
2054*4882a593Smuzhiyun * Get a page from somewhere. Search in increasing NUMA distances.
2055*4882a593Smuzhiyun */
2056*4882a593Smuzhiyun static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
2057*4882a593Smuzhiyun struct kmem_cache_cpu *c)
2058*4882a593Smuzhiyun {
2059*4882a593Smuzhiyun #ifdef CONFIG_NUMA
2060*4882a593Smuzhiyun struct zonelist *zonelist;
2061*4882a593Smuzhiyun struct zoneref *z;
2062*4882a593Smuzhiyun struct zone *zone;
2063*4882a593Smuzhiyun enum zone_type highest_zoneidx = gfp_zone(flags);
2064*4882a593Smuzhiyun void *object;
2065*4882a593Smuzhiyun unsigned int cpuset_mems_cookie;
2066*4882a593Smuzhiyun
2067*4882a593Smuzhiyun /*
2068*4882a593Smuzhiyun * The defrag ratio allows a configuration of the tradeoffs between
2069*4882a593Smuzhiyun * inter node defragmentation and node local allocations. A lower
2070*4882a593Smuzhiyun * defrag_ratio increases the tendency to do local allocations
2071*4882a593Smuzhiyun * instead of attempting to obtain partial slabs from other nodes.
2072*4882a593Smuzhiyun *
2073*4882a593Smuzhiyun * If the defrag_ratio is set to 0 then kmalloc() always
2074*4882a593Smuzhiyun * returns node local objects. If the ratio is higher then kmalloc()
2075*4882a593Smuzhiyun * may return off node objects because partial slabs are obtained
2076*4882a593Smuzhiyun * from other nodes and filled up.
2077*4882a593Smuzhiyun *
2078*4882a593Smuzhiyun * If /sys/kernel/slab/xx/remote_node_defrag_ratio is set to 100
2079*4882a593Smuzhiyun * (which makes defrag_ratio = 1000) then every (well almost)
2080*4882a593Smuzhiyun * allocation will first attempt to defrag slab caches on other nodes.
2081*4882a593Smuzhiyun * This means scanning over all nodes to look for partial slabs which
2082*4882a593Smuzhiyun * may be expensive if we do it every time we are trying to find a slab
2083*4882a593Smuzhiyun * with available objects.
2084*4882a593Smuzhiyun */
2085*4882a593Smuzhiyun if (!s->remote_node_defrag_ratio ||
2086*4882a593Smuzhiyun get_cycles() % 1024 > s->remote_node_defrag_ratio)
2087*4882a593Smuzhiyun return NULL;
2088*4882a593Smuzhiyun
2089*4882a593Smuzhiyun do {
2090*4882a593Smuzhiyun cpuset_mems_cookie = read_mems_allowed_begin();
2091*4882a593Smuzhiyun zonelist = node_zonelist(mempolicy_slab_node(), flags);
2092*4882a593Smuzhiyun for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
2093*4882a593Smuzhiyun struct kmem_cache_node *n;
2094*4882a593Smuzhiyun
2095*4882a593Smuzhiyun n = get_node(s, zone_to_nid(zone));
2096*4882a593Smuzhiyun
2097*4882a593Smuzhiyun if (n && cpuset_zone_allowed(zone, flags) &&
2098*4882a593Smuzhiyun n->nr_partial > s->min_partial) {
2099*4882a593Smuzhiyun object = get_partial_node(s, n, c, flags);
2100*4882a593Smuzhiyun if (object) {
2101*4882a593Smuzhiyun /*
2102*4882a593Smuzhiyun * Don't check read_mems_allowed_retry()
2103*4882a593Smuzhiyun * here - if mems_allowed was updated in
2104*4882a593Smuzhiyun * parallel, that was a harmless race
2105*4882a593Smuzhiyun * between allocation and the cpuset
2106*4882a593Smuzhiyun * update
2107*4882a593Smuzhiyun */
2108*4882a593Smuzhiyun return object;
2109*4882a593Smuzhiyun }
2110*4882a593Smuzhiyun }
2111*4882a593Smuzhiyun }
2112*4882a593Smuzhiyun } while (read_mems_allowed_retry(cpuset_mems_cookie));
2113*4882a593Smuzhiyun #endif /* CONFIG_NUMA */
2114*4882a593Smuzhiyun return NULL;
2115*4882a593Smuzhiyun }
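/*
 * Worked example (hypothetical numbers, derived from the comment and
 * check above): the stored defrag_ratio is ten times the sysfs value,
 * so writing 50 to remote_node_defrag_ratio stores 500 and the remote
 * search runs only when get_cycles() % 1024 <= 500, i.e. for roughly
 * half of the allocations reaching this path.  With the maximum sysfs
 * value of 100 (internal 1000) it is skipped for only the ~2% of
 * samples above 1000, and with 0 it never runs at all.
 */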
2116*4882a593Smuzhiyun
2117*4882a593Smuzhiyun /*
2118*4882a593Smuzhiyun * Get a partial page, lock it and return it.
2119*4882a593Smuzhiyun */
2120*4882a593Smuzhiyun static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
2121*4882a593Smuzhiyun struct kmem_cache_cpu *c)
2122*4882a593Smuzhiyun {
2123*4882a593Smuzhiyun void *object;
2124*4882a593Smuzhiyun int searchnode = node;
2125*4882a593Smuzhiyun
2126*4882a593Smuzhiyun if (node == NUMA_NO_NODE)
2127*4882a593Smuzhiyun searchnode = numa_mem_id();
2128*4882a593Smuzhiyun
2129*4882a593Smuzhiyun object = get_partial_node(s, get_node(s, searchnode), c, flags);
2130*4882a593Smuzhiyun if (object || node != NUMA_NO_NODE)
2131*4882a593Smuzhiyun return object;
2132*4882a593Smuzhiyun
2133*4882a593Smuzhiyun return get_any_partial(s, flags, c);
2134*4882a593Smuzhiyun }
2135*4882a593Smuzhiyun
2136*4882a593Smuzhiyun #ifdef CONFIG_PREEMPTION
2137*4882a593Smuzhiyun /*
2138*4882a593Smuzhiyun * Calculate the next globally unique transaction for disambiguation
2139*4882a593Smuzhiyun * during cmpxchg. The transactions start with the cpu number and are then
2140*4882a593Smuzhiyun * incremented by CONFIG_NR_CPUS.
2141*4882a593Smuzhiyun */
2142*4882a593Smuzhiyun #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
2143*4882a593Smuzhiyun #else
2144*4882a593Smuzhiyun /*
2145*4882a593Smuzhiyun * No preemption supported therefore also no need to check for
2146*4882a593Smuzhiyun * different cpus.
2147*4882a593Smuzhiyun */
2148*4882a593Smuzhiyun #define TID_STEP 1
2149*4882a593Smuzhiyun #endif
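/*
 * Illustrative sketch, not part of the kernel build (kept under #if 0):
 * with CONFIG_PREEMPTION and, say, CONFIG_NR_CPUS = 4, TID_STEP is 4,
 * so a tid simultaneously encodes the cpu (tid % TID_STEP) and a
 * transaction counter (tid / TID_STEP).  DEMO_TID_STEP and the starting
 * cpu number below are hypothetical.
 */
#if 0
#include <stdio.h>

#define DEMO_TID_STEP 4UL	/* roundup_pow_of_two(4 cpus) */

int main(void)
{
	unsigned long tid = 2;	/* init_tid() for cpu 2 */
	int i;

	for (i = 0; i < 3; i++) {
		printf("tid=%lu cpu=%lu event=%lu\n",
		       tid, tid % DEMO_TID_STEP, tid / DEMO_TID_STEP);
		tid += DEMO_TID_STEP;	/* next_tid() */
	}
	return 0;
}
#endif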
2150*4882a593Smuzhiyun
2151*4882a593Smuzhiyun static inline unsigned long next_tid(unsigned long tid)
2152*4882a593Smuzhiyun {
2153*4882a593Smuzhiyun return tid + TID_STEP;
2154*4882a593Smuzhiyun }
2155*4882a593Smuzhiyun
2156*4882a593Smuzhiyun #ifdef SLUB_DEBUG_CMPXCHG
2157*4882a593Smuzhiyun static inline unsigned int tid_to_cpu(unsigned long tid)
2158*4882a593Smuzhiyun {
2159*4882a593Smuzhiyun return tid % TID_STEP;
2160*4882a593Smuzhiyun }
2161*4882a593Smuzhiyun
2162*4882a593Smuzhiyun static inline unsigned long tid_to_event(unsigned long tid)
2163*4882a593Smuzhiyun {
2164*4882a593Smuzhiyun return tid / TID_STEP;
2165*4882a593Smuzhiyun }
2166*4882a593Smuzhiyun #endif
2167*4882a593Smuzhiyun
2168*4882a593Smuzhiyun static inline unsigned int init_tid(int cpu)
2169*4882a593Smuzhiyun {
2170*4882a593Smuzhiyun return cpu;
2171*4882a593Smuzhiyun }
2172*4882a593Smuzhiyun
2173*4882a593Smuzhiyun static inline void note_cmpxchg_failure(const char *n,
2174*4882a593Smuzhiyun const struct kmem_cache *s, unsigned long tid)
2175*4882a593Smuzhiyun {
2176*4882a593Smuzhiyun #ifdef SLUB_DEBUG_CMPXCHG
2177*4882a593Smuzhiyun unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2178*4882a593Smuzhiyun
2179*4882a593Smuzhiyun pr_info("%s %s: cmpxchg redo ", n, s->name);
2180*4882a593Smuzhiyun
2181*4882a593Smuzhiyun #ifdef CONFIG_PREEMPTION
2182*4882a593Smuzhiyun if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
2183*4882a593Smuzhiyun pr_warn("due to cpu change %d -> %d\n",
2184*4882a593Smuzhiyun tid_to_cpu(tid), tid_to_cpu(actual_tid));
2185*4882a593Smuzhiyun else
2186*4882a593Smuzhiyun #endif
2187*4882a593Smuzhiyun if (tid_to_event(tid) != tid_to_event(actual_tid))
2188*4882a593Smuzhiyun pr_warn("due to cpu running other code. Event %ld->%ld\n",
2189*4882a593Smuzhiyun tid_to_event(tid), tid_to_event(actual_tid));
2190*4882a593Smuzhiyun else
2191*4882a593Smuzhiyun pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
2192*4882a593Smuzhiyun actual_tid, tid, next_tid(tid));
2193*4882a593Smuzhiyun #endif
2194*4882a593Smuzhiyun stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
2195*4882a593Smuzhiyun }
2196*4882a593Smuzhiyun
2197*4882a593Smuzhiyun static void init_kmem_cache_cpus(struct kmem_cache *s)
2198*4882a593Smuzhiyun {
2199*4882a593Smuzhiyun int cpu;
2200*4882a593Smuzhiyun
2201*4882a593Smuzhiyun for_each_possible_cpu(cpu)
2202*4882a593Smuzhiyun per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
2203*4882a593Smuzhiyun }
2204*4882a593Smuzhiyun
2205*4882a593Smuzhiyun /*
2206*4882a593Smuzhiyun * Remove the cpu slab
2207*4882a593Smuzhiyun */
2208*4882a593Smuzhiyun static void deactivate_slab(struct kmem_cache *s, struct page *page,
2209*4882a593Smuzhiyun void *freelist, struct kmem_cache_cpu *c)
2210*4882a593Smuzhiyun {
2211*4882a593Smuzhiyun enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
2212*4882a593Smuzhiyun struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2213*4882a593Smuzhiyun int lock = 0;
2214*4882a593Smuzhiyun enum slab_modes l = M_NONE, m = M_NONE;
2215*4882a593Smuzhiyun void *nextfree;
2216*4882a593Smuzhiyun int tail = DEACTIVATE_TO_HEAD;
2217*4882a593Smuzhiyun struct page new;
2218*4882a593Smuzhiyun struct page old;
2219*4882a593Smuzhiyun
2220*4882a593Smuzhiyun if (page->freelist) {
2221*4882a593Smuzhiyun stat(s, DEACTIVATE_REMOTE_FREES);
2222*4882a593Smuzhiyun tail = DEACTIVATE_TO_TAIL;
2223*4882a593Smuzhiyun }
2224*4882a593Smuzhiyun
2225*4882a593Smuzhiyun /*
2226*4882a593Smuzhiyun * Stage one: Free all available per cpu objects back
2227*4882a593Smuzhiyun * to the page freelist while it is still frozen. Leave the
2228*4882a593Smuzhiyun * last one.
2229*4882a593Smuzhiyun *
2230*4882a593Smuzhiyun * There is no need to take the list->lock because the page
2231*4882a593Smuzhiyun * is still frozen.
2232*4882a593Smuzhiyun */
2233*4882a593Smuzhiyun while (freelist && (nextfree = get_freepointer(s, freelist))) {
2234*4882a593Smuzhiyun void *prior;
2235*4882a593Smuzhiyun unsigned long counters;
2236*4882a593Smuzhiyun
2237*4882a593Smuzhiyun /*
2238*4882a593Smuzhiyun * If 'nextfree' is invalid, it is possible that the object at
2239*4882a593Smuzhiyun * 'freelist' is already corrupted. So isolate all objects
2240*4882a593Smuzhiyun * starting at 'freelist'.
2241*4882a593Smuzhiyun */
2242*4882a593Smuzhiyun if (freelist_corrupted(s, page, &freelist, nextfree))
2243*4882a593Smuzhiyun break;
2244*4882a593Smuzhiyun
2245*4882a593Smuzhiyun do {
2246*4882a593Smuzhiyun prior = page->freelist;
2247*4882a593Smuzhiyun counters = page->counters;
2248*4882a593Smuzhiyun set_freepointer(s, freelist, prior);
2249*4882a593Smuzhiyun new.counters = counters;
2250*4882a593Smuzhiyun new.inuse--;
2251*4882a593Smuzhiyun VM_BUG_ON(!new.frozen);
2252*4882a593Smuzhiyun
2253*4882a593Smuzhiyun } while (!__cmpxchg_double_slab(s, page,
2254*4882a593Smuzhiyun prior, counters,
2255*4882a593Smuzhiyun freelist, new.counters,
2256*4882a593Smuzhiyun "drain percpu freelist"));
2257*4882a593Smuzhiyun
2258*4882a593Smuzhiyun freelist = nextfree;
2259*4882a593Smuzhiyun }
2260*4882a593Smuzhiyun
2261*4882a593Smuzhiyun /*
2262*4882a593Smuzhiyun * Stage two: Ensure that the page is unfrozen while the
2263*4882a593Smuzhiyun * list presence reflects the actual number of objects
2264*4882a593Smuzhiyun * during unfreeze.
2265*4882a593Smuzhiyun *
2266*4882a593Smuzhiyun * We set up the list membership and then perform a cmpxchg
2267*4882a593Smuzhiyun * with the count. If there is a mismatch then the page
2268*4882a593Smuzhiyun * is not unfrozen but the page is on the wrong list.
2269*4882a593Smuzhiyun *
2270*4882a593Smuzhiyun * Then we restart the process which may have to remove
2271*4882a593Smuzhiyun * the page from the list that we just put it on again
2272*4882a593Smuzhiyun * because the number of objects in the slab may have
2273*4882a593Smuzhiyun * changed.
2274*4882a593Smuzhiyun */
2275*4882a593Smuzhiyun redo:
2276*4882a593Smuzhiyun
2277*4882a593Smuzhiyun old.freelist = page->freelist;
2278*4882a593Smuzhiyun old.counters = page->counters;
2279*4882a593Smuzhiyun VM_BUG_ON(!old.frozen);
2280*4882a593Smuzhiyun
2281*4882a593Smuzhiyun /* Determine target state of the slab */
2282*4882a593Smuzhiyun new.counters = old.counters;
2283*4882a593Smuzhiyun if (freelist) {
2284*4882a593Smuzhiyun new.inuse--;
2285*4882a593Smuzhiyun set_freepointer(s, freelist, old.freelist);
2286*4882a593Smuzhiyun new.freelist = freelist;
2287*4882a593Smuzhiyun } else
2288*4882a593Smuzhiyun new.freelist = old.freelist;
2289*4882a593Smuzhiyun
2290*4882a593Smuzhiyun new.frozen = 0;
2291*4882a593Smuzhiyun
2292*4882a593Smuzhiyun if (!new.inuse && n->nr_partial >= s->min_partial)
2293*4882a593Smuzhiyun m = M_FREE;
2294*4882a593Smuzhiyun else if (new.freelist) {
2295*4882a593Smuzhiyun m = M_PARTIAL;
2296*4882a593Smuzhiyun if (!lock) {
2297*4882a593Smuzhiyun lock = 1;
2298*4882a593Smuzhiyun /*
2299*4882a593Smuzhiyun * Taking the spinlock removes the possibility
2300*4882a593Smuzhiyun * that acquire_slab() will see a slab page that
2301*4882a593Smuzhiyun * is frozen
2302*4882a593Smuzhiyun */
2303*4882a593Smuzhiyun spin_lock(&n->list_lock);
2304*4882a593Smuzhiyun }
2305*4882a593Smuzhiyun } else {
2306*4882a593Smuzhiyun m = M_FULL;
2307*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
2308*4882a593Smuzhiyun if ((s->flags & SLAB_STORE_USER) && !lock) {
2309*4882a593Smuzhiyun lock = 1;
2310*4882a593Smuzhiyun /*
2311*4882a593Smuzhiyun * This also ensures that the scanning of full
2312*4882a593Smuzhiyun * slabs from diagnostic functions will not see
2313*4882a593Smuzhiyun * any frozen slabs.
2314*4882a593Smuzhiyun */
2315*4882a593Smuzhiyun spin_lock(&n->list_lock);
2316*4882a593Smuzhiyun }
2317*4882a593Smuzhiyun #endif
2318*4882a593Smuzhiyun }
2319*4882a593Smuzhiyun
2320*4882a593Smuzhiyun if (l != m) {
2321*4882a593Smuzhiyun if (l == M_PARTIAL)
2322*4882a593Smuzhiyun remove_partial(n, page);
2323*4882a593Smuzhiyun else if (l == M_FULL)
2324*4882a593Smuzhiyun remove_full(s, n, page);
2325*4882a593Smuzhiyun
2326*4882a593Smuzhiyun if (m == M_PARTIAL)
2327*4882a593Smuzhiyun add_partial(n, page, tail);
2328*4882a593Smuzhiyun else if (m == M_FULL)
2329*4882a593Smuzhiyun add_full(s, n, page);
2330*4882a593Smuzhiyun }
2331*4882a593Smuzhiyun
2332*4882a593Smuzhiyun l = m;
2333*4882a593Smuzhiyun if (!__cmpxchg_double_slab(s, page,
2334*4882a593Smuzhiyun old.freelist, old.counters,
2335*4882a593Smuzhiyun new.freelist, new.counters,
2336*4882a593Smuzhiyun "unfreezing slab"))
2337*4882a593Smuzhiyun goto redo;
2338*4882a593Smuzhiyun
2339*4882a593Smuzhiyun if (lock)
2340*4882a593Smuzhiyun spin_unlock(&n->list_lock);
2341*4882a593Smuzhiyun
2342*4882a593Smuzhiyun if (m == M_PARTIAL)
2343*4882a593Smuzhiyun stat(s, tail);
2344*4882a593Smuzhiyun else if (m == M_FULL)
2345*4882a593Smuzhiyun stat(s, DEACTIVATE_FULL);
2346*4882a593Smuzhiyun else if (m == M_FREE) {
2347*4882a593Smuzhiyun stat(s, DEACTIVATE_EMPTY);
2348*4882a593Smuzhiyun discard_slab(s, page);
2349*4882a593Smuzhiyun stat(s, FREE_SLAB);
2350*4882a593Smuzhiyun }
2351*4882a593Smuzhiyun
2352*4882a593Smuzhiyun c->page = NULL;
2353*4882a593Smuzhiyun c->freelist = NULL;
2354*4882a593Smuzhiyun c->tid = next_tid(c->tid);
2355*4882a593Smuzhiyun }
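/*
 * Condensed view of the target-state selection performed above
 * (illustrative pseudo-code, not compiled):
 *
 *	if (slab is now empty && node has >= s->min_partial partial slabs)
 *		m = M_FREE;	-> discard the slab entirely
 *	else if (slab still has free objects)
 *		m = M_PARTIAL;	-> move to the node partial list
 *	else
 *		m = M_FULL;	-> full list, only tracked with SLAB_STORE_USER
 *
 * The cmpxchg that clears the frozen bit is retried, undoing and redoing
 * the list move, until freelist and counters match what was computed.
 */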
2356*4882a593Smuzhiyun
2357*4882a593Smuzhiyun /*
2358*4882a593Smuzhiyun * Unfreeze all the cpu partial slabs.
2359*4882a593Smuzhiyun *
2360*4882a593Smuzhiyun * This function must be called with interrupts disabled
2361*4882a593Smuzhiyun * for the cpu using c (or some other mechanism must guarantee
2362*4882a593Smuzhiyun * that there are no concurrent accesses).
2363*4882a593Smuzhiyun */
2364*4882a593Smuzhiyun static void unfreeze_partials(struct kmem_cache *s,
2365*4882a593Smuzhiyun struct kmem_cache_cpu *c)
2366*4882a593Smuzhiyun {
2367*4882a593Smuzhiyun #ifdef CONFIG_SLUB_CPU_PARTIAL
2368*4882a593Smuzhiyun struct kmem_cache_node *n = NULL, *n2 = NULL;
2369*4882a593Smuzhiyun struct page *page, *discard_page = NULL;
2370*4882a593Smuzhiyun
2371*4882a593Smuzhiyun while ((page = slub_percpu_partial(c))) {
2372*4882a593Smuzhiyun struct page new;
2373*4882a593Smuzhiyun struct page old;
2374*4882a593Smuzhiyun
2375*4882a593Smuzhiyun slub_set_percpu_partial(c, page);
2376*4882a593Smuzhiyun
2377*4882a593Smuzhiyun n2 = get_node(s, page_to_nid(page));
2378*4882a593Smuzhiyun if (n != n2) {
2379*4882a593Smuzhiyun if (n)
2380*4882a593Smuzhiyun spin_unlock(&n->list_lock);
2381*4882a593Smuzhiyun
2382*4882a593Smuzhiyun n = n2;
2383*4882a593Smuzhiyun spin_lock(&n->list_lock);
2384*4882a593Smuzhiyun }
2385*4882a593Smuzhiyun
2386*4882a593Smuzhiyun do {
2387*4882a593Smuzhiyun
2388*4882a593Smuzhiyun old.freelist = page->freelist;
2389*4882a593Smuzhiyun old.counters = page->counters;
2390*4882a593Smuzhiyun VM_BUG_ON(!old.frozen);
2391*4882a593Smuzhiyun
2392*4882a593Smuzhiyun new.counters = old.counters;
2393*4882a593Smuzhiyun new.freelist = old.freelist;
2394*4882a593Smuzhiyun
2395*4882a593Smuzhiyun new.frozen = 0;
2396*4882a593Smuzhiyun
2397*4882a593Smuzhiyun } while (!__cmpxchg_double_slab(s, page,
2398*4882a593Smuzhiyun old.freelist, old.counters,
2399*4882a593Smuzhiyun new.freelist, new.counters,
2400*4882a593Smuzhiyun "unfreezing slab"));
2401*4882a593Smuzhiyun
2402*4882a593Smuzhiyun if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2403*4882a593Smuzhiyun page->next = discard_page;
2404*4882a593Smuzhiyun discard_page = page;
2405*4882a593Smuzhiyun } else {
2406*4882a593Smuzhiyun add_partial(n, page, DEACTIVATE_TO_TAIL);
2407*4882a593Smuzhiyun stat(s, FREE_ADD_PARTIAL);
2408*4882a593Smuzhiyun }
2409*4882a593Smuzhiyun }
2410*4882a593Smuzhiyun
2411*4882a593Smuzhiyun if (n)
2412*4882a593Smuzhiyun spin_unlock(&n->list_lock);
2413*4882a593Smuzhiyun
2414*4882a593Smuzhiyun while (discard_page) {
2415*4882a593Smuzhiyun page = discard_page;
2416*4882a593Smuzhiyun discard_page = discard_page->next;
2417*4882a593Smuzhiyun
2418*4882a593Smuzhiyun stat(s, DEACTIVATE_EMPTY);
2419*4882a593Smuzhiyun discard_slab(s, page);
2420*4882a593Smuzhiyun stat(s, FREE_SLAB);
2421*4882a593Smuzhiyun }
2422*4882a593Smuzhiyun #endif /* CONFIG_SLUB_CPU_PARTIAL */
2423*4882a593Smuzhiyun }
2424*4882a593Smuzhiyun
2425*4882a593Smuzhiyun /*
2426*4882a593Smuzhiyun * Put a page that was just frozen (in __slab_free|get_partial_node) into a
2427*4882a593Smuzhiyun * partial page slot if available.
2428*4882a593Smuzhiyun *
2429*4882a593Smuzhiyun * If we did not find a slot then simply move all the partials to the
2430*4882a593Smuzhiyun * per node partial list.
2431*4882a593Smuzhiyun */
2432*4882a593Smuzhiyun static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2433*4882a593Smuzhiyun {
2434*4882a593Smuzhiyun #ifdef CONFIG_SLUB_CPU_PARTIAL
2435*4882a593Smuzhiyun struct page *oldpage;
2436*4882a593Smuzhiyun int pages;
2437*4882a593Smuzhiyun int pobjects;
2438*4882a593Smuzhiyun
2439*4882a593Smuzhiyun preempt_disable();
2440*4882a593Smuzhiyun do {
2441*4882a593Smuzhiyun pages = 0;
2442*4882a593Smuzhiyun pobjects = 0;
2443*4882a593Smuzhiyun oldpage = this_cpu_read(s->cpu_slab->partial);
2444*4882a593Smuzhiyun
2445*4882a593Smuzhiyun if (oldpage) {
2446*4882a593Smuzhiyun pobjects = oldpage->pobjects;
2447*4882a593Smuzhiyun pages = oldpage->pages;
2448*4882a593Smuzhiyun if (drain && pobjects > slub_cpu_partial(s)) {
2449*4882a593Smuzhiyun unsigned long flags;
2450*4882a593Smuzhiyun /*
2451*4882a593Smuzhiyun * partial array is full. Move the existing
2452*4882a593Smuzhiyun * set to the per node partial list.
2453*4882a593Smuzhiyun */
2454*4882a593Smuzhiyun local_irq_save(flags);
2455*4882a593Smuzhiyun unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2456*4882a593Smuzhiyun local_irq_restore(flags);
2457*4882a593Smuzhiyun oldpage = NULL;
2458*4882a593Smuzhiyun pobjects = 0;
2459*4882a593Smuzhiyun pages = 0;
2460*4882a593Smuzhiyun stat(s, CPU_PARTIAL_DRAIN);
2461*4882a593Smuzhiyun }
2462*4882a593Smuzhiyun }
2463*4882a593Smuzhiyun
2464*4882a593Smuzhiyun pages++;
2465*4882a593Smuzhiyun pobjects += page->objects - page->inuse;
2466*4882a593Smuzhiyun
2467*4882a593Smuzhiyun page->pages = pages;
2468*4882a593Smuzhiyun page->pobjects = pobjects;
2469*4882a593Smuzhiyun page->next = oldpage;
2470*4882a593Smuzhiyun
2471*4882a593Smuzhiyun } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2472*4882a593Smuzhiyun != oldpage);
2473*4882a593Smuzhiyun if (unlikely(!slub_cpu_partial(s))) {
2474*4882a593Smuzhiyun unsigned long flags;
2475*4882a593Smuzhiyun
2476*4882a593Smuzhiyun local_irq_save(flags);
2477*4882a593Smuzhiyun unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2478*4882a593Smuzhiyun local_irq_restore(flags);
2479*4882a593Smuzhiyun }
2480*4882a593Smuzhiyun preempt_enable();
2481*4882a593Smuzhiyun #endif /* CONFIG_SLUB_CPU_PARTIAL */
2482*4882a593Smuzhiyun }
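/*
 * The loop above is a lockless push of @page onto the front of the
 * per-cpu partial list. Reduced to its core it is (sketch, not
 * compiled):
 *
 *	do {
 *		oldpage = this_cpu_read(s->cpu_slab->partial);
 *		page->next = oldpage;
 *	} while (this_cpu_cmpxchg(s->cpu_slab->partial,
 *				  oldpage, page) != oldpage);
 *
 * page->pages and page->pobjects in the list head are approximate
 * counters that only drive the decision to drain into the node partial
 * list via unfreeze_partials().
 */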
2483*4882a593Smuzhiyun
2484*4882a593Smuzhiyun static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2485*4882a593Smuzhiyun {
2486*4882a593Smuzhiyun stat(s, CPUSLAB_FLUSH);
2487*4882a593Smuzhiyun deactivate_slab(s, c->page, c->freelist, c);
2488*4882a593Smuzhiyun }
2489*4882a593Smuzhiyun
2490*4882a593Smuzhiyun /*
2491*4882a593Smuzhiyun * Flush cpu slab.
2492*4882a593Smuzhiyun *
2493*4882a593Smuzhiyun * Called from IPI handler with interrupts disabled.
2494*4882a593Smuzhiyun */
2495*4882a593Smuzhiyun static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2496*4882a593Smuzhiyun {
2497*4882a593Smuzhiyun struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2498*4882a593Smuzhiyun
2499*4882a593Smuzhiyun if (c->page)
2500*4882a593Smuzhiyun flush_slab(s, c);
2501*4882a593Smuzhiyun
2502*4882a593Smuzhiyun unfreeze_partials(s, c);
2503*4882a593Smuzhiyun }
2504*4882a593Smuzhiyun
2505*4882a593Smuzhiyun static void flush_cpu_slab(void *d)
2506*4882a593Smuzhiyun {
2507*4882a593Smuzhiyun struct kmem_cache *s = d;
2508*4882a593Smuzhiyun
2509*4882a593Smuzhiyun __flush_cpu_slab(s, smp_processor_id());
2510*4882a593Smuzhiyun }
2511*4882a593Smuzhiyun
2512*4882a593Smuzhiyun static bool has_cpu_slab(int cpu, void *info)
2513*4882a593Smuzhiyun {
2514*4882a593Smuzhiyun struct kmem_cache *s = info;
2515*4882a593Smuzhiyun struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2516*4882a593Smuzhiyun
2517*4882a593Smuzhiyun return c->page || slub_percpu_partial(c);
2518*4882a593Smuzhiyun }
2519*4882a593Smuzhiyun
2520*4882a593Smuzhiyun static void flush_all(struct kmem_cache *s)
2521*4882a593Smuzhiyun {
2522*4882a593Smuzhiyun on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
2523*4882a593Smuzhiyun }
2524*4882a593Smuzhiyun
2525*4882a593Smuzhiyun /*
2526*4882a593Smuzhiyun * Use the cpu notifier to ensure that the cpu slabs are flushed when
2527*4882a593Smuzhiyun * necessary.
2528*4882a593Smuzhiyun */
2529*4882a593Smuzhiyun static int slub_cpu_dead(unsigned int cpu)
2530*4882a593Smuzhiyun {
2531*4882a593Smuzhiyun struct kmem_cache *s;
2532*4882a593Smuzhiyun unsigned long flags;
2533*4882a593Smuzhiyun
2534*4882a593Smuzhiyun mutex_lock(&slab_mutex);
2535*4882a593Smuzhiyun list_for_each_entry(s, &slab_caches, list) {
2536*4882a593Smuzhiyun local_irq_save(flags);
2537*4882a593Smuzhiyun __flush_cpu_slab(s, cpu);
2538*4882a593Smuzhiyun local_irq_restore(flags);
2539*4882a593Smuzhiyun }
2540*4882a593Smuzhiyun mutex_unlock(&slab_mutex);
2541*4882a593Smuzhiyun return 0;
2542*4882a593Smuzhiyun }
2543*4882a593Smuzhiyun
2544*4882a593Smuzhiyun /*
2545*4882a593Smuzhiyun * Check if the objects in a per cpu structure fit numa
2546*4882a593Smuzhiyun * locality expectations.
2547*4882a593Smuzhiyun */
2548*4882a593Smuzhiyun static inline int node_match(struct page *page, int node)
2549*4882a593Smuzhiyun {
2550*4882a593Smuzhiyun #ifdef CONFIG_NUMA
2551*4882a593Smuzhiyun if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2552*4882a593Smuzhiyun return 0;
2553*4882a593Smuzhiyun #endif
2554*4882a593Smuzhiyun return 1;
2555*4882a593Smuzhiyun }
2556*4882a593Smuzhiyun
2557*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
2558*4882a593Smuzhiyun static int count_free(struct page *page)
2559*4882a593Smuzhiyun {
2560*4882a593Smuzhiyun return page->objects - page->inuse;
2561*4882a593Smuzhiyun }
2562*4882a593Smuzhiyun
2563*4882a593Smuzhiyun static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2564*4882a593Smuzhiyun {
2565*4882a593Smuzhiyun return atomic_long_read(&n->total_objects);
2566*4882a593Smuzhiyun }
2567*4882a593Smuzhiyun #endif /* CONFIG_SLUB_DEBUG */
2568*4882a593Smuzhiyun
2569*4882a593Smuzhiyun #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SLUB_SYSFS)
2570*4882a593Smuzhiyun static unsigned long count_partial(struct kmem_cache_node *n,
2571*4882a593Smuzhiyun int (*get_count)(struct page *))
2572*4882a593Smuzhiyun {
2573*4882a593Smuzhiyun unsigned long flags;
2574*4882a593Smuzhiyun unsigned long x = 0;
2575*4882a593Smuzhiyun struct page *page;
2576*4882a593Smuzhiyun
2577*4882a593Smuzhiyun spin_lock_irqsave(&n->list_lock, flags);
2578*4882a593Smuzhiyun list_for_each_entry(page, &n->partial, slab_list)
2579*4882a593Smuzhiyun x += get_count(page);
2580*4882a593Smuzhiyun spin_unlock_irqrestore(&n->list_lock, flags);
2581*4882a593Smuzhiyun return x;
2582*4882a593Smuzhiyun }
2583*4882a593Smuzhiyun #endif /* CONFIG_SLUB_DEBUG || CONFIG_SLUB_SYSFS */
2584*4882a593Smuzhiyun
2585*4882a593Smuzhiyun static noinline void
2586*4882a593Smuzhiyun slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2587*4882a593Smuzhiyun {
2588*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
2589*4882a593Smuzhiyun static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2590*4882a593Smuzhiyun DEFAULT_RATELIMIT_BURST);
2591*4882a593Smuzhiyun int node;
2592*4882a593Smuzhiyun struct kmem_cache_node *n;
2593*4882a593Smuzhiyun
2594*4882a593Smuzhiyun if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2595*4882a593Smuzhiyun return;
2596*4882a593Smuzhiyun
2597*4882a593Smuzhiyun pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2598*4882a593Smuzhiyun nid, gfpflags, &gfpflags);
2599*4882a593Smuzhiyun pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
2600*4882a593Smuzhiyun s->name, s->object_size, s->size, oo_order(s->oo),
2601*4882a593Smuzhiyun oo_order(s->min));
2602*4882a593Smuzhiyun
2603*4882a593Smuzhiyun if (oo_order(s->min) > get_order(s->object_size))
2604*4882a593Smuzhiyun pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2605*4882a593Smuzhiyun s->name);
2606*4882a593Smuzhiyun
2607*4882a593Smuzhiyun for_each_kmem_cache_node(s, node, n) {
2608*4882a593Smuzhiyun unsigned long nr_slabs;
2609*4882a593Smuzhiyun unsigned long nr_objs;
2610*4882a593Smuzhiyun unsigned long nr_free;
2611*4882a593Smuzhiyun
2612*4882a593Smuzhiyun nr_free = count_partial(n, count_free);
2613*4882a593Smuzhiyun nr_slabs = node_nr_slabs(n);
2614*4882a593Smuzhiyun nr_objs = node_nr_objs(n);
2615*4882a593Smuzhiyun
2616*4882a593Smuzhiyun pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2617*4882a593Smuzhiyun node, nr_slabs, nr_objs, nr_free);
2618*4882a593Smuzhiyun }
2619*4882a593Smuzhiyun #endif
2620*4882a593Smuzhiyun }
2621*4882a593Smuzhiyun
2622*4882a593Smuzhiyun static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2623*4882a593Smuzhiyun int node, struct kmem_cache_cpu **pc)
2624*4882a593Smuzhiyun {
2625*4882a593Smuzhiyun void *freelist;
2626*4882a593Smuzhiyun struct kmem_cache_cpu *c = *pc;
2627*4882a593Smuzhiyun struct page *page;
2628*4882a593Smuzhiyun
2629*4882a593Smuzhiyun WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2630*4882a593Smuzhiyun
2631*4882a593Smuzhiyun freelist = get_partial(s, flags, node, c);
2632*4882a593Smuzhiyun
2633*4882a593Smuzhiyun if (freelist)
2634*4882a593Smuzhiyun return freelist;
2635*4882a593Smuzhiyun
2636*4882a593Smuzhiyun page = new_slab(s, flags, node);
2637*4882a593Smuzhiyun if (page) {
2638*4882a593Smuzhiyun c = raw_cpu_ptr(s->cpu_slab);
2639*4882a593Smuzhiyun if (c->page)
2640*4882a593Smuzhiyun flush_slab(s, c);
2641*4882a593Smuzhiyun
2642*4882a593Smuzhiyun /*
2643*4882a593Smuzhiyun * No other reference to the page yet so we can
2644*4882a593Smuzhiyun * muck around with it freely without cmpxchg
2645*4882a593Smuzhiyun */
2646*4882a593Smuzhiyun freelist = page->freelist;
2647*4882a593Smuzhiyun page->freelist = NULL;
2648*4882a593Smuzhiyun
2649*4882a593Smuzhiyun stat(s, ALLOC_SLAB);
2650*4882a593Smuzhiyun c->page = page;
2651*4882a593Smuzhiyun *pc = c;
2652*4882a593Smuzhiyun }
2653*4882a593Smuzhiyun
2654*4882a593Smuzhiyun return freelist;
2655*4882a593Smuzhiyun }
2656*4882a593Smuzhiyun
2657*4882a593Smuzhiyun static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2658*4882a593Smuzhiyun {
2659*4882a593Smuzhiyun if (unlikely(PageSlabPfmemalloc(page)))
2660*4882a593Smuzhiyun return gfp_pfmemalloc_allowed(gfpflags);
2661*4882a593Smuzhiyun
2662*4882a593Smuzhiyun return true;
2663*4882a593Smuzhiyun }
2664*4882a593Smuzhiyun
2665*4882a593Smuzhiyun /*
2666*4882a593Smuzhiyun * Check the page->freelist of a page and either transfer the freelist to the
2667*4882a593Smuzhiyun * per cpu freelist or deactivate the page.
2668*4882a593Smuzhiyun *
2669*4882a593Smuzhiyun * The page is still frozen if the return value is not NULL.
2670*4882a593Smuzhiyun *
2671*4882a593Smuzhiyun * If this function returns NULL then the page has been unfrozen.
2672*4882a593Smuzhiyun *
2673*4882a593Smuzhiyun * This function must be called with interrupts disabled.
2674*4882a593Smuzhiyun */
2675*4882a593Smuzhiyun static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2676*4882a593Smuzhiyun {
2677*4882a593Smuzhiyun struct page new;
2678*4882a593Smuzhiyun unsigned long counters;
2679*4882a593Smuzhiyun void *freelist;
2680*4882a593Smuzhiyun
2681*4882a593Smuzhiyun do {
2682*4882a593Smuzhiyun freelist = page->freelist;
2683*4882a593Smuzhiyun counters = page->counters;
2684*4882a593Smuzhiyun
2685*4882a593Smuzhiyun new.counters = counters;
2686*4882a593Smuzhiyun VM_BUG_ON(!new.frozen);
2687*4882a593Smuzhiyun
2688*4882a593Smuzhiyun new.inuse = page->objects;
2689*4882a593Smuzhiyun new.frozen = freelist != NULL;
2690*4882a593Smuzhiyun
2691*4882a593Smuzhiyun } while (!__cmpxchg_double_slab(s, page,
2692*4882a593Smuzhiyun freelist, counters,
2693*4882a593Smuzhiyun NULL, new.counters,
2694*4882a593Smuzhiyun "get_freelist"));
2695*4882a593Smuzhiyun
2696*4882a593Smuzhiyun return freelist;
2697*4882a593Smuzhiyun }
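/*
 * Note on the cmpxchg above: page->counters overlays the inuse, objects
 * and frozen fields of the page, so assigning new.counters and then
 * cmpxchg'ing freelist+counters updates the object accounting and the
 * frozen bit in one atomic step. Conceptually (field layout shown for
 * illustration only; see struct page for the authoritative definition):
 *
 *	union {
 *		unsigned long counters;
 *		struct {
 *			unsigned inuse:16;
 *			unsigned objects:15;
 *			unsigned frozen:1;
 *		};
 *	};
 */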
2698*4882a593Smuzhiyun
2699*4882a593Smuzhiyun /*
2700*4882a593Smuzhiyun * Slow path. The lockless freelist is empty or we need to perform
2701*4882a593Smuzhiyun * debugging duties.
2702*4882a593Smuzhiyun *
2703*4882a593Smuzhiyun * Processing is still very fast if new objects have been freed to the
2704*4882a593Smuzhiyun * regular freelist. In that case we simply take over the regular freelist
2705*4882a593Smuzhiyun * as the lockless freelist and zap the regular freelist.
2706*4882a593Smuzhiyun *
2707*4882a593Smuzhiyun * If that is not working then we fall back to the partial lists. We take the
2708*4882a593Smuzhiyun * first element of the freelist as the object to allocate now and move the
2709*4882a593Smuzhiyun * rest of the freelist to the lockless freelist.
2710*4882a593Smuzhiyun *
2711*4882a593Smuzhiyun * And if we were unable to get a new slab from the partial slab lists then
2712*4882a593Smuzhiyun * we need to allocate a new slab. This is the slowest path since it involves
2713*4882a593Smuzhiyun * a call to the page allocator and the setup of a new slab.
2714*4882a593Smuzhiyun *
2715*4882a593Smuzhiyun * Version of __slab_alloc to use when we know that interrupts are
2716*4882a593Smuzhiyun * already disabled (which is the case for bulk allocation).
2717*4882a593Smuzhiyun */
2718*4882a593Smuzhiyun static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2719*4882a593Smuzhiyun unsigned long addr, struct kmem_cache_cpu *c)
2720*4882a593Smuzhiyun {
2721*4882a593Smuzhiyun void *freelist;
2722*4882a593Smuzhiyun struct page *page;
2723*4882a593Smuzhiyun
2724*4882a593Smuzhiyun stat(s, ALLOC_SLOWPATH);
2725*4882a593Smuzhiyun
2726*4882a593Smuzhiyun page = c->page;
2727*4882a593Smuzhiyun if (!page) {
2728*4882a593Smuzhiyun /*
2729*4882a593Smuzhiyun * if the node is not online or has no normal memory, just
2730*4882a593Smuzhiyun * ignore the node constraint
2731*4882a593Smuzhiyun */
2732*4882a593Smuzhiyun if (unlikely(node != NUMA_NO_NODE &&
2733*4882a593Smuzhiyun !node_state(node, N_NORMAL_MEMORY)))
2734*4882a593Smuzhiyun node = NUMA_NO_NODE;
2735*4882a593Smuzhiyun goto new_slab;
2736*4882a593Smuzhiyun }
2737*4882a593Smuzhiyun redo:
2738*4882a593Smuzhiyun
2739*4882a593Smuzhiyun if (unlikely(!node_match(page, node))) {
2740*4882a593Smuzhiyun /*
2741*4882a593Smuzhiyun * same as above but node_match() being false already
2742*4882a593Smuzhiyun * implies node != NUMA_NO_NODE
2743*4882a593Smuzhiyun */
2744*4882a593Smuzhiyun if (!node_state(node, N_NORMAL_MEMORY)) {
2745*4882a593Smuzhiyun node = NUMA_NO_NODE;
2746*4882a593Smuzhiyun goto redo;
2747*4882a593Smuzhiyun } else {
2748*4882a593Smuzhiyun stat(s, ALLOC_NODE_MISMATCH);
2749*4882a593Smuzhiyun deactivate_slab(s, page, c->freelist, c);
2750*4882a593Smuzhiyun goto new_slab;
2751*4882a593Smuzhiyun }
2752*4882a593Smuzhiyun }
2753*4882a593Smuzhiyun
2754*4882a593Smuzhiyun /*
2755*4882a593Smuzhiyun * By rights, we should be searching for a slab page that was
2756*4882a593Smuzhiyun * PFMEMALLOC but right now, we are losing the pfmemalloc
2757*4882a593Smuzhiyun * information when the page leaves the per-cpu allocator
2758*4882a593Smuzhiyun */
2759*4882a593Smuzhiyun if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2760*4882a593Smuzhiyun deactivate_slab(s, page, c->freelist, c);
2761*4882a593Smuzhiyun goto new_slab;
2762*4882a593Smuzhiyun }
2763*4882a593Smuzhiyun
2764*4882a593Smuzhiyun /* must check again c->freelist in case of cpu migration or IRQ */
2765*4882a593Smuzhiyun freelist = c->freelist;
2766*4882a593Smuzhiyun if (freelist)
2767*4882a593Smuzhiyun goto load_freelist;
2768*4882a593Smuzhiyun
2769*4882a593Smuzhiyun freelist = get_freelist(s, page);
2770*4882a593Smuzhiyun
2771*4882a593Smuzhiyun if (!freelist) {
2772*4882a593Smuzhiyun c->page = NULL;
2773*4882a593Smuzhiyun c->tid = next_tid(c->tid);
2774*4882a593Smuzhiyun stat(s, DEACTIVATE_BYPASS);
2775*4882a593Smuzhiyun goto new_slab;
2776*4882a593Smuzhiyun }
2777*4882a593Smuzhiyun
2778*4882a593Smuzhiyun stat(s, ALLOC_REFILL);
2779*4882a593Smuzhiyun
2780*4882a593Smuzhiyun load_freelist:
2781*4882a593Smuzhiyun /*
2782*4882a593Smuzhiyun * freelist is pointing to the list of objects to be used.
2783*4882a593Smuzhiyun * page is pointing to the page from which the objects are obtained.
2784*4882a593Smuzhiyun * That page must be frozen for per cpu allocations to work.
2785*4882a593Smuzhiyun */
2786*4882a593Smuzhiyun VM_BUG_ON(!c->page->frozen);
2787*4882a593Smuzhiyun c->freelist = get_freepointer(s, freelist);
2788*4882a593Smuzhiyun c->tid = next_tid(c->tid);
2789*4882a593Smuzhiyun return freelist;
2790*4882a593Smuzhiyun
2791*4882a593Smuzhiyun new_slab:
2792*4882a593Smuzhiyun
2793*4882a593Smuzhiyun if (slub_percpu_partial(c)) {
2794*4882a593Smuzhiyun page = c->page = slub_percpu_partial(c);
2795*4882a593Smuzhiyun slub_set_percpu_partial(c, page);
2796*4882a593Smuzhiyun stat(s, CPU_PARTIAL_ALLOC);
2797*4882a593Smuzhiyun goto redo;
2798*4882a593Smuzhiyun }
2799*4882a593Smuzhiyun
2800*4882a593Smuzhiyun freelist = new_slab_objects(s, gfpflags, node, &c);
2801*4882a593Smuzhiyun
2802*4882a593Smuzhiyun if (unlikely(!freelist)) {
2803*4882a593Smuzhiyun slab_out_of_memory(s, gfpflags, node);
2804*4882a593Smuzhiyun return NULL;
2805*4882a593Smuzhiyun }
2806*4882a593Smuzhiyun
2807*4882a593Smuzhiyun page = c->page;
2808*4882a593Smuzhiyun if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2809*4882a593Smuzhiyun goto load_freelist;
2810*4882a593Smuzhiyun
2811*4882a593Smuzhiyun /* Only entered in the debug case */
2812*4882a593Smuzhiyun if (kmem_cache_debug(s) &&
2813*4882a593Smuzhiyun !alloc_debug_processing(s, page, freelist, addr))
2814*4882a593Smuzhiyun goto new_slab; /* Slab failed checks. Next slab needed */
2815*4882a593Smuzhiyun
2816*4882a593Smuzhiyun deactivate_slab(s, page, get_freepointer(s, freelist), c);
2817*4882a593Smuzhiyun return freelist;
2818*4882a593Smuzhiyun }
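/*
 * Order in which ___slab_alloc() above looks for an object, summarized
 * for reference (derived from the code above):
 *
 *	1. the lockless per-cpu freelist (re-checked once irqs are off)
 *	2. the page freelist of the current cpu slab (get_freelist())
 *	3. a slab from the per-cpu partial list (slub_percpu_partial())
 *	4. a slab from a node partial list or a brand new slab
 *	   (new_slab_objects() -> get_partial() / new_slab())
 *
 * Each step is cheaper than the next; only the last one has to call
 * into the page allocator.
 */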
2819*4882a593Smuzhiyun
2820*4882a593Smuzhiyun /*
2821*4882a593Smuzhiyun * Another variant that disables interrupts and compensates for possible
2822*4882a593Smuzhiyun * cpu changes by refetching the per cpu area pointer.
2823*4882a593Smuzhiyun */
2824*4882a593Smuzhiyun static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2825*4882a593Smuzhiyun unsigned long addr, struct kmem_cache_cpu *c)
2826*4882a593Smuzhiyun {
2827*4882a593Smuzhiyun void *p;
2828*4882a593Smuzhiyun unsigned long flags;
2829*4882a593Smuzhiyun
2830*4882a593Smuzhiyun local_irq_save(flags);
2831*4882a593Smuzhiyun #ifdef CONFIG_PREEMPTION
2832*4882a593Smuzhiyun /*
2833*4882a593Smuzhiyun * We may have been preempted and rescheduled on a different
2834*4882a593Smuzhiyun * cpu before disabling interrupts. Need to reload cpu area
2835*4882a593Smuzhiyun * pointer.
2836*4882a593Smuzhiyun */
2837*4882a593Smuzhiyun c = this_cpu_ptr(s->cpu_slab);
2838*4882a593Smuzhiyun #endif
2839*4882a593Smuzhiyun
2840*4882a593Smuzhiyun p = ___slab_alloc(s, gfpflags, node, addr, c);
2841*4882a593Smuzhiyun local_irq_restore(flags);
2842*4882a593Smuzhiyun return p;
2843*4882a593Smuzhiyun }
2844*4882a593Smuzhiyun
2845*4882a593Smuzhiyun /*
2846*4882a593Smuzhiyun * If the object has been wiped upon free, make sure it's fully initialized by
2847*4882a593Smuzhiyun * zeroing out freelist pointer.
2848*4882a593Smuzhiyun */
2849*4882a593Smuzhiyun static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
2850*4882a593Smuzhiyun void *obj)
2851*4882a593Smuzhiyun {
2852*4882a593Smuzhiyun if (unlikely(slab_want_init_on_free(s)) && obj)
2853*4882a593Smuzhiyun memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
2854*4882a593Smuzhiyun 0, sizeof(void *));
2855*4882a593Smuzhiyun }
2856*4882a593Smuzhiyun
2857*4882a593Smuzhiyun /*
2858*4882a593Smuzhiyun * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
2859*4882a593Smuzhiyun * have the fastpath folded into their functions. So no function call
2860*4882a593Smuzhiyun * overhead for requests that can be satisfied on the fastpath.
2861*4882a593Smuzhiyun *
2862*4882a593Smuzhiyun * The fastpath works by first checking if the lockless freelist can be used.
2863*4882a593Smuzhiyun * If not then __slab_alloc is called for slow processing.
2864*4882a593Smuzhiyun *
2865*4882a593Smuzhiyun * Otherwise we can simply pick the next object from the lockless free list.
2866*4882a593Smuzhiyun */
2867*4882a593Smuzhiyun static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2868*4882a593Smuzhiyun gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
2869*4882a593Smuzhiyun {
2870*4882a593Smuzhiyun void *object;
2871*4882a593Smuzhiyun struct kmem_cache_cpu *c;
2872*4882a593Smuzhiyun struct page *page;
2873*4882a593Smuzhiyun unsigned long tid;
2874*4882a593Smuzhiyun struct obj_cgroup *objcg = NULL;
2875*4882a593Smuzhiyun bool init = false;
2876*4882a593Smuzhiyun
2877*4882a593Smuzhiyun s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
2878*4882a593Smuzhiyun if (!s)
2879*4882a593Smuzhiyun return NULL;
2880*4882a593Smuzhiyun
2881*4882a593Smuzhiyun object = kfence_alloc(s, orig_size, gfpflags);
2882*4882a593Smuzhiyun if (unlikely(object))
2883*4882a593Smuzhiyun goto out;
2884*4882a593Smuzhiyun
2885*4882a593Smuzhiyun redo:
2886*4882a593Smuzhiyun /*
2887*4882a593Smuzhiyun * Must read kmem_cache cpu data via this cpu ptr. Preemption is
2888*4882a593Smuzhiyun * enabled. We may switch back and forth between cpus while
2889*4882a593Smuzhiyun * reading from one cpu area. That does not matter as long
2890*4882a593Smuzhiyun * as we end up on the original cpu again when doing the cmpxchg.
2891*4882a593Smuzhiyun *
2892*4882a593Smuzhiyun * We should guarantee that tid and kmem_cache are retrieved on
2893*4882a593Smuzhiyun * the same cpu. They could differ if CONFIG_PREEMPTION is enabled, so we
2894*4882a593Smuzhiyun * need to check that they match.
2895*4882a593Smuzhiyun */
2896*4882a593Smuzhiyun do {
2897*4882a593Smuzhiyun tid = this_cpu_read(s->cpu_slab->tid);
2898*4882a593Smuzhiyun c = raw_cpu_ptr(s->cpu_slab);
2899*4882a593Smuzhiyun } while (IS_ENABLED(CONFIG_PREEMPTION) &&
2900*4882a593Smuzhiyun unlikely(tid != READ_ONCE(c->tid)));
2901*4882a593Smuzhiyun
2902*4882a593Smuzhiyun /*
2903*4882a593Smuzhiyun * Irqless object alloc/free algorithm used here depends on sequence
2904*4882a593Smuzhiyun * of fetching cpu_slab's data. tid should be fetched before anything
2905*4882a593Smuzhiyun * on c to guarantee that object and page associated with previous tid
2906*4882a593Smuzhiyun * won't be used with current tid. If we fetch tid first, object and
2907*4882a593Smuzhiyun * page could be the ones associated with the next tid and our alloc/free
2908*4882a593Smuzhiyun * request will fail. In that case we simply retry, so there is no problem.
2909*4882a593Smuzhiyun */
2910*4882a593Smuzhiyun barrier();
2911*4882a593Smuzhiyun
2912*4882a593Smuzhiyun /*
2913*4882a593Smuzhiyun * The transaction ids are globally unique per cpu and per operation on
2914*4882a593Smuzhiyun * a per cpu queue. Thus they can guarantee that the cmpxchg_double
2915*4882a593Smuzhiyun * occurs on the right processor and that there was no operation on the
2916*4882a593Smuzhiyun * linked list in between.
2917*4882a593Smuzhiyun */
2918*4882a593Smuzhiyun
2919*4882a593Smuzhiyun object = c->freelist;
2920*4882a593Smuzhiyun page = c->page;
2921*4882a593Smuzhiyun if (unlikely(!object || !page || !node_match(page, node))) {
2922*4882a593Smuzhiyun object = __slab_alloc(s, gfpflags, node, addr, c);
2923*4882a593Smuzhiyun } else {
2924*4882a593Smuzhiyun void *next_object = get_freepointer_safe(s, object);
2925*4882a593Smuzhiyun
2926*4882a593Smuzhiyun /*
2927*4882a593Smuzhiyun * The cmpxchg will only match if there was no additional
2928*4882a593Smuzhiyun * operation and if we are on the right processor.
2929*4882a593Smuzhiyun *
2930*4882a593Smuzhiyun * The cmpxchg does the following atomically (without lock
2931*4882a593Smuzhiyun * semantics!)
2932*4882a593Smuzhiyun * 1. Relocate first pointer to the current per cpu area.
2933*4882a593Smuzhiyun * 2. Verify that tid and freelist have not been changed
2934*4882a593Smuzhiyun * 3. If they were not changed replace tid and freelist
2935*4882a593Smuzhiyun *
2936*4882a593Smuzhiyun * Since this is without lock semantics the protection is only
2937*4882a593Smuzhiyun * against code executing on this cpu *not* from access by
2938*4882a593Smuzhiyun * other cpus.
2939*4882a593Smuzhiyun */
2940*4882a593Smuzhiyun if (unlikely(!this_cpu_cmpxchg_double(
2941*4882a593Smuzhiyun s->cpu_slab->freelist, s->cpu_slab->tid,
2942*4882a593Smuzhiyun object, tid,
2943*4882a593Smuzhiyun next_object, next_tid(tid)))) {
2944*4882a593Smuzhiyun
2945*4882a593Smuzhiyun note_cmpxchg_failure("slab_alloc", s, tid);
2946*4882a593Smuzhiyun goto redo;
2947*4882a593Smuzhiyun }
2948*4882a593Smuzhiyun prefetch_freepointer(s, next_object);
2949*4882a593Smuzhiyun stat(s, ALLOC_FASTPATH);
2950*4882a593Smuzhiyun }
2951*4882a593Smuzhiyun
2952*4882a593Smuzhiyun maybe_wipe_obj_freeptr(s, object);
2953*4882a593Smuzhiyun init = slab_want_init_on_alloc(gfpflags, s);
2954*4882a593Smuzhiyun
2955*4882a593Smuzhiyun out:
2956*4882a593Smuzhiyun slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
2957*4882a593Smuzhiyun
2958*4882a593Smuzhiyun return object;
2959*4882a593Smuzhiyun }
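/*
 * Why the tid participates in the cmpxchg above -- a hypothetical
 * interleaving (illustration only):
 *
 *	1. this cpu reads object A from c->freelist and tid T
 *	2. an interrupt (or, with CONFIG_PREEMPTION, another task on
 *	   this cpu) allocates A, frees other objects and later frees A
 *	   again, so c->freelist once more points to A
 *	3. the original context resumes; the freelist value matches, but
 *	   every operation in step 2 advanced c->tid past T, so the
 *	   cmpxchg_double fails, note_cmpxchg_failure() records why, and
 *	   the allocation is retried
 *
 * Without the tid, step 3 would be a classic ABA problem: the stale
 * next pointer read in step 1 could be installed as c->freelist.
 */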
2960*4882a593Smuzhiyun
2961*4882a593Smuzhiyun static __always_inline void *slab_alloc(struct kmem_cache *s,
2962*4882a593Smuzhiyun gfp_t gfpflags, unsigned long addr, size_t orig_size)
2963*4882a593Smuzhiyun {
2964*4882a593Smuzhiyun return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
2965*4882a593Smuzhiyun }
2966*4882a593Smuzhiyun
2967*4882a593Smuzhiyun void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2968*4882a593Smuzhiyun {
2969*4882a593Smuzhiyun void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
2970*4882a593Smuzhiyun
2971*4882a593Smuzhiyun trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
2972*4882a593Smuzhiyun s->size, gfpflags);
2973*4882a593Smuzhiyun
2974*4882a593Smuzhiyun return ret;
2975*4882a593Smuzhiyun }
2976*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc);
2977*4882a593Smuzhiyun
2978*4882a593Smuzhiyun #ifdef CONFIG_TRACING
2979*4882a593Smuzhiyun void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2980*4882a593Smuzhiyun {
2981*4882a593Smuzhiyun void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
2982*4882a593Smuzhiyun trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2983*4882a593Smuzhiyun ret = kasan_kmalloc(s, ret, size, gfpflags);
2984*4882a593Smuzhiyun return ret;
2985*4882a593Smuzhiyun }
2986*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc_trace);
2987*4882a593Smuzhiyun #endif
2988*4882a593Smuzhiyun
2989*4882a593Smuzhiyun #ifdef CONFIG_NUMA
2990*4882a593Smuzhiyun void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2991*4882a593Smuzhiyun {
2992*4882a593Smuzhiyun void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
2993*4882a593Smuzhiyun
2994*4882a593Smuzhiyun trace_kmem_cache_alloc_node(_RET_IP_, ret,
2995*4882a593Smuzhiyun s->object_size, s->size, gfpflags, node);
2996*4882a593Smuzhiyun
2997*4882a593Smuzhiyun return ret;
2998*4882a593Smuzhiyun }
2999*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc_node);
3000*4882a593Smuzhiyun
3001*4882a593Smuzhiyun #ifdef CONFIG_TRACING
3002*4882a593Smuzhiyun void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
3003*4882a593Smuzhiyun gfp_t gfpflags,
3004*4882a593Smuzhiyun int node, size_t size)
3005*4882a593Smuzhiyun {
3006*4882a593Smuzhiyun void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
3007*4882a593Smuzhiyun
3008*4882a593Smuzhiyun trace_kmalloc_node(_RET_IP_, ret,
3009*4882a593Smuzhiyun size, s->size, gfpflags, node);
3010*4882a593Smuzhiyun
3011*4882a593Smuzhiyun ret = kasan_kmalloc(s, ret, size, gfpflags);
3012*4882a593Smuzhiyun return ret;
3013*4882a593Smuzhiyun }
3014*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
3015*4882a593Smuzhiyun #endif
3016*4882a593Smuzhiyun #endif /* CONFIG_NUMA */
3017*4882a593Smuzhiyun
3018*4882a593Smuzhiyun /*
3019*4882a593Smuzhiyun * Slow path handling. This may still be called frequently since objects
3020*4882a593Smuzhiyun * have a longer lifetime than the cpu slabs in most processing loads.
3021*4882a593Smuzhiyun *
3022*4882a593Smuzhiyun * So we still attempt to reduce cache line usage. Just take the slab
3023*4882a593Smuzhiyun * lock and free the item. If there is no additional partial page
3024*4882a593Smuzhiyun * handling required then we can return immediately.
3025*4882a593Smuzhiyun */
3026*4882a593Smuzhiyun static void __slab_free(struct kmem_cache *s, struct page *page,
3027*4882a593Smuzhiyun void *head, void *tail, int cnt,
3028*4882a593Smuzhiyun unsigned long addr)
3029*4882a593Smuzhiyun
3030*4882a593Smuzhiyun {
3031*4882a593Smuzhiyun void *prior;
3032*4882a593Smuzhiyun int was_frozen;
3033*4882a593Smuzhiyun struct page new;
3034*4882a593Smuzhiyun unsigned long counters;
3035*4882a593Smuzhiyun struct kmem_cache_node *n = NULL;
3036*4882a593Smuzhiyun unsigned long flags;
3037*4882a593Smuzhiyun
3038*4882a593Smuzhiyun stat(s, FREE_SLOWPATH);
3039*4882a593Smuzhiyun
3040*4882a593Smuzhiyun if (kfence_free(head))
3041*4882a593Smuzhiyun return;
3042*4882a593Smuzhiyun
3043*4882a593Smuzhiyun if (kmem_cache_debug(s) &&
3044*4882a593Smuzhiyun !free_debug_processing(s, page, head, tail, cnt, addr))
3045*4882a593Smuzhiyun return;
3046*4882a593Smuzhiyun
3047*4882a593Smuzhiyun do {
3048*4882a593Smuzhiyun if (unlikely(n)) {
3049*4882a593Smuzhiyun spin_unlock_irqrestore(&n->list_lock, flags);
3050*4882a593Smuzhiyun n = NULL;
3051*4882a593Smuzhiyun }
3052*4882a593Smuzhiyun prior = page->freelist;
3053*4882a593Smuzhiyun counters = page->counters;
3054*4882a593Smuzhiyun set_freepointer(s, tail, prior);
3055*4882a593Smuzhiyun new.counters = counters;
3056*4882a593Smuzhiyun was_frozen = new.frozen;
3057*4882a593Smuzhiyun new.inuse -= cnt;
3058*4882a593Smuzhiyun if ((!new.inuse || !prior) && !was_frozen) {
3059*4882a593Smuzhiyun
3060*4882a593Smuzhiyun if (kmem_cache_has_cpu_partial(s) && !prior) {
3061*4882a593Smuzhiyun
3062*4882a593Smuzhiyun /*
3063*4882a593Smuzhiyun * Slab was on no list before and will be
3064*4882a593Smuzhiyun * partially empty.
3065*4882a593Smuzhiyun * We can defer the list move and instead
3066*4882a593Smuzhiyun * freeze it.
3067*4882a593Smuzhiyun */
3068*4882a593Smuzhiyun new.frozen = 1;
3069*4882a593Smuzhiyun
3070*4882a593Smuzhiyun } else { /* Needs to be taken off a list */
3071*4882a593Smuzhiyun
3072*4882a593Smuzhiyun n = get_node(s, page_to_nid(page));
3073*4882a593Smuzhiyun /*
3074*4882a593Smuzhiyun * Speculatively acquire the list_lock.
3075*4882a593Smuzhiyun * If the cmpxchg does not succeed then we may
3076*4882a593Smuzhiyun * drop the list_lock without any processing.
3077*4882a593Smuzhiyun *
3078*4882a593Smuzhiyun * Otherwise the list_lock will synchronize with
3079*4882a593Smuzhiyun * other processors updating the list of slabs.
3080*4882a593Smuzhiyun */
3081*4882a593Smuzhiyun spin_lock_irqsave(&n->list_lock, flags);
3082*4882a593Smuzhiyun
3083*4882a593Smuzhiyun }
3084*4882a593Smuzhiyun }
3085*4882a593Smuzhiyun
3086*4882a593Smuzhiyun } while (!cmpxchg_double_slab(s, page,
3087*4882a593Smuzhiyun prior, counters,
3088*4882a593Smuzhiyun head, new.counters,
3089*4882a593Smuzhiyun "__slab_free"));
3090*4882a593Smuzhiyun
3091*4882a593Smuzhiyun if (likely(!n)) {
3092*4882a593Smuzhiyun
3093*4882a593Smuzhiyun if (likely(was_frozen)) {
3094*4882a593Smuzhiyun /*
3095*4882a593Smuzhiyun * The list lock was not taken therefore no list
3096*4882a593Smuzhiyun * activity can be necessary.
3097*4882a593Smuzhiyun */
3098*4882a593Smuzhiyun stat(s, FREE_FROZEN);
3099*4882a593Smuzhiyun } else if (new.frozen) {
3100*4882a593Smuzhiyun /*
3101*4882a593Smuzhiyun * If we just froze the page then put it onto the
3102*4882a593Smuzhiyun * per cpu partial list.
3103*4882a593Smuzhiyun */
3104*4882a593Smuzhiyun put_cpu_partial(s, page, 1);
3105*4882a593Smuzhiyun stat(s, CPU_PARTIAL_FREE);
3106*4882a593Smuzhiyun }
3107*4882a593Smuzhiyun
3108*4882a593Smuzhiyun return;
3109*4882a593Smuzhiyun }
3110*4882a593Smuzhiyun
3111*4882a593Smuzhiyun if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
3112*4882a593Smuzhiyun goto slab_empty;
3113*4882a593Smuzhiyun
3114*4882a593Smuzhiyun /*
3115*4882a593Smuzhiyun * Objects left in the slab. If it was not on the partial list before
3116*4882a593Smuzhiyun * then add it.
3117*4882a593Smuzhiyun */
3118*4882a593Smuzhiyun if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
3119*4882a593Smuzhiyun remove_full(s, n, page);
3120*4882a593Smuzhiyun add_partial(n, page, DEACTIVATE_TO_TAIL);
3121*4882a593Smuzhiyun stat(s, FREE_ADD_PARTIAL);
3122*4882a593Smuzhiyun }
3123*4882a593Smuzhiyun spin_unlock_irqrestore(&n->list_lock, flags);
3124*4882a593Smuzhiyun return;
3125*4882a593Smuzhiyun
3126*4882a593Smuzhiyun slab_empty:
3127*4882a593Smuzhiyun if (prior) {
3128*4882a593Smuzhiyun /*
3129*4882a593Smuzhiyun * Slab on the partial list.
3130*4882a593Smuzhiyun */
3131*4882a593Smuzhiyun remove_partial(n, page);
3132*4882a593Smuzhiyun stat(s, FREE_REMOVE_PARTIAL);
3133*4882a593Smuzhiyun } else {
3134*4882a593Smuzhiyun /* Slab must be on the full list */
3135*4882a593Smuzhiyun remove_full(s, n, page);
3136*4882a593Smuzhiyun }
3137*4882a593Smuzhiyun
3138*4882a593Smuzhiyun spin_unlock_irqrestore(&n->list_lock, flags);
3139*4882a593Smuzhiyun stat(s, FREE_SLAB);
3140*4882a593Smuzhiyun discard_slab(s, page);
3141*4882a593Smuzhiyun }
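/*
 * Summary of the cases handled by __slab_free() above (for reference,
 * derived from the code):
 *
 *	- the slab was frozen (a cpu owns it): just chain the freed
 *	  objects onto page->freelist, no list manipulation at all
 *	- the slab had no free objects before and cpu partial lists are
 *	  enabled: freeze it here and hand it to put_cpu_partial()
 *	- the slab became completely free and the node already holds at
 *	  least s->min_partial partial slabs: discard it
 *	- otherwise: make sure the slab sits on the node partial list
 */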
3142*4882a593Smuzhiyun
3143*4882a593Smuzhiyun /*
3144*4882a593Smuzhiyun * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
3145*4882a593Smuzhiyun * can perform fastpath freeing without additional function calls.
3146*4882a593Smuzhiyun *
3147*4882a593Smuzhiyun * The fastpath is only possible if we are freeing to the current cpu slab
3148*4882a593Smuzhiyun * of this processor. This is typically the case if we have just allocated
3149*4882a593Smuzhiyun * the item before.
3150*4882a593Smuzhiyun *
3151*4882a593Smuzhiyun * If fastpath is not possible then fall back to __slab_free where we deal
3152*4882a593Smuzhiyun * with all sorts of special processing.
3153*4882a593Smuzhiyun *
3154*4882a593Smuzhiyun * Bulk free of a freelist with several objects (all pointing to the
3155*4882a593Smuzhiyun * same page) is possible by specifying a head and tail pointer, plus the
3156*4882a593Smuzhiyun * object count (cnt). Bulk free is indicated by the tail pointer being set.
3157*4882a593Smuzhiyun */
3158*4882a593Smuzhiyun static __always_inline void do_slab_free(struct kmem_cache *s,
3159*4882a593Smuzhiyun struct page *page, void *head, void *tail,
3160*4882a593Smuzhiyun int cnt, unsigned long addr)
3161*4882a593Smuzhiyun {
3162*4882a593Smuzhiyun void *tail_obj = tail ? : head;
3163*4882a593Smuzhiyun struct kmem_cache_cpu *c;
3164*4882a593Smuzhiyun unsigned long tid;
3165*4882a593Smuzhiyun
3166*4882a593Smuzhiyun /* memcg_slab_free_hook() is already called for bulk free. */
3167*4882a593Smuzhiyun if (!tail)
3168*4882a593Smuzhiyun memcg_slab_free_hook(s, &head, 1);
3169*4882a593Smuzhiyun redo:
3170*4882a593Smuzhiyun /*
3171*4882a593Smuzhiyun * Determine the current cpu's per cpu slab.
3172*4882a593Smuzhiyun * The cpu may change afterward. However that does not matter since
3173*4882a593Smuzhiyun * data is retrieved via this pointer. If we are on the same cpu
3174*4882a593Smuzhiyun * during the cmpxchg then the free will succeed.
3175*4882a593Smuzhiyun */
3176*4882a593Smuzhiyun do {
3177*4882a593Smuzhiyun tid = this_cpu_read(s->cpu_slab->tid);
3178*4882a593Smuzhiyun c = raw_cpu_ptr(s->cpu_slab);
3179*4882a593Smuzhiyun } while (IS_ENABLED(CONFIG_PREEMPTION) &&
3180*4882a593Smuzhiyun unlikely(tid != READ_ONCE(c->tid)));
3181*4882a593Smuzhiyun
3182*4882a593Smuzhiyun /* Same with comment on barrier() in slab_alloc_node() */
3183*4882a593Smuzhiyun barrier();
3184*4882a593Smuzhiyun
3185*4882a593Smuzhiyun if (likely(page == c->page)) {
3186*4882a593Smuzhiyun void **freelist = READ_ONCE(c->freelist);
3187*4882a593Smuzhiyun
3188*4882a593Smuzhiyun set_freepointer(s, tail_obj, freelist);
3189*4882a593Smuzhiyun
3190*4882a593Smuzhiyun if (unlikely(!this_cpu_cmpxchg_double(
3191*4882a593Smuzhiyun s->cpu_slab->freelist, s->cpu_slab->tid,
3192*4882a593Smuzhiyun freelist, tid,
3193*4882a593Smuzhiyun head, next_tid(tid)))) {
3194*4882a593Smuzhiyun
3195*4882a593Smuzhiyun note_cmpxchg_failure("slab_free", s, tid);
3196*4882a593Smuzhiyun goto redo;
3197*4882a593Smuzhiyun }
3198*4882a593Smuzhiyun stat(s, FREE_FASTPATH);
3199*4882a593Smuzhiyun } else
3200*4882a593Smuzhiyun __slab_free(s, page, head, tail_obj, cnt, addr);
3201*4882a593Smuzhiyun
3202*4882a593Smuzhiyun }
3203*4882a593Smuzhiyun
3204*4882a593Smuzhiyun static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
3205*4882a593Smuzhiyun void *head, void *tail, int cnt,
3206*4882a593Smuzhiyun unsigned long addr)
3207*4882a593Smuzhiyun {
3208*4882a593Smuzhiyun /*
3209*4882a593Smuzhiyun * With KASAN enabled slab_free_freelist_hook modifies the freelist
3210*4882a593Smuzhiyun * to remove objects, whose reuse must be delayed.
3211*4882a593Smuzhiyun */
3212*4882a593Smuzhiyun if (slab_free_freelist_hook(s, &head, &tail, &cnt))
3213*4882a593Smuzhiyun do_slab_free(s, page, head, tail, cnt, addr);
3214*4882a593Smuzhiyun }
3215*4882a593Smuzhiyun
3216*4882a593Smuzhiyun #ifdef CONFIG_KASAN_GENERIC
3217*4882a593Smuzhiyun void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
3218*4882a593Smuzhiyun {
3219*4882a593Smuzhiyun do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
3220*4882a593Smuzhiyun }
3221*4882a593Smuzhiyun #endif
3222*4882a593Smuzhiyun
3223*4882a593Smuzhiyun void kmem_cache_free(struct kmem_cache *s, void *x)
3224*4882a593Smuzhiyun {
3225*4882a593Smuzhiyun s = cache_from_obj(s, x);
3226*4882a593Smuzhiyun if (!s)
3227*4882a593Smuzhiyun return;
3228*4882a593Smuzhiyun slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
3229*4882a593Smuzhiyun trace_kmem_cache_free(_RET_IP_, x);
3230*4882a593Smuzhiyun }
3231*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_free);
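/*
 * Typical caller-side pairing of the exported entry points above
 * (sketch; the cache and structure names are made up):
 *
 *	static struct kmem_cache *foo_cache;
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo),
 *				      0, SLAB_HWCACHE_ALIGN, NULL);
 *	f = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cache, f);
 *	kmem_cache_destroy(foo_cache);
 */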
3232*4882a593Smuzhiyun
3233*4882a593Smuzhiyun struct detached_freelist {
3234*4882a593Smuzhiyun struct page *page;
3235*4882a593Smuzhiyun void *tail;
3236*4882a593Smuzhiyun void *freelist;
3237*4882a593Smuzhiyun int cnt;
3238*4882a593Smuzhiyun struct kmem_cache *s;
3239*4882a593Smuzhiyun };
3240*4882a593Smuzhiyun
3241*4882a593Smuzhiyun /*
3242*4882a593Smuzhiyun * This function progressively scans the array with free objects (with
3243*4882a593Smuzhiyun * a limited look ahead) and extracts objects belonging to the same
3244*4882a593Smuzhiyun * page. It builds a detached freelist directly within the given
3245*4882a593Smuzhiyun * page/objects. This can happen without any need for
3246*4882a593Smuzhiyun * synchronization, because the objects are owned by the running process.
3247*4882a593Smuzhiyun * The freelist is built up as a singly linked list in the objects.
3248*4882a593Smuzhiyun * The idea is that this detached freelist can then be bulk
3249*4882a593Smuzhiyun * transferred to the real freelist(s), requiring only a single
3250*4882a593Smuzhiyun * synchronization primitive. Look ahead in the array is limited for
3251*4882a593Smuzhiyun * performance reasons.
3252*4882a593Smuzhiyun */
3253*4882a593Smuzhiyun static inline
3254*4882a593Smuzhiyun int build_detached_freelist(struct kmem_cache *s, size_t size,
3255*4882a593Smuzhiyun void **p, struct detached_freelist *df)
3256*4882a593Smuzhiyun {
3257*4882a593Smuzhiyun size_t first_skipped_index = 0;
3258*4882a593Smuzhiyun int lookahead = 3;
3259*4882a593Smuzhiyun void *object;
3260*4882a593Smuzhiyun struct page *page;
3261*4882a593Smuzhiyun
3262*4882a593Smuzhiyun /* Always re-init detached_freelist */
3263*4882a593Smuzhiyun df->page = NULL;
3264*4882a593Smuzhiyun
3265*4882a593Smuzhiyun do {
3266*4882a593Smuzhiyun object = p[--size];
3267*4882a593Smuzhiyun /* Do we need !ZERO_OR_NULL_PTR(object) here? (for kfree) */
3268*4882a593Smuzhiyun } while (!object && size);
3269*4882a593Smuzhiyun
3270*4882a593Smuzhiyun if (!object)
3271*4882a593Smuzhiyun return 0;
3272*4882a593Smuzhiyun
3273*4882a593Smuzhiyun page = virt_to_head_page(object);
3274*4882a593Smuzhiyun if (!s) {
3275*4882a593Smuzhiyun /* Handle kmalloc'ed objects */
3276*4882a593Smuzhiyun if (unlikely(!PageSlab(page))) {
3277*4882a593Smuzhiyun BUG_ON(!PageCompound(page));
3278*4882a593Smuzhiyun kfree_hook(object);
3279*4882a593Smuzhiyun __free_pages(page, compound_order(page));
3280*4882a593Smuzhiyun p[size] = NULL; /* mark object processed */
3281*4882a593Smuzhiyun return size;
3282*4882a593Smuzhiyun }
3283*4882a593Smuzhiyun /* Derive kmem_cache from object */
3284*4882a593Smuzhiyun df->s = page->slab_cache;
3285*4882a593Smuzhiyun } else {
3286*4882a593Smuzhiyun df->s = cache_from_obj(s, object); /* Support for memcg */
3287*4882a593Smuzhiyun }
3288*4882a593Smuzhiyun
3289*4882a593Smuzhiyun if (is_kfence_address(object)) {
3290*4882a593Smuzhiyun slab_free_hook(df->s, object, false);
3291*4882a593Smuzhiyun __kfence_free(object);
3292*4882a593Smuzhiyun p[size] = NULL; /* mark object processed */
3293*4882a593Smuzhiyun return size;
3294*4882a593Smuzhiyun }
3295*4882a593Smuzhiyun
3296*4882a593Smuzhiyun /* Start new detached freelist */
3297*4882a593Smuzhiyun df->page = page;
3298*4882a593Smuzhiyun set_freepointer(df->s, object, NULL);
3299*4882a593Smuzhiyun df->tail = object;
3300*4882a593Smuzhiyun df->freelist = object;
3301*4882a593Smuzhiyun p[size] = NULL; /* mark object processed */
3302*4882a593Smuzhiyun df->cnt = 1;
3303*4882a593Smuzhiyun
3304*4882a593Smuzhiyun while (size) {
3305*4882a593Smuzhiyun object = p[--size];
3306*4882a593Smuzhiyun if (!object)
3307*4882a593Smuzhiyun continue; /* Skip processed objects */
3308*4882a593Smuzhiyun
3309*4882a593Smuzhiyun /* df->page is always set at this point */
3310*4882a593Smuzhiyun if (df->page == virt_to_head_page(object)) {
3311*4882a593Smuzhiyun /* Opportunistically build the freelist */
3312*4882a593Smuzhiyun set_freepointer(df->s, object, df->freelist);
3313*4882a593Smuzhiyun df->freelist = object;
3314*4882a593Smuzhiyun df->cnt++;
3315*4882a593Smuzhiyun p[size] = NULL; /* mark object processed */
3316*4882a593Smuzhiyun
3317*4882a593Smuzhiyun continue;
3318*4882a593Smuzhiyun }
3319*4882a593Smuzhiyun
3320*4882a593Smuzhiyun /* Limit look ahead search */
3321*4882a593Smuzhiyun if (!--lookahead)
3322*4882a593Smuzhiyun break;
3323*4882a593Smuzhiyun
3324*4882a593Smuzhiyun if (!first_skipped_index)
3325*4882a593Smuzhiyun first_skipped_index = size + 1;
3326*4882a593Smuzhiyun }
3327*4882a593Smuzhiyun
3328*4882a593Smuzhiyun return first_skipped_index;
3329*4882a593Smuzhiyun }
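/*
 * Worked example of the grouping above, with made-up objects: for
 * p[] = { A1, B1, A2 }, where the A objects share a page and B1 lives
 * on another, the array is scanned from the end, so the first call
 * links A2 and A1 into one detached freelist (cnt == 2), leaves B1 in
 * place and returns 2. kmem_cache_free_bulk() below then calls again
 * with size == 2, which builds and frees a freelist containing only B1.
 * Each detached freelist costs a single slab_free() call.
 */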
3330*4882a593Smuzhiyun
3331*4882a593Smuzhiyun /* Note that interrupts must be enabled when calling this function. */
3332*4882a593Smuzhiyun void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3333*4882a593Smuzhiyun {
3334*4882a593Smuzhiyun if (WARN_ON(!size))
3335*4882a593Smuzhiyun return;
3336*4882a593Smuzhiyun
3337*4882a593Smuzhiyun memcg_slab_free_hook(s, p, size);
3338*4882a593Smuzhiyun do {
3339*4882a593Smuzhiyun struct detached_freelist df;
3340*4882a593Smuzhiyun
3341*4882a593Smuzhiyun size = build_detached_freelist(s, size, p, &df);
3342*4882a593Smuzhiyun if (!df.page)
3343*4882a593Smuzhiyun continue;
3344*4882a593Smuzhiyun
3345*4882a593Smuzhiyun slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_);
3346*4882a593Smuzhiyun } while (likely(size));
3347*4882a593Smuzhiyun }
3348*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_free_bulk);
3349*4882a593Smuzhiyun
3350*4882a593Smuzhiyun /* Note that interrupts must be enabled when calling this function. */
3351*4882a593Smuzhiyun int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3352*4882a593Smuzhiyun void **p)
3353*4882a593Smuzhiyun {
3354*4882a593Smuzhiyun struct kmem_cache_cpu *c;
3355*4882a593Smuzhiyun int i;
3356*4882a593Smuzhiyun struct obj_cgroup *objcg = NULL;
3357*4882a593Smuzhiyun
3358*4882a593Smuzhiyun /* memcg and kmem_cache debug support */
3359*4882a593Smuzhiyun s = slab_pre_alloc_hook(s, &objcg, size, flags);
3360*4882a593Smuzhiyun if (unlikely(!s))
3361*4882a593Smuzhiyun return false;
3362*4882a593Smuzhiyun /*
3363*4882a593Smuzhiyun * Drain objects in the per cpu slab, while disabling local
3364*4882a593Smuzhiyun * IRQs, which protects against PREEMPT and interrupt
3365*4882a593Smuzhiyun * handlers invoking the normal fastpath.
3366*4882a593Smuzhiyun */
3367*4882a593Smuzhiyun local_irq_disable();
3368*4882a593Smuzhiyun c = this_cpu_ptr(s->cpu_slab);
3369*4882a593Smuzhiyun
3370*4882a593Smuzhiyun for (i = 0; i < size; i++) {
3371*4882a593Smuzhiyun void *object = kfence_alloc(s, s->object_size, flags);
3372*4882a593Smuzhiyun
3373*4882a593Smuzhiyun if (unlikely(object)) {
3374*4882a593Smuzhiyun p[i] = object;
3375*4882a593Smuzhiyun continue;
3376*4882a593Smuzhiyun }
3377*4882a593Smuzhiyun
3378*4882a593Smuzhiyun object = c->freelist;
3379*4882a593Smuzhiyun if (unlikely(!object)) {
3380*4882a593Smuzhiyun /*
3381*4882a593Smuzhiyun * We may have removed an object from c->freelist using
3382*4882a593Smuzhiyun * the fastpath in the previous iteration; in that case,
3383*4882a593Smuzhiyun * c->tid has not been bumped yet.
3384*4882a593Smuzhiyun * Since ___slab_alloc() may reenable interrupts while
3385*4882a593Smuzhiyun * allocating memory, we should bump c->tid now.
3386*4882a593Smuzhiyun */
3387*4882a593Smuzhiyun c->tid = next_tid(c->tid);
3388*4882a593Smuzhiyun
3389*4882a593Smuzhiyun /*
3390*4882a593Smuzhiyun * Invoking the slow path likely has the side effect
3391*4882a593Smuzhiyun * of re-populating the per CPU c->freelist.
3392*4882a593Smuzhiyun */
3393*4882a593Smuzhiyun p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3394*4882a593Smuzhiyun _RET_IP_, c);
3395*4882a593Smuzhiyun if (unlikely(!p[i]))
3396*4882a593Smuzhiyun goto error;
3397*4882a593Smuzhiyun
3398*4882a593Smuzhiyun c = this_cpu_ptr(s->cpu_slab);
3399*4882a593Smuzhiyun maybe_wipe_obj_freeptr(s, p[i]);
3400*4882a593Smuzhiyun
3401*4882a593Smuzhiyun continue; /* goto for-loop */
3402*4882a593Smuzhiyun }
3403*4882a593Smuzhiyun c->freelist = get_freepointer(s, object);
3404*4882a593Smuzhiyun p[i] = object;
3405*4882a593Smuzhiyun maybe_wipe_obj_freeptr(s, p[i]);
3406*4882a593Smuzhiyun }
3407*4882a593Smuzhiyun c->tid = next_tid(c->tid);
3408*4882a593Smuzhiyun local_irq_enable();
3409*4882a593Smuzhiyun
3410*4882a593Smuzhiyun /*
3411*4882a593Smuzhiyun * memcg and kmem_cache debug support and memory initialization.
3412*4882a593Smuzhiyun * Done outside of the IRQ disabled fastpath loop.
3413*4882a593Smuzhiyun */
3414*4882a593Smuzhiyun slab_post_alloc_hook(s, objcg, flags, size, p,
3415*4882a593Smuzhiyun slab_want_init_on_alloc(flags, s));
3416*4882a593Smuzhiyun return i;
3417*4882a593Smuzhiyun error:
3418*4882a593Smuzhiyun local_irq_enable();
3419*4882a593Smuzhiyun slab_post_alloc_hook(s, objcg, flags, i, p, false);
3420*4882a593Smuzhiyun __kmem_cache_free_bulk(s, i, p);
3421*4882a593Smuzhiyun return 0;
3422*4882a593Smuzhiyun }
3423*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc_bulk);
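/*
 * Sketch of how a caller typically pairs the two bulk interfaces above
 * (hypothetical cache and batch size):
 *
 *	void *objs[16];
 *
 *	if (!kmem_cache_alloc_bulk(foo_cache, GFP_KERNEL, 16, objs))
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free_bulk(foo_cache, 16, objs);
 *
 * As the code above shows, kmem_cache_alloc_bulk() either fills the
 * whole array and returns the requested count or cleans up after itself
 * and returns 0, and interrupts must be enabled for both calls.
 */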
3424*4882a593Smuzhiyun
3425*4882a593Smuzhiyun
3426*4882a593Smuzhiyun /*
3427*4882a593Smuzhiyun * Object placement in a slab is made very easy because we always start at
3428*4882a593Smuzhiyun * offset 0. If we tune the size of the object to the alignment then we can
3429*4882a593Smuzhiyun * get the required alignment by putting one properly sized object after
3430*4882a593Smuzhiyun * another.
3431*4882a593Smuzhiyun *
3432*4882a593Smuzhiyun * Notice that the allocation order determines the sizes of the per cpu
3433*4882a593Smuzhiyun * caches. Each processor always has one slab available for allocations.
3434*4882a593Smuzhiyun * Increasing the allocation order reduces the number of times that slabs
3435*4882a593Smuzhiyun * must be moved on and off the partial lists and is therefore a factor in
3436*4882a593Smuzhiyun * locking overhead.
3437*4882a593Smuzhiyun */
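/*
 * For illustration (hypothetical numbers, not taken from this code):
 * with a 64-byte alignment requirement, a 40-byte object is sized up to
 * 64 bytes, so object N starts at offset N * 64 and every object stays
 * aligned without any per-object header or padding bookkeeping.
 */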
3438*4882a593Smuzhiyun
3439*4882a593Smuzhiyun /*
3440*4882a593Smuzhiyun * Minimum / Maximum order of slab pages. This influences locking overhead
3441*4882a593Smuzhiyun * and slab fragmentation. A higher order reduces the number of partial slabs
3442*4882a593Smuzhiyun * and increases the number of allocations possible without having to
3443*4882a593Smuzhiyun * take the list_lock.
3444*4882a593Smuzhiyun */
3445*4882a593Smuzhiyun static unsigned int slub_min_order;
3446*4882a593Smuzhiyun static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3447*4882a593Smuzhiyun static unsigned int slub_min_objects;
3448*4882a593Smuzhiyun
3449*4882a593Smuzhiyun /*
3450*4882a593Smuzhiyun * Calculate the order of allocation given a slab object size.
3451*4882a593Smuzhiyun *
3452*4882a593Smuzhiyun * The order of allocation has significant impact on performance and other
3453*4882a593Smuzhiyun * system components. Generally order 0 allocations should be preferred since
3454*4882a593Smuzhiyun * order 0 does not cause fragmentation in the page allocator. Larger objects
3455*4882a593Smuzhiyun * can be problematic to put into order 0 slabs because there may be too much
3456*4882a593Smuzhiyun * unused space left. We go to a higher order if more than 1/16th of the slab
3457*4882a593Smuzhiyun * would be wasted.
3458*4882a593Smuzhiyun *
3459*4882a593Smuzhiyun * In order to reach satisfactory performance we must ensure that a minimum
3460*4882a593Smuzhiyun * number of objects is in one slab. Otherwise we may generate too much
3461*4882a593Smuzhiyun * activity on the partial lists which requires taking the list_lock. This is
3462*4882a593Smuzhiyun * less of a concern for large slabs, though, which are rarely used.
3463*4882a593Smuzhiyun *
3464*4882a593Smuzhiyun * slub_max_order specifies the order at which we stop considering the
3465*4882a593Smuzhiyun * number of objects in a slab as critical. If we reach slub_max_order then
3466*4882a593Smuzhiyun * we try to keep the page order as low as possible. So we accept more waste
3467*4882a593Smuzhiyun * of space in favor of a small page order.
3468*4882a593Smuzhiyun *
3469*4882a593Smuzhiyun * Higher order allocations also allow the placement of more objects in a
3470*4882a593Smuzhiyun * slab and thereby reduce object handling overhead. If the user has
3471*4882a593Smuzhiyun * requested a higher minimum order then we start with that one instead of
3472*4882a593Smuzhiyun * the smallest order which will fit the object.
3473*4882a593Smuzhiyun */
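/*
 * Worked example, assuming 4 KiB pages, slub_min_order == 0 and
 * min_objects == 1 (illustrative values only): for size == 700,
 * order 0 leaves 4096 % 700 == 596 bytes unused, which is more than
 * 4096 / 16 == 256, so it is rejected; order 1 leaves 8192 % 700 == 492
 * bytes unused, which is within 8192 / 16 == 512, so slab_order()
 * returns 1 and the slab holds 11 objects.
 */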
3474*4882a593Smuzhiyun static inline unsigned int slab_order(unsigned int size,
3475*4882a593Smuzhiyun unsigned int min_objects, unsigned int max_order,
3476*4882a593Smuzhiyun unsigned int fract_leftover)
3477*4882a593Smuzhiyun {
3478*4882a593Smuzhiyun unsigned int min_order = slub_min_order;
3479*4882a593Smuzhiyun unsigned int order;
3480*4882a593Smuzhiyun
3481*4882a593Smuzhiyun if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
3482*4882a593Smuzhiyun return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3483*4882a593Smuzhiyun
3484*4882a593Smuzhiyun for (order = max(min_order, (unsigned int)get_order(min_objects * size));
3485*4882a593Smuzhiyun order <= max_order; order++) {
3486*4882a593Smuzhiyun
3487*4882a593Smuzhiyun unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
3488*4882a593Smuzhiyun unsigned int rem;
3489*4882a593Smuzhiyun
3490*4882a593Smuzhiyun rem = slab_size % size;
3491*4882a593Smuzhiyun
3492*4882a593Smuzhiyun if (rem <= slab_size / fract_leftover)
3493*4882a593Smuzhiyun break;
3494*4882a593Smuzhiyun }
3495*4882a593Smuzhiyun
3496*4882a593Smuzhiyun return order;
3497*4882a593Smuzhiyun }
3498*4882a593Smuzhiyun
3499*4882a593Smuzhiyun static inline int calculate_order(unsigned int size)
3500*4882a593Smuzhiyun {
3501*4882a593Smuzhiyun unsigned int order;
3502*4882a593Smuzhiyun unsigned int min_objects;
3503*4882a593Smuzhiyun unsigned int max_objects;
3504*4882a593Smuzhiyun
3505*4882a593Smuzhiyun /*
3506*4882a593Smuzhiyun * Attempt to find best configuration for a slab. This
3507*4882a593Smuzhiyun * works by first attempting to generate a layout with
3508*4882a593Smuzhiyun * the best configuration and backing off gradually.
3509*4882a593Smuzhiyun *
3510*4882a593Smuzhiyun * First we increase the acceptable waste in a slab. Then
3511*4882a593Smuzhiyun * we reduce the minimum objects required in a slab.
3512*4882a593Smuzhiyun */
3513*4882a593Smuzhiyun min_objects = slub_min_objects;
3514*4882a593Smuzhiyun if (!min_objects)
3515*4882a593Smuzhiyun min_objects = 4 * (fls(nr_cpu_ids) + 1);
3516*4882a593Smuzhiyun max_objects = order_objects(slub_max_order, size);
3517*4882a593Smuzhiyun min_objects = min(min_objects, max_objects);
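/*
 * Example of the default sizing (assumed values): with 8 possible CPUs,
 * fls(8) == 4, so min_objects == 4 * (4 + 1) == 20. For a 256-byte
 * object with 4 KiB pages and slub_max_order == 3, max_objects is
 * (4096 << 3) / 256 == 128, so the cap does not apply and we still aim
 * for 20 objects per slab.
 */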
3518*4882a593Smuzhiyun
3519*4882a593Smuzhiyun while (min_objects > 1) {
3520*4882a593Smuzhiyun unsigned int fraction;
3521*4882a593Smuzhiyun
3522*4882a593Smuzhiyun fraction = 16;
3523*4882a593Smuzhiyun while (fraction >= 4) {
3524*4882a593Smuzhiyun order = slab_order(size, min_objects,
3525*4882a593Smuzhiyun slub_max_order, fraction);
3526*4882a593Smuzhiyun if (order <= slub_max_order)
3527*4882a593Smuzhiyun return order;
3528*4882a593Smuzhiyun fraction /= 2;
3529*4882a593Smuzhiyun }
3530*4882a593Smuzhiyun min_objects--;
3531*4882a593Smuzhiyun }
3532*4882a593Smuzhiyun
3533*4882a593Smuzhiyun /*
3534*4882a593Smuzhiyun * We were unable to place multiple objects in a slab. Now
3535*4882a593Smuzhiyun * let's see if we can place a single object there.
3536*4882a593Smuzhiyun */
3537*4882a593Smuzhiyun order = slab_order(size, 1, slub_max_order, 1);
3538*4882a593Smuzhiyun if (order <= slub_max_order)
3539*4882a593Smuzhiyun return order;
3540*4882a593Smuzhiyun
3541*4882a593Smuzhiyun /*
3542*4882a593Smuzhiyun * Doh this slab cannot be placed using slub_max_order.
3543*4882a593Smuzhiyun */
3544*4882a593Smuzhiyun order = slab_order(size, 1, MAX_ORDER, 1);
3545*4882a593Smuzhiyun if (order < MAX_ORDER)
3546*4882a593Smuzhiyun return order;
3547*4882a593Smuzhiyun return -ENOSYS;
3548*4882a593Smuzhiyun }
3549*4882a593Smuzhiyun
3550*4882a593Smuzhiyun static void
3551*4882a593Smuzhiyun init_kmem_cache_node(struct kmem_cache_node *n)
3552*4882a593Smuzhiyun {
3553*4882a593Smuzhiyun n->nr_partial = 0;
3554*4882a593Smuzhiyun spin_lock_init(&n->list_lock);
3555*4882a593Smuzhiyun INIT_LIST_HEAD(&n->partial);
3556*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
3557*4882a593Smuzhiyun atomic_long_set(&n->nr_slabs, 0);
3558*4882a593Smuzhiyun atomic_long_set(&n->total_objects, 0);
3559*4882a593Smuzhiyun INIT_LIST_HEAD(&n->full);
3560*4882a593Smuzhiyun #endif
3561*4882a593Smuzhiyun }
3562*4882a593Smuzhiyun
3563*4882a593Smuzhiyun static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3564*4882a593Smuzhiyun {
3565*4882a593Smuzhiyun BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3566*4882a593Smuzhiyun KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3567*4882a593Smuzhiyun
3568*4882a593Smuzhiyun /*
3569*4882a593Smuzhiyun * Must align to double word boundary for the double cmpxchg
3570*4882a593Smuzhiyun * instructions to work; see __pcpu_double_call_return_bool().
3571*4882a593Smuzhiyun */
3572*4882a593Smuzhiyun s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3573*4882a593Smuzhiyun 2 * sizeof(void *));
3574*4882a593Smuzhiyun
3575*4882a593Smuzhiyun if (!s->cpu_slab)
3576*4882a593Smuzhiyun return 0;
3577*4882a593Smuzhiyun
3578*4882a593Smuzhiyun init_kmem_cache_cpus(s);
3579*4882a593Smuzhiyun
3580*4882a593Smuzhiyun return 1;
3581*4882a593Smuzhiyun }
3582*4882a593Smuzhiyun
3583*4882a593Smuzhiyun static struct kmem_cache *kmem_cache_node;
3584*4882a593Smuzhiyun
3585*4882a593Smuzhiyun /*
3586*4882a593Smuzhiyun * No kmalloc_node yet so do it by hand. We know that this is the first
3587*4882a593Smuzhiyun * slab on the node for this slabcache. There are no concurrent accesses
3588*4882a593Smuzhiyun * possible.
3589*4882a593Smuzhiyun *
3590*4882a593Smuzhiyun * Note that this function only works on the kmem_cache_node
3591*4882a593Smuzhiyun * when allocating for the kmem_cache_node. This is used for bootstrapping
3592*4882a593Smuzhiyun * memory on a fresh node that has no slab structures yet.
3593*4882a593Smuzhiyun */
3594*4882a593Smuzhiyun static void early_kmem_cache_node_alloc(int node)
3595*4882a593Smuzhiyun {
3596*4882a593Smuzhiyun struct page *page;
3597*4882a593Smuzhiyun struct kmem_cache_node *n;
3598*4882a593Smuzhiyun
3599*4882a593Smuzhiyun BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3600*4882a593Smuzhiyun
3601*4882a593Smuzhiyun page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3602*4882a593Smuzhiyun
3603*4882a593Smuzhiyun BUG_ON(!page);
3604*4882a593Smuzhiyun if (page_to_nid(page) != node) {
3605*4882a593Smuzhiyun pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3606*4882a593Smuzhiyun pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3607*4882a593Smuzhiyun }
3608*4882a593Smuzhiyun
3609*4882a593Smuzhiyun n = page->freelist;
3610*4882a593Smuzhiyun BUG_ON(!n);
3611*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
3612*4882a593Smuzhiyun init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3613*4882a593Smuzhiyun init_tracking(kmem_cache_node, n);
3614*4882a593Smuzhiyun #endif
3615*4882a593Smuzhiyun n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
3616*4882a593Smuzhiyun page->freelist = get_freepointer(kmem_cache_node, n);
3617*4882a593Smuzhiyun page->inuse = 1;
3618*4882a593Smuzhiyun page->frozen = 0;
3619*4882a593Smuzhiyun kmem_cache_node->node[node] = n;
3620*4882a593Smuzhiyun init_kmem_cache_node(n);
3621*4882a593Smuzhiyun inc_slabs_node(kmem_cache_node, node, page->objects);
3622*4882a593Smuzhiyun
3623*4882a593Smuzhiyun /*
3624*4882a593Smuzhiyun * No locks need to be taken here as it has just been
3625*4882a593Smuzhiyun * initialized and there is no concurrent access.
3626*4882a593Smuzhiyun */
3627*4882a593Smuzhiyun __add_partial(n, page, DEACTIVATE_TO_HEAD);
3628*4882a593Smuzhiyun }
3629*4882a593Smuzhiyun
3630*4882a593Smuzhiyun static void free_kmem_cache_nodes(struct kmem_cache *s)
3631*4882a593Smuzhiyun {
3632*4882a593Smuzhiyun int node;
3633*4882a593Smuzhiyun struct kmem_cache_node *n;
3634*4882a593Smuzhiyun
3635*4882a593Smuzhiyun for_each_kmem_cache_node(s, node, n) {
3636*4882a593Smuzhiyun s->node[node] = NULL;
3637*4882a593Smuzhiyun kmem_cache_free(kmem_cache_node, n);
3638*4882a593Smuzhiyun }
3639*4882a593Smuzhiyun }
3640*4882a593Smuzhiyun
3641*4882a593Smuzhiyun void __kmem_cache_release(struct kmem_cache *s)
3642*4882a593Smuzhiyun {
3643*4882a593Smuzhiyun cache_random_seq_destroy(s);
3644*4882a593Smuzhiyun free_percpu(s->cpu_slab);
3645*4882a593Smuzhiyun free_kmem_cache_nodes(s);
3646*4882a593Smuzhiyun }
3647*4882a593Smuzhiyun
3648*4882a593Smuzhiyun static int init_kmem_cache_nodes(struct kmem_cache *s)
3649*4882a593Smuzhiyun {
3650*4882a593Smuzhiyun int node;
3651*4882a593Smuzhiyun
3652*4882a593Smuzhiyun for_each_node_state(node, N_NORMAL_MEMORY) {
3653*4882a593Smuzhiyun struct kmem_cache_node *n;
3654*4882a593Smuzhiyun
3655*4882a593Smuzhiyun if (slab_state == DOWN) {
3656*4882a593Smuzhiyun early_kmem_cache_node_alloc(node);
3657*4882a593Smuzhiyun continue;
3658*4882a593Smuzhiyun }
3659*4882a593Smuzhiyun n = kmem_cache_alloc_node(kmem_cache_node,
3660*4882a593Smuzhiyun GFP_KERNEL, node);
3661*4882a593Smuzhiyun
3662*4882a593Smuzhiyun if (!n) {
3663*4882a593Smuzhiyun free_kmem_cache_nodes(s);
3664*4882a593Smuzhiyun return 0;
3665*4882a593Smuzhiyun }
3666*4882a593Smuzhiyun
3667*4882a593Smuzhiyun init_kmem_cache_node(n);
3668*4882a593Smuzhiyun s->node[node] = n;
3669*4882a593Smuzhiyun }
3670*4882a593Smuzhiyun return 1;
3671*4882a593Smuzhiyun }
3672*4882a593Smuzhiyun
3673*4882a593Smuzhiyun static void set_min_partial(struct kmem_cache *s, unsigned long min)
3674*4882a593Smuzhiyun {
3675*4882a593Smuzhiyun if (min < MIN_PARTIAL)
3676*4882a593Smuzhiyun min = MIN_PARTIAL;
3677*4882a593Smuzhiyun else if (min > MAX_PARTIAL)
3678*4882a593Smuzhiyun min = MAX_PARTIAL;
3679*4882a593Smuzhiyun s->min_partial = min;
3680*4882a593Smuzhiyun }
3681*4882a593Smuzhiyun
3682*4882a593Smuzhiyun static void set_cpu_partial(struct kmem_cache *s)
3683*4882a593Smuzhiyun {
3684*4882a593Smuzhiyun #ifdef CONFIG_SLUB_CPU_PARTIAL
3685*4882a593Smuzhiyun /*
3686*4882a593Smuzhiyun * cpu_partial determines the maximum number of objects kept in the
3687*4882a593Smuzhiyun * per cpu partial lists of a processor.
3688*4882a593Smuzhiyun *
3689*4882a593Smuzhiyun * Per cpu partial lists mainly contain slabs that just have one
3690*4882a593Smuzhiyun * object freed. If they are used for allocation then they can be
3691*4882a593Smuzhiyun * filled up again with minimal effort. The slab will never hit the
3692*4882a593Smuzhiyun * per node partial lists and therefore no locking will be required.
3693*4882a593Smuzhiyun *
3694*4882a593Smuzhiyun * This setting also determines
3695*4882a593Smuzhiyun *
3696*4882a593Smuzhiyun * A) The number of objects from per cpu partial slabs dumped to the
3697*4882a593Smuzhiyun * per node list when we reach the limit.
3698*4882a593Smuzhiyun * B) The number of objects in cpu partial slabs to extract from the
3699*4882a593Smuzhiyun * per node list when we run out of per cpu objects. We only fetch
3700*4882a593Smuzhiyun * 50% to keep some capacity around for frees.
3701*4882a593Smuzhiyun */
3702*4882a593Smuzhiyun if (!kmem_cache_has_cpu_partial(s))
3703*4882a593Smuzhiyun slub_set_cpu_partial(s, 0);
3704*4882a593Smuzhiyun else if (s->size >= PAGE_SIZE)
3705*4882a593Smuzhiyun slub_set_cpu_partial(s, 2);
3706*4882a593Smuzhiyun else if (s->size >= 1024)
3707*4882a593Smuzhiyun slub_set_cpu_partial(s, 6);
3708*4882a593Smuzhiyun else if (s->size >= 256)
3709*4882a593Smuzhiyun slub_set_cpu_partial(s, 13);
3710*4882a593Smuzhiyun else
3711*4882a593Smuzhiyun slub_set_cpu_partial(s, 30);
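/*
 * For example, a cache with s->size == 512 falls into the ">= 256"
 * bucket above and may keep up to 13 objects in its per-CPU partial
 * slabs, while a cache of PAGE_SIZE or more keeps only 2.
 */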
3712*4882a593Smuzhiyun #endif
3713*4882a593Smuzhiyun }
3714*4882a593Smuzhiyun
3715*4882a593Smuzhiyun /*
3716*4882a593Smuzhiyun * calculate_sizes() determines the order and the distribution of data within
3717*4882a593Smuzhiyun * a slab object.
3718*4882a593Smuzhiyun */
3719*4882a593Smuzhiyun static int calculate_sizes(struct kmem_cache *s, int forced_order)
3720*4882a593Smuzhiyun {
3721*4882a593Smuzhiyun slab_flags_t flags = s->flags;
3722*4882a593Smuzhiyun unsigned int size = s->object_size;
3723*4882a593Smuzhiyun unsigned int order;
3724*4882a593Smuzhiyun
3725*4882a593Smuzhiyun /*
3726*4882a593Smuzhiyun * Round up object size to the next word boundary. We can only
3727*4882a593Smuzhiyun * place the free pointer at word boundaries and this determines
3728*4882a593Smuzhiyun * the possible location of the free pointer.
3729*4882a593Smuzhiyun */
3730*4882a593Smuzhiyun size = ALIGN(size, sizeof(void *));
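/*
 * For illustration, on a 64-bit kernel (sizeof(void *) == 8, an
 * assumption here): an object_size of 70 is rounded up to 72, so a
 * free pointer stored inside the object always lands on a word
 * boundary.
 */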
3731*4882a593Smuzhiyun
3732*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
3733*4882a593Smuzhiyun /*
3734*4882a593Smuzhiyun * Determine if we can poison the object itself. If the user of
3735*4882a593Smuzhiyun * the slab may touch the object after free or before allocation
3736*4882a593Smuzhiyun * then we should never poison the object itself.
3737*4882a593Smuzhiyun */
3738*4882a593Smuzhiyun if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
3739*4882a593Smuzhiyun !s->ctor)
3740*4882a593Smuzhiyun s->flags |= __OBJECT_POISON;
3741*4882a593Smuzhiyun else
3742*4882a593Smuzhiyun s->flags &= ~__OBJECT_POISON;
3743*4882a593Smuzhiyun
3744*4882a593Smuzhiyun
3745*4882a593Smuzhiyun /*
3746*4882a593Smuzhiyun * If we are Redzoning then check if there is some space between the
3747*4882a593Smuzhiyun * end of the object and the free pointer. If not then add an
3748*4882a593Smuzhiyun * additional word to have some bytes to store Redzone information.
3749*4882a593Smuzhiyun */
3750*4882a593Smuzhiyun if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3751*4882a593Smuzhiyun size += sizeof(void *);
3752*4882a593Smuzhiyun #endif
3753*4882a593Smuzhiyun
3754*4882a593Smuzhiyun /*
3755*4882a593Smuzhiyun * With that we have determined the number of bytes in actual use
3756*4882a593Smuzhiyun * by the object and redzoning.
3757*4882a593Smuzhiyun */
3758*4882a593Smuzhiyun s->inuse = size;
3759*4882a593Smuzhiyun
3760*4882a593Smuzhiyun if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3761*4882a593Smuzhiyun ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
3762*4882a593Smuzhiyun s->ctor) {
3763*4882a593Smuzhiyun /*
3764*4882a593Smuzhiyun * Relocate free pointer after the object if it is not
3765*4882a593Smuzhiyun * permitted to overwrite the first word of the object on
3766*4882a593Smuzhiyun * kmem_cache_free.
3767*4882a593Smuzhiyun *
3768*4882a593Smuzhiyun * This is the case if we do RCU, have a constructor or
3769*4882a593Smuzhiyun * destructor, are poisoning the objects, or are
3770*4882a593Smuzhiyun * redzoning an object smaller than sizeof(void *).
3771*4882a593Smuzhiyun *
3772*4882a593Smuzhiyun * The assumption that s->offset >= s->inuse means free
3773*4882a593Smuzhiyun * pointer is outside of the object is used in the
3774*4882a593Smuzhiyun * freeptr_outside_object() function. If that is no
3775*4882a593Smuzhiyun * longer true, the function needs to be modified.
3776*4882a593Smuzhiyun */
3777*4882a593Smuzhiyun s->offset = size;
3778*4882a593Smuzhiyun size += sizeof(void *);
3779*4882a593Smuzhiyun } else {
3780*4882a593Smuzhiyun /*
3781*4882a593Smuzhiyun * Store freelist pointer near middle of object to keep
3782*4882a593Smuzhiyun * it away from the edges of the object to avoid small
3783*4882a593Smuzhiyun * sized over/underflows from neighboring allocations.
3784*4882a593Smuzhiyun */
3785*4882a593Smuzhiyun s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
3786*4882a593Smuzhiyun }
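/*
 * Example (assuming a 64-bit kernel): a plain 256-byte cache with no
 * constructor, RCU or poisoning takes the else branch above, so
 * s->offset == ALIGN_DOWN(128, 8) == 128 and the free pointer sits in
 * the middle of the object; with a constructor it would instead be
 * placed at the old end of the object and size would grow by 8 bytes.
 */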
3787*4882a593Smuzhiyun
3788*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
3789*4882a593Smuzhiyun if (flags & SLAB_STORE_USER)
3790*4882a593Smuzhiyun /*
3791*4882a593Smuzhiyun * Need to store information about allocs and frees after
3792*4882a593Smuzhiyun * the object.
3793*4882a593Smuzhiyun */
3794*4882a593Smuzhiyun size += 2 * sizeof(struct track);
3795*4882a593Smuzhiyun #endif
3796*4882a593Smuzhiyun
3797*4882a593Smuzhiyun kasan_cache_create(s, &size, &s->flags);
3798*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
3799*4882a593Smuzhiyun if (flags & SLAB_RED_ZONE) {
3800*4882a593Smuzhiyun /*
3801*4882a593Smuzhiyun * Add some empty padding so that we can catch
3802*4882a593Smuzhiyun * overwrites from earlier objects rather than let
3803*4882a593Smuzhiyun * tracking information or the free pointer be
3804*4882a593Smuzhiyun * corrupted if a user writes before the start
3805*4882a593Smuzhiyun * of the object.
3806*4882a593Smuzhiyun */
3807*4882a593Smuzhiyun size += sizeof(void *);
3808*4882a593Smuzhiyun
3809*4882a593Smuzhiyun s->red_left_pad = sizeof(void *);
3810*4882a593Smuzhiyun s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3811*4882a593Smuzhiyun size += s->red_left_pad;
3812*4882a593Smuzhiyun }
3813*4882a593Smuzhiyun #endif
3814*4882a593Smuzhiyun
3815*4882a593Smuzhiyun /*
3816*4882a593Smuzhiyun * SLUB stores one object immediately after another beginning from
3817*4882a593Smuzhiyun * offset 0. In order to align the objects we have to simply size
3818*4882a593Smuzhiyun * each object to conform to the alignment.
3819*4882a593Smuzhiyun */
3820*4882a593Smuzhiyun size = ALIGN(size, s->align);
3821*4882a593Smuzhiyun s->size = size;
3822*4882a593Smuzhiyun s->reciprocal_size = reciprocal_value(size);
3823*4882a593Smuzhiyun if (forced_order >= 0)
3824*4882a593Smuzhiyun order = forced_order;
3825*4882a593Smuzhiyun else
3826*4882a593Smuzhiyun order = calculate_order(size);
3827*4882a593Smuzhiyun
3828*4882a593Smuzhiyun if ((int)order < 0)
3829*4882a593Smuzhiyun return 0;
3830*4882a593Smuzhiyun
3831*4882a593Smuzhiyun s->allocflags = 0;
3832*4882a593Smuzhiyun if (order)
3833*4882a593Smuzhiyun s->allocflags |= __GFP_COMP;
3834*4882a593Smuzhiyun
3835*4882a593Smuzhiyun if (s->flags & SLAB_CACHE_DMA)
3836*4882a593Smuzhiyun s->allocflags |= GFP_DMA;
3837*4882a593Smuzhiyun
3838*4882a593Smuzhiyun if (s->flags & SLAB_CACHE_DMA32)
3839*4882a593Smuzhiyun s->allocflags |= GFP_DMA32;
3840*4882a593Smuzhiyun
3841*4882a593Smuzhiyun if (s->flags & SLAB_RECLAIM_ACCOUNT)
3842*4882a593Smuzhiyun s->allocflags |= __GFP_RECLAIMABLE;
3843*4882a593Smuzhiyun
3844*4882a593Smuzhiyun /*
3845*4882a593Smuzhiyun * Determine the number of objects per slab
3846*4882a593Smuzhiyun */
3847*4882a593Smuzhiyun s->oo = oo_make(order, size);
3848*4882a593Smuzhiyun s->min = oo_make(get_order(size), size);
3849*4882a593Smuzhiyun if (oo_objects(s->oo) > oo_objects(s->max))
3850*4882a593Smuzhiyun s->max = s->oo;
3851*4882a593Smuzhiyun
3852*4882a593Smuzhiyun return !!oo_objects(s->oo);
3853*4882a593Smuzhiyun }
3854*4882a593Smuzhiyun
3855*4882a593Smuzhiyun static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
3856*4882a593Smuzhiyun {
3857*4882a593Smuzhiyun s->flags = kmem_cache_flags(s->size, flags, s->name);
3858*4882a593Smuzhiyun #ifdef CONFIG_SLAB_FREELIST_HARDENED
3859*4882a593Smuzhiyun s->random = get_random_long();
3860*4882a593Smuzhiyun #endif
3861*4882a593Smuzhiyun
3862*4882a593Smuzhiyun if (!calculate_sizes(s, -1))
3863*4882a593Smuzhiyun goto error;
3864*4882a593Smuzhiyun if (disable_higher_order_debug) {
3865*4882a593Smuzhiyun /*
3866*4882a593Smuzhiyun * Disable debugging flags that store metadata if the min slab
3867*4882a593Smuzhiyun * order increased.
3868*4882a593Smuzhiyun */
3869*4882a593Smuzhiyun if (get_order(s->size) > get_order(s->object_size)) {
3870*4882a593Smuzhiyun s->flags &= ~DEBUG_METADATA_FLAGS;
3871*4882a593Smuzhiyun s->offset = 0;
3872*4882a593Smuzhiyun if (!calculate_sizes(s, -1))
3873*4882a593Smuzhiyun goto error;
3874*4882a593Smuzhiyun }
3875*4882a593Smuzhiyun }
3876*4882a593Smuzhiyun
3877*4882a593Smuzhiyun #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3878*4882a593Smuzhiyun defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3879*4882a593Smuzhiyun if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
3880*4882a593Smuzhiyun /* Enable fast mode */
3881*4882a593Smuzhiyun s->flags |= __CMPXCHG_DOUBLE;
3882*4882a593Smuzhiyun #endif
3883*4882a593Smuzhiyun
3884*4882a593Smuzhiyun /*
3885*4882a593Smuzhiyun * The larger the object size is, the more pages we want on the partial
3886*4882a593Smuzhiyun * list to avoid pounding the page allocator excessively.
3887*4882a593Smuzhiyun */
3888*4882a593Smuzhiyun set_min_partial(s, ilog2(s->size) / 2);
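/*
 * Example: a cache with s->size == 4096 gets ilog2(4096) / 2 == 6
 * partial slabs as its minimum, which falls inside the
 * MIN_PARTIAL..MAX_PARTIAL clamp in set_min_partial(), while a
 * 32-byte cache computes ilog2(32) / 2 == 2 and is raised to
 * MIN_PARTIAL.
 */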
3889*4882a593Smuzhiyun
3890*4882a593Smuzhiyun set_cpu_partial(s);
3891*4882a593Smuzhiyun
3892*4882a593Smuzhiyun #ifdef CONFIG_NUMA
3893*4882a593Smuzhiyun s->remote_node_defrag_ratio = 1000;
3894*4882a593Smuzhiyun #endif
3895*4882a593Smuzhiyun
3896*4882a593Smuzhiyun /* Initialize the pre-computed randomized freelist if slab is up */
3897*4882a593Smuzhiyun if (slab_state >= UP) {
3898*4882a593Smuzhiyun if (init_cache_random_seq(s))
3899*4882a593Smuzhiyun goto error;
3900*4882a593Smuzhiyun }
3901*4882a593Smuzhiyun
3902*4882a593Smuzhiyun if (!init_kmem_cache_nodes(s))
3903*4882a593Smuzhiyun goto error;
3904*4882a593Smuzhiyun
3905*4882a593Smuzhiyun if (alloc_kmem_cache_cpus(s))
3906*4882a593Smuzhiyun return 0;
3907*4882a593Smuzhiyun
3908*4882a593Smuzhiyun error:
3909*4882a593Smuzhiyun __kmem_cache_release(s);
3910*4882a593Smuzhiyun return -EINVAL;
3911*4882a593Smuzhiyun }
3912*4882a593Smuzhiyun
3913*4882a593Smuzhiyun static void list_slab_objects(struct kmem_cache *s, struct page *page,
3914*4882a593Smuzhiyun const char *text)
3915*4882a593Smuzhiyun {
3916*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
3917*4882a593Smuzhiyun void *addr = page_address(page);
3918*4882a593Smuzhiyun unsigned long *map;
3919*4882a593Smuzhiyun void *p;
3920*4882a593Smuzhiyun
3921*4882a593Smuzhiyun slab_err(s, page, text, s->name);
3922*4882a593Smuzhiyun slab_lock(page);
3923*4882a593Smuzhiyun
3924*4882a593Smuzhiyun map = get_map(s, page);
3925*4882a593Smuzhiyun for_each_object(p, s, addr, page->objects) {
3926*4882a593Smuzhiyun
3927*4882a593Smuzhiyun if (!test_bit(__obj_to_index(s, addr, p), map)) {
3928*4882a593Smuzhiyun pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
3929*4882a593Smuzhiyun print_tracking(s, p);
3930*4882a593Smuzhiyun }
3931*4882a593Smuzhiyun }
3932*4882a593Smuzhiyun put_map(map);
3933*4882a593Smuzhiyun slab_unlock(page);
3934*4882a593Smuzhiyun #endif
3935*4882a593Smuzhiyun }
3936*4882a593Smuzhiyun
3937*4882a593Smuzhiyun /*
3938*4882a593Smuzhiyun * Attempt to free all partial slabs on a node.
3939*4882a593Smuzhiyun * This is called from __kmem_cache_shutdown(). We must take list_lock
3940*4882a593Smuzhiyun * because a sysfs file might still access the partial list after the shutdown.
3941*4882a593Smuzhiyun */
3942*4882a593Smuzhiyun static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3943*4882a593Smuzhiyun {
3944*4882a593Smuzhiyun LIST_HEAD(discard);
3945*4882a593Smuzhiyun struct page *page, *h;
3946*4882a593Smuzhiyun
3947*4882a593Smuzhiyun BUG_ON(irqs_disabled());
3948*4882a593Smuzhiyun spin_lock_irq(&n->list_lock);
3949*4882a593Smuzhiyun list_for_each_entry_safe(page, h, &n->partial, slab_list) {
3950*4882a593Smuzhiyun if (!page->inuse) {
3951*4882a593Smuzhiyun remove_partial(n, page);
3952*4882a593Smuzhiyun list_add(&page->slab_list, &discard);
3953*4882a593Smuzhiyun } else {
3954*4882a593Smuzhiyun list_slab_objects(s, page,
3955*4882a593Smuzhiyun "Objects remaining in %s on __kmem_cache_shutdown()");
3956*4882a593Smuzhiyun }
3957*4882a593Smuzhiyun }
3958*4882a593Smuzhiyun spin_unlock_irq(&n->list_lock);
3959*4882a593Smuzhiyun
3960*4882a593Smuzhiyun list_for_each_entry_safe(page, h, &discard, slab_list)
3961*4882a593Smuzhiyun discard_slab(s, page);
3962*4882a593Smuzhiyun }
3963*4882a593Smuzhiyun
3964*4882a593Smuzhiyun bool __kmem_cache_empty(struct kmem_cache *s)
3965*4882a593Smuzhiyun {
3966*4882a593Smuzhiyun int node;
3967*4882a593Smuzhiyun struct kmem_cache_node *n;
3968*4882a593Smuzhiyun
3969*4882a593Smuzhiyun for_each_kmem_cache_node(s, node, n)
3970*4882a593Smuzhiyun if (n->nr_partial || slabs_node(s, node))
3971*4882a593Smuzhiyun return false;
3972*4882a593Smuzhiyun return true;
3973*4882a593Smuzhiyun }
3974*4882a593Smuzhiyun
3975*4882a593Smuzhiyun /*
3976*4882a593Smuzhiyun * Release all resources used by a slab cache.
3977*4882a593Smuzhiyun */
3978*4882a593Smuzhiyun int __kmem_cache_shutdown(struct kmem_cache *s)
3979*4882a593Smuzhiyun {
3980*4882a593Smuzhiyun int node;
3981*4882a593Smuzhiyun struct kmem_cache_node *n;
3982*4882a593Smuzhiyun
3983*4882a593Smuzhiyun flush_all(s);
3984*4882a593Smuzhiyun /* Attempt to free all objects */
3985*4882a593Smuzhiyun for_each_kmem_cache_node(s, node, n) {
3986*4882a593Smuzhiyun free_partial(s, n);
3987*4882a593Smuzhiyun if (n->nr_partial || slabs_node(s, node))
3988*4882a593Smuzhiyun return 1;
3989*4882a593Smuzhiyun }
3990*4882a593Smuzhiyun return 0;
3991*4882a593Smuzhiyun }
3992*4882a593Smuzhiyun
3993*4882a593Smuzhiyun /********************************************************************
3994*4882a593Smuzhiyun * Kmalloc subsystem
3995*4882a593Smuzhiyun *******************************************************************/
3996*4882a593Smuzhiyun
3997*4882a593Smuzhiyun static int __init setup_slub_min_order(char *str)
3998*4882a593Smuzhiyun {
3999*4882a593Smuzhiyun get_option(&str, (int *)&slub_min_order);
4000*4882a593Smuzhiyun
4001*4882a593Smuzhiyun return 1;
4002*4882a593Smuzhiyun }
4003*4882a593Smuzhiyun
4004*4882a593Smuzhiyun __setup("slub_min_order=", setup_slub_min_order);
4005*4882a593Smuzhiyun
4006*4882a593Smuzhiyun static int __init setup_slub_max_order(char *str)
4007*4882a593Smuzhiyun {
4008*4882a593Smuzhiyun get_option(&str, (int *)&slub_max_order);
4009*4882a593Smuzhiyun slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
4010*4882a593Smuzhiyun
4011*4882a593Smuzhiyun return 1;
4012*4882a593Smuzhiyun }
4013*4882a593Smuzhiyun
4014*4882a593Smuzhiyun __setup("slub_max_order=", setup_slub_max_order);
4015*4882a593Smuzhiyun
4016*4882a593Smuzhiyun static int __init setup_slub_min_objects(char *str)
4017*4882a593Smuzhiyun {
4018*4882a593Smuzhiyun get_option(&str, (int *)&slub_min_objects);
4019*4882a593Smuzhiyun
4020*4882a593Smuzhiyun return 1;
4021*4882a593Smuzhiyun }
4022*4882a593Smuzhiyun
4023*4882a593Smuzhiyun __setup("slub_min_objects=", setup_slub_min_objects);
4024*4882a593Smuzhiyun
4025*4882a593Smuzhiyun void *__kmalloc(size_t size, gfp_t flags)
4026*4882a593Smuzhiyun {
4027*4882a593Smuzhiyun struct kmem_cache *s;
4028*4882a593Smuzhiyun void *ret;
4029*4882a593Smuzhiyun
4030*4882a593Smuzhiyun if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4031*4882a593Smuzhiyun return kmalloc_large(size, flags);
4032*4882a593Smuzhiyun
4033*4882a593Smuzhiyun s = kmalloc_slab(size, flags);
4034*4882a593Smuzhiyun
4035*4882a593Smuzhiyun if (unlikely(ZERO_OR_NULL_PTR(s)))
4036*4882a593Smuzhiyun return s;
4037*4882a593Smuzhiyun
4038*4882a593Smuzhiyun ret = slab_alloc(s, flags, _RET_IP_, size);
4039*4882a593Smuzhiyun
4040*4882a593Smuzhiyun trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
4041*4882a593Smuzhiyun
4042*4882a593Smuzhiyun ret = kasan_kmalloc(s, ret, size, flags);
4043*4882a593Smuzhiyun
4044*4882a593Smuzhiyun return ret;
4045*4882a593Smuzhiyun }
4046*4882a593Smuzhiyun EXPORT_SYMBOL(__kmalloc);
4047*4882a593Smuzhiyun
4048*4882a593Smuzhiyun #ifdef CONFIG_NUMA
4049*4882a593Smuzhiyun static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
4050*4882a593Smuzhiyun {
4051*4882a593Smuzhiyun struct page *page;
4052*4882a593Smuzhiyun void *ptr = NULL;
4053*4882a593Smuzhiyun unsigned int order = get_order(size);
4054*4882a593Smuzhiyun
4055*4882a593Smuzhiyun flags |= __GFP_COMP;
4056*4882a593Smuzhiyun page = alloc_pages_node(node, flags, order);
4057*4882a593Smuzhiyun if (page) {
4058*4882a593Smuzhiyun ptr = page_address(page);
4059*4882a593Smuzhiyun mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4060*4882a593Smuzhiyun PAGE_SIZE << order);
4061*4882a593Smuzhiyun }
4062*4882a593Smuzhiyun
4063*4882a593Smuzhiyun return kmalloc_large_node_hook(ptr, size, flags);
4064*4882a593Smuzhiyun }
4065*4882a593Smuzhiyun
4066*4882a593Smuzhiyun void *__kmalloc_node(size_t size, gfp_t flags, int node)
4067*4882a593Smuzhiyun {
4068*4882a593Smuzhiyun struct kmem_cache *s;
4069*4882a593Smuzhiyun void *ret;
4070*4882a593Smuzhiyun
4071*4882a593Smuzhiyun if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4072*4882a593Smuzhiyun ret = kmalloc_large_node(size, flags, node);
4073*4882a593Smuzhiyun
4074*4882a593Smuzhiyun trace_kmalloc_node(_RET_IP_, ret,
4075*4882a593Smuzhiyun size, PAGE_SIZE << get_order(size),
4076*4882a593Smuzhiyun flags, node);
4077*4882a593Smuzhiyun
4078*4882a593Smuzhiyun return ret;
4079*4882a593Smuzhiyun }
4080*4882a593Smuzhiyun
4081*4882a593Smuzhiyun s = kmalloc_slab(size, flags);
4082*4882a593Smuzhiyun
4083*4882a593Smuzhiyun if (unlikely(ZERO_OR_NULL_PTR(s)))
4084*4882a593Smuzhiyun return s;
4085*4882a593Smuzhiyun
4086*4882a593Smuzhiyun ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
4087*4882a593Smuzhiyun
4088*4882a593Smuzhiyun trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
4089*4882a593Smuzhiyun
4090*4882a593Smuzhiyun ret = kasan_kmalloc(s, ret, size, flags);
4091*4882a593Smuzhiyun
4092*4882a593Smuzhiyun return ret;
4093*4882a593Smuzhiyun }
4094*4882a593Smuzhiyun EXPORT_SYMBOL(__kmalloc_node);
4095*4882a593Smuzhiyun #endif /* CONFIG_NUMA */
4096*4882a593Smuzhiyun
4097*4882a593Smuzhiyun #ifdef CONFIG_HARDENED_USERCOPY
4098*4882a593Smuzhiyun /*
4099*4882a593Smuzhiyun * Rejects incorrectly sized objects and objects that are to be copied
4100*4882a593Smuzhiyun * to/from userspace but do not fall entirely within the containing slab
4101*4882a593Smuzhiyun * cache's usercopy region.
4102*4882a593Smuzhiyun *
4103*4882a593Smuzhiyun * Returns normally if the check passes; otherwise the copy is rejected
4104*4882a593Smuzhiyun * via usercopy_abort() (or only warned about on the fallback path).
4105*4882a593Smuzhiyun */
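/*
 * Example with hypothetical numbers: for a cache whose usercopy region
 * is useroffset == 16, usersize == 32 (i.e. bytes [16, 48) of each
 * object), a copy of n == 8 bytes at offset 24 passes all three checks
 * below (8 <= 16 - 24 + 32 == 24), while a copy of n == 16 bytes at
 * offset 40 is rejected because it would run past byte 48.
 */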
4106*4882a593Smuzhiyun void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
4107*4882a593Smuzhiyun bool to_user)
4108*4882a593Smuzhiyun {
4109*4882a593Smuzhiyun struct kmem_cache *s;
4110*4882a593Smuzhiyun unsigned int offset;
4111*4882a593Smuzhiyun size_t object_size;
4112*4882a593Smuzhiyun bool is_kfence = is_kfence_address(ptr);
4113*4882a593Smuzhiyun
4114*4882a593Smuzhiyun ptr = kasan_reset_tag(ptr);
4115*4882a593Smuzhiyun
4116*4882a593Smuzhiyun /* Find object and usable object size. */
4117*4882a593Smuzhiyun s = page->slab_cache;
4118*4882a593Smuzhiyun
4119*4882a593Smuzhiyun /* Reject impossible pointers. */
4120*4882a593Smuzhiyun if (ptr < page_address(page))
4121*4882a593Smuzhiyun usercopy_abort("SLUB object not in SLUB page?!", NULL,
4122*4882a593Smuzhiyun to_user, 0, n);
4123*4882a593Smuzhiyun
4124*4882a593Smuzhiyun /* Find offset within object. */
4125*4882a593Smuzhiyun if (is_kfence)
4126*4882a593Smuzhiyun offset = ptr - kfence_object_start(ptr);
4127*4882a593Smuzhiyun else
4128*4882a593Smuzhiyun offset = (ptr - page_address(page)) % s->size;
4129*4882a593Smuzhiyun
4130*4882a593Smuzhiyun /* Adjust for redzone and reject if within the redzone. */
4131*4882a593Smuzhiyun if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
4132*4882a593Smuzhiyun if (offset < s->red_left_pad)
4133*4882a593Smuzhiyun usercopy_abort("SLUB object in left red zone",
4134*4882a593Smuzhiyun s->name, to_user, offset, n);
4135*4882a593Smuzhiyun offset -= s->red_left_pad;
4136*4882a593Smuzhiyun }
4137*4882a593Smuzhiyun
4138*4882a593Smuzhiyun /* Allow address range falling entirely within usercopy region. */
4139*4882a593Smuzhiyun if (offset >= s->useroffset &&
4140*4882a593Smuzhiyun offset - s->useroffset <= s->usersize &&
4141*4882a593Smuzhiyun n <= s->useroffset - offset + s->usersize)
4142*4882a593Smuzhiyun return;
4143*4882a593Smuzhiyun
4144*4882a593Smuzhiyun /*
4145*4882a593Smuzhiyun * If the copy is still within the allocated object, produce
4146*4882a593Smuzhiyun * a warning instead of rejecting the copy. This is intended
4147*4882a593Smuzhiyun * to be a temporary method to find any missing usercopy
4148*4882a593Smuzhiyun * whitelists.
4149*4882a593Smuzhiyun */
4150*4882a593Smuzhiyun object_size = slab_ksize(s);
4151*4882a593Smuzhiyun if (usercopy_fallback &&
4152*4882a593Smuzhiyun offset <= object_size && n <= object_size - offset) {
4153*4882a593Smuzhiyun usercopy_warn("SLUB object", s->name, to_user, offset, n);
4154*4882a593Smuzhiyun return;
4155*4882a593Smuzhiyun }
4156*4882a593Smuzhiyun
4157*4882a593Smuzhiyun usercopy_abort("SLUB object", s->name, to_user, offset, n);
4158*4882a593Smuzhiyun }
4159*4882a593Smuzhiyun #endif /* CONFIG_HARDENED_USERCOPY */
4160*4882a593Smuzhiyun
4161*4882a593Smuzhiyun size_t __ksize(const void *object)
4162*4882a593Smuzhiyun {
4163*4882a593Smuzhiyun struct page *page;
4164*4882a593Smuzhiyun
4165*4882a593Smuzhiyun if (unlikely(object == ZERO_SIZE_PTR))
4166*4882a593Smuzhiyun return 0;
4167*4882a593Smuzhiyun
4168*4882a593Smuzhiyun page = virt_to_head_page(object);
4169*4882a593Smuzhiyun
4170*4882a593Smuzhiyun if (unlikely(!PageSlab(page))) {
4171*4882a593Smuzhiyun WARN_ON(!PageCompound(page));
4172*4882a593Smuzhiyun return page_size(page);
4173*4882a593Smuzhiyun }
4174*4882a593Smuzhiyun
4175*4882a593Smuzhiyun return slab_ksize(page->slab_cache);
4176*4882a593Smuzhiyun }
4177*4882a593Smuzhiyun EXPORT_SYMBOL(__ksize);
4178*4882a593Smuzhiyun
4179*4882a593Smuzhiyun void kfree(const void *x)
4180*4882a593Smuzhiyun {
4181*4882a593Smuzhiyun struct page *page;
4182*4882a593Smuzhiyun void *object = (void *)x;
4183*4882a593Smuzhiyun
4184*4882a593Smuzhiyun trace_kfree(_RET_IP_, x);
4185*4882a593Smuzhiyun
4186*4882a593Smuzhiyun if (unlikely(ZERO_OR_NULL_PTR(x)))
4187*4882a593Smuzhiyun return;
4188*4882a593Smuzhiyun
4189*4882a593Smuzhiyun page = virt_to_head_page(x);
4190*4882a593Smuzhiyun if (unlikely(!PageSlab(page))) {
4191*4882a593Smuzhiyun unsigned int order = compound_order(page);
4192*4882a593Smuzhiyun
4193*4882a593Smuzhiyun BUG_ON(!PageCompound(page));
4194*4882a593Smuzhiyun kfree_hook(object);
4195*4882a593Smuzhiyun mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4196*4882a593Smuzhiyun -(PAGE_SIZE << order));
4197*4882a593Smuzhiyun __free_pages(page, order);
4198*4882a593Smuzhiyun return;
4199*4882a593Smuzhiyun }
4200*4882a593Smuzhiyun slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
4201*4882a593Smuzhiyun }
4202*4882a593Smuzhiyun EXPORT_SYMBOL(kfree);
4203*4882a593Smuzhiyun
4204*4882a593Smuzhiyun #define SHRINK_PROMOTE_MAX 32
4205*4882a593Smuzhiyun
4206*4882a593Smuzhiyun /*
4207*4882a593Smuzhiyun * kmem_cache_shrink discards empty slabs and promotes the slabs filled
4208*4882a593Smuzhiyun * up most to the head of the partial lists. New allocations will then
4209*4882a593Smuzhiyun * fill those up and thus they can be removed from the partial lists.
4210*4882a593Smuzhiyun *
4211*4882a593Smuzhiyun * The slabs with the least items are placed last. This results in them
4212*4882a593Smuzhiyun * being allocated from last, increasing the chance that the last objects
4213*4882a593Smuzhiyun * are freed in them.
4214*4882a593Smuzhiyun */
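/*
 * Example of the promotion scheme: a slab with page->objects == 32 and
 * page->inuse == 30 has two free objects and is moved to promote[1],
 * while a completely free slab goes to the discard list instead.
 * Because the splice loop in __kmem_cache_shrink() runs from the
 * highest index down to 0, the promote[0] slabs (one free object, i.e.
 * the fullest) are spliced onto the head last and therefore end up
 * first on the partial list.
 */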
4215*4882a593Smuzhiyun int __kmem_cache_shrink(struct kmem_cache *s)
4216*4882a593Smuzhiyun {
4217*4882a593Smuzhiyun int node;
4218*4882a593Smuzhiyun int i;
4219*4882a593Smuzhiyun struct kmem_cache_node *n;
4220*4882a593Smuzhiyun struct page *page;
4221*4882a593Smuzhiyun struct page *t;
4222*4882a593Smuzhiyun struct list_head discard;
4223*4882a593Smuzhiyun struct list_head promote[SHRINK_PROMOTE_MAX];
4224*4882a593Smuzhiyun unsigned long flags;
4225*4882a593Smuzhiyun int ret = 0;
4226*4882a593Smuzhiyun
4227*4882a593Smuzhiyun flush_all(s);
4228*4882a593Smuzhiyun for_each_kmem_cache_node(s, node, n) {
4229*4882a593Smuzhiyun INIT_LIST_HEAD(&discard);
4230*4882a593Smuzhiyun for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
4231*4882a593Smuzhiyun INIT_LIST_HEAD(promote + i);
4232*4882a593Smuzhiyun
4233*4882a593Smuzhiyun spin_lock_irqsave(&n->list_lock, flags);
4234*4882a593Smuzhiyun
4235*4882a593Smuzhiyun /*
4236*4882a593Smuzhiyun * Build lists of slabs to discard or promote.
4237*4882a593Smuzhiyun *
4238*4882a593Smuzhiyun * Note that concurrent frees may occur while we hold the
4239*4882a593Smuzhiyun * list_lock. page->inuse here is the upper limit.
4240*4882a593Smuzhiyun */
4241*4882a593Smuzhiyun list_for_each_entry_safe(page, t, &n->partial, slab_list) {
4242*4882a593Smuzhiyun int free = page->objects - page->inuse;
4243*4882a593Smuzhiyun
4244*4882a593Smuzhiyun /* Do not reread page->inuse */
4245*4882a593Smuzhiyun barrier();
4246*4882a593Smuzhiyun
4247*4882a593Smuzhiyun /* We do not keep full slabs on the list */
4248*4882a593Smuzhiyun BUG_ON(free <= 0);
4249*4882a593Smuzhiyun
4250*4882a593Smuzhiyun if (free == page->objects) {
4251*4882a593Smuzhiyun list_move(&page->slab_list, &discard);
4252*4882a593Smuzhiyun n->nr_partial--;
4253*4882a593Smuzhiyun } else if (free <= SHRINK_PROMOTE_MAX)
4254*4882a593Smuzhiyun list_move(&page->slab_list, promote + free - 1);
4255*4882a593Smuzhiyun }
4256*4882a593Smuzhiyun
4257*4882a593Smuzhiyun /*
4258*4882a593Smuzhiyun * Promote the slabs filled up most to the head of the
4259*4882a593Smuzhiyun * partial list.
4260*4882a593Smuzhiyun */
4261*4882a593Smuzhiyun for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
4262*4882a593Smuzhiyun list_splice(promote + i, &n->partial);
4263*4882a593Smuzhiyun
4264*4882a593Smuzhiyun spin_unlock_irqrestore(&n->list_lock, flags);
4265*4882a593Smuzhiyun
4266*4882a593Smuzhiyun /* Release empty slabs */
4267*4882a593Smuzhiyun list_for_each_entry_safe(page, t, &discard, slab_list)
4268*4882a593Smuzhiyun discard_slab(s, page);
4269*4882a593Smuzhiyun
4270*4882a593Smuzhiyun if (slabs_node(s, node))
4271*4882a593Smuzhiyun ret = 1;
4272*4882a593Smuzhiyun }
4273*4882a593Smuzhiyun
4274*4882a593Smuzhiyun return ret;
4275*4882a593Smuzhiyun }
4276*4882a593Smuzhiyun
4277*4882a593Smuzhiyun static int slab_mem_going_offline_callback(void *arg)
4278*4882a593Smuzhiyun {
4279*4882a593Smuzhiyun struct kmem_cache *s;
4280*4882a593Smuzhiyun
4281*4882a593Smuzhiyun mutex_lock(&slab_mutex);
4282*4882a593Smuzhiyun list_for_each_entry(s, &slab_caches, list)
4283*4882a593Smuzhiyun __kmem_cache_shrink(s);
4284*4882a593Smuzhiyun mutex_unlock(&slab_mutex);
4285*4882a593Smuzhiyun
4286*4882a593Smuzhiyun return 0;
4287*4882a593Smuzhiyun }
4288*4882a593Smuzhiyun
4289*4882a593Smuzhiyun static void slab_mem_offline_callback(void *arg)
4290*4882a593Smuzhiyun {
4291*4882a593Smuzhiyun struct kmem_cache_node *n;
4292*4882a593Smuzhiyun struct kmem_cache *s;
4293*4882a593Smuzhiyun struct memory_notify *marg = arg;
4294*4882a593Smuzhiyun int offline_node;
4295*4882a593Smuzhiyun
4296*4882a593Smuzhiyun offline_node = marg->status_change_nid_normal;
4297*4882a593Smuzhiyun
4298*4882a593Smuzhiyun /*
4299*4882a593Smuzhiyun * If the node still has available memory, we still need the
4300*4882a593Smuzhiyun * kmem_cache_node structure for it, so leave it in place.
4301*4882a593Smuzhiyun */
4302*4882a593Smuzhiyun if (offline_node < 0)
4303*4882a593Smuzhiyun return;
4304*4882a593Smuzhiyun
4305*4882a593Smuzhiyun mutex_lock(&slab_mutex);
4306*4882a593Smuzhiyun list_for_each_entry(s, &slab_caches, list) {
4307*4882a593Smuzhiyun n = get_node(s, offline_node);
4308*4882a593Smuzhiyun if (n) {
4309*4882a593Smuzhiyun /*
4310*4882a593Smuzhiyun * if n->nr_slabs > 0, slabs still exist on the node
4311*4882a593Smuzhiyun * that is going down. We were unable to free them,
4312*4882a593Smuzhiyun * and the offline_pages() function shouldn't have called this
4313*4882a593Smuzhiyun * callback. So, we must fail.
4314*4882a593Smuzhiyun */
4315*4882a593Smuzhiyun BUG_ON(slabs_node(s, offline_node));
4316*4882a593Smuzhiyun
4317*4882a593Smuzhiyun s->node[offline_node] = NULL;
4318*4882a593Smuzhiyun kmem_cache_free(kmem_cache_node, n);
4319*4882a593Smuzhiyun }
4320*4882a593Smuzhiyun }
4321*4882a593Smuzhiyun mutex_unlock(&slab_mutex);
4322*4882a593Smuzhiyun }
4323*4882a593Smuzhiyun
4324*4882a593Smuzhiyun static int slab_mem_going_online_callback(void *arg)
4325*4882a593Smuzhiyun {
4326*4882a593Smuzhiyun struct kmem_cache_node *n;
4327*4882a593Smuzhiyun struct kmem_cache *s;
4328*4882a593Smuzhiyun struct memory_notify *marg = arg;
4329*4882a593Smuzhiyun int nid = marg->status_change_nid_normal;
4330*4882a593Smuzhiyun int ret = 0;
4331*4882a593Smuzhiyun
4332*4882a593Smuzhiyun /*
4333*4882a593Smuzhiyun * If the node's memory is already available, then kmem_cache_node is
4334*4882a593Smuzhiyun * already created. Nothing to do.
4335*4882a593Smuzhiyun */
4336*4882a593Smuzhiyun if (nid < 0)
4337*4882a593Smuzhiyun return 0;
4338*4882a593Smuzhiyun
4339*4882a593Smuzhiyun /*
4340*4882a593Smuzhiyun * We are bringing a node online. No memory is available yet. We must
4341*4882a593Smuzhiyun * allocate a kmem_cache_node structure in order to bring the node
4342*4882a593Smuzhiyun * online.
4343*4882a593Smuzhiyun */
4344*4882a593Smuzhiyun mutex_lock(&slab_mutex);
4345*4882a593Smuzhiyun list_for_each_entry(s, &slab_caches, list) {
4346*4882a593Smuzhiyun /*
4347*4882a593Smuzhiyun * XXX: kmem_cache_alloc_node will fall back to other nodes
4348*4882a593Smuzhiyun * since memory is not yet available from the node that
4349*4882a593Smuzhiyun * is brought up.
4350*4882a593Smuzhiyun */
4351*4882a593Smuzhiyun n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4352*4882a593Smuzhiyun if (!n) {
4353*4882a593Smuzhiyun ret = -ENOMEM;
4354*4882a593Smuzhiyun goto out;
4355*4882a593Smuzhiyun }
4356*4882a593Smuzhiyun init_kmem_cache_node(n);
4357*4882a593Smuzhiyun s->node[nid] = n;
4358*4882a593Smuzhiyun }
4359*4882a593Smuzhiyun out:
4360*4882a593Smuzhiyun mutex_unlock(&slab_mutex);
4361*4882a593Smuzhiyun return ret;
4362*4882a593Smuzhiyun }
4363*4882a593Smuzhiyun
4364*4882a593Smuzhiyun static int slab_memory_callback(struct notifier_block *self,
4365*4882a593Smuzhiyun unsigned long action, void *arg)
4366*4882a593Smuzhiyun {
4367*4882a593Smuzhiyun int ret = 0;
4368*4882a593Smuzhiyun
4369*4882a593Smuzhiyun switch (action) {
4370*4882a593Smuzhiyun case MEM_GOING_ONLINE:
4371*4882a593Smuzhiyun ret = slab_mem_going_online_callback(arg);
4372*4882a593Smuzhiyun break;
4373*4882a593Smuzhiyun case MEM_GOING_OFFLINE:
4374*4882a593Smuzhiyun ret = slab_mem_going_offline_callback(arg);
4375*4882a593Smuzhiyun break;
4376*4882a593Smuzhiyun case MEM_OFFLINE:
4377*4882a593Smuzhiyun case MEM_CANCEL_ONLINE:
4378*4882a593Smuzhiyun slab_mem_offline_callback(arg);
4379*4882a593Smuzhiyun break;
4380*4882a593Smuzhiyun case MEM_ONLINE:
4381*4882a593Smuzhiyun case MEM_CANCEL_OFFLINE:
4382*4882a593Smuzhiyun break;
4383*4882a593Smuzhiyun }
4384*4882a593Smuzhiyun if (ret)
4385*4882a593Smuzhiyun ret = notifier_from_errno(ret);
4386*4882a593Smuzhiyun else
4387*4882a593Smuzhiyun ret = NOTIFY_OK;
4388*4882a593Smuzhiyun return ret;
4389*4882a593Smuzhiyun }
4390*4882a593Smuzhiyun
4391*4882a593Smuzhiyun static struct notifier_block slab_memory_callback_nb = {
4392*4882a593Smuzhiyun .notifier_call = slab_memory_callback,
4393*4882a593Smuzhiyun .priority = SLAB_CALLBACK_PRI,
4394*4882a593Smuzhiyun };
4395*4882a593Smuzhiyun
4396*4882a593Smuzhiyun /********************************************************************
4397*4882a593Smuzhiyun * Basic setup of slabs
4398*4882a593Smuzhiyun *******************************************************************/
4399*4882a593Smuzhiyun
4400*4882a593Smuzhiyun /*
4401*4882a593Smuzhiyun * Used for early kmem_cache structures that were allocated using
4402*4882a593Smuzhiyun * the page allocator. Allocate them properly then fix up the pointers
4403*4882a593Smuzhiyun * that may be pointing to the wrong kmem_cache structure.
4404*4882a593Smuzhiyun */
4405*4882a593Smuzhiyun
4406*4882a593Smuzhiyun static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4407*4882a593Smuzhiyun {
4408*4882a593Smuzhiyun int node;
4409*4882a593Smuzhiyun struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4410*4882a593Smuzhiyun struct kmem_cache_node *n;
4411*4882a593Smuzhiyun
4412*4882a593Smuzhiyun memcpy(s, static_cache, kmem_cache->object_size);
4413*4882a593Smuzhiyun
4414*4882a593Smuzhiyun /*
4415*4882a593Smuzhiyun * This runs very early, and only the boot processor is supposed to be
4416*4882a593Smuzhiyun * up. Even if it weren't true, IRQs are not up so we couldn't fire
4417*4882a593Smuzhiyun * IPIs around.
4418*4882a593Smuzhiyun */
4419*4882a593Smuzhiyun __flush_cpu_slab(s, smp_processor_id());
4420*4882a593Smuzhiyun for_each_kmem_cache_node(s, node, n) {
4421*4882a593Smuzhiyun struct page *p;
4422*4882a593Smuzhiyun
4423*4882a593Smuzhiyun list_for_each_entry(p, &n->partial, slab_list)
4424*4882a593Smuzhiyun p->slab_cache = s;
4425*4882a593Smuzhiyun
4426*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
4427*4882a593Smuzhiyun list_for_each_entry(p, &n->full, slab_list)
4428*4882a593Smuzhiyun p->slab_cache = s;
4429*4882a593Smuzhiyun #endif
4430*4882a593Smuzhiyun }
4431*4882a593Smuzhiyun list_add(&s->list, &slab_caches);
4432*4882a593Smuzhiyun return s;
4433*4882a593Smuzhiyun }
4434*4882a593Smuzhiyun
4435*4882a593Smuzhiyun void __init kmem_cache_init(void)
4436*4882a593Smuzhiyun {
4437*4882a593Smuzhiyun static __initdata struct kmem_cache boot_kmem_cache,
4438*4882a593Smuzhiyun boot_kmem_cache_node;
4439*4882a593Smuzhiyun
4440*4882a593Smuzhiyun if (debug_guardpage_minorder())
4441*4882a593Smuzhiyun slub_max_order = 0;
4442*4882a593Smuzhiyun
4443*4882a593Smuzhiyun kmem_cache_node = &boot_kmem_cache_node;
4444*4882a593Smuzhiyun kmem_cache = &boot_kmem_cache;
4445*4882a593Smuzhiyun
4446*4882a593Smuzhiyun create_boot_cache(kmem_cache_node, "kmem_cache_node",
4447*4882a593Smuzhiyun sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
4448*4882a593Smuzhiyun
4449*4882a593Smuzhiyun register_hotmemory_notifier(&slab_memory_callback_nb);
4450*4882a593Smuzhiyun
4451*4882a593Smuzhiyun /* Able to allocate the per node structures */
4452*4882a593Smuzhiyun slab_state = PARTIAL;
4453*4882a593Smuzhiyun
4454*4882a593Smuzhiyun create_boot_cache(kmem_cache, "kmem_cache",
4455*4882a593Smuzhiyun offsetof(struct kmem_cache, node) +
4456*4882a593Smuzhiyun nr_node_ids * sizeof(struct kmem_cache_node *),
4457*4882a593Smuzhiyun SLAB_HWCACHE_ALIGN, 0, 0);
4458*4882a593Smuzhiyun
4459*4882a593Smuzhiyun kmem_cache = bootstrap(&boot_kmem_cache);
4460*4882a593Smuzhiyun kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4461*4882a593Smuzhiyun
4462*4882a593Smuzhiyun /* Now we can use the kmem_cache to allocate kmalloc slabs */
4463*4882a593Smuzhiyun setup_kmalloc_cache_index_table();
4464*4882a593Smuzhiyun create_kmalloc_caches(0);
4465*4882a593Smuzhiyun
4466*4882a593Smuzhiyun /* Setup random freelists for each cache */
4467*4882a593Smuzhiyun init_freelist_randomization();
4468*4882a593Smuzhiyun
4469*4882a593Smuzhiyun cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4470*4882a593Smuzhiyun slub_cpu_dead);
4471*4882a593Smuzhiyun
4472*4882a593Smuzhiyun pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
4473*4882a593Smuzhiyun cache_line_size(),
4474*4882a593Smuzhiyun slub_min_order, slub_max_order, slub_min_objects,
4475*4882a593Smuzhiyun nr_cpu_ids, nr_node_ids);
4476*4882a593Smuzhiyun }
4477*4882a593Smuzhiyun
4478*4882a593Smuzhiyun void __init kmem_cache_init_late(void)
4479*4882a593Smuzhiyun {
4480*4882a593Smuzhiyun }
4481*4882a593Smuzhiyun
4482*4882a593Smuzhiyun struct kmem_cache *
4483*4882a593Smuzhiyun __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
4484*4882a593Smuzhiyun slab_flags_t flags, void (*ctor)(void *))
4485*4882a593Smuzhiyun {
4486*4882a593Smuzhiyun struct kmem_cache *s;
4487*4882a593Smuzhiyun
4488*4882a593Smuzhiyun s = find_mergeable(size, align, flags, name, ctor);
4489*4882a593Smuzhiyun if (s) {
4490*4882a593Smuzhiyun s->refcount++;
4491*4882a593Smuzhiyun
4492*4882a593Smuzhiyun /*
4493*4882a593Smuzhiyun * Adjust the object sizes so that we clear
4494*4882a593Smuzhiyun * the complete object on kzalloc.
4495*4882a593Smuzhiyun */
4496*4882a593Smuzhiyun s->object_size = max(s->object_size, size);
4497*4882a593Smuzhiyun s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4498*4882a593Smuzhiyun
4499*4882a593Smuzhiyun if (sysfs_slab_alias(s, name)) {
4500*4882a593Smuzhiyun s->refcount--;
4501*4882a593Smuzhiyun s = NULL;
4502*4882a593Smuzhiyun }
4503*4882a593Smuzhiyun }
4504*4882a593Smuzhiyun
4505*4882a593Smuzhiyun return s;
4506*4882a593Smuzhiyun }
4507*4882a593Smuzhiyun
4508*4882a593Smuzhiyun int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
4509*4882a593Smuzhiyun {
4510*4882a593Smuzhiyun int err;
4511*4882a593Smuzhiyun
4512*4882a593Smuzhiyun err = kmem_cache_open(s, flags);
4513*4882a593Smuzhiyun if (err)
4514*4882a593Smuzhiyun return err;
4515*4882a593Smuzhiyun
4516*4882a593Smuzhiyun /* Mutex is not taken during early boot */
4517*4882a593Smuzhiyun if (slab_state <= UP)
4518*4882a593Smuzhiyun return 0;
4519*4882a593Smuzhiyun
4520*4882a593Smuzhiyun err = sysfs_slab_add(s);
4521*4882a593Smuzhiyun if (err) {
4522*4882a593Smuzhiyun __kmem_cache_release(s);
4523*4882a593Smuzhiyun return err;
4524*4882a593Smuzhiyun }
4525*4882a593Smuzhiyun
4526*4882a593Smuzhiyun if (s->flags & SLAB_STORE_USER)
4527*4882a593Smuzhiyun debugfs_slab_add(s);
4528*4882a593Smuzhiyun
4529*4882a593Smuzhiyun return 0;
4530*4882a593Smuzhiyun }
4531*4882a593Smuzhiyun
4532*4882a593Smuzhiyun void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4533*4882a593Smuzhiyun {
4534*4882a593Smuzhiyun struct kmem_cache *s;
4535*4882a593Smuzhiyun void *ret;
4536*4882a593Smuzhiyun
4537*4882a593Smuzhiyun if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4538*4882a593Smuzhiyun return kmalloc_large(size, gfpflags);
4539*4882a593Smuzhiyun
4540*4882a593Smuzhiyun s = kmalloc_slab(size, gfpflags);
4541*4882a593Smuzhiyun
4542*4882a593Smuzhiyun if (unlikely(ZERO_OR_NULL_PTR(s)))
4543*4882a593Smuzhiyun return s;
4544*4882a593Smuzhiyun
4545*4882a593Smuzhiyun ret = slab_alloc(s, gfpflags, caller, size);
4546*4882a593Smuzhiyun
4547*4882a593Smuzhiyun /* Honor the call site pointer we received. */
4548*4882a593Smuzhiyun trace_kmalloc(caller, ret, size, s->size, gfpflags);
4549*4882a593Smuzhiyun
4550*4882a593Smuzhiyun return ret;
4551*4882a593Smuzhiyun }
4552*4882a593Smuzhiyun EXPORT_SYMBOL(__kmalloc_track_caller);
4553*4882a593Smuzhiyun
4554*4882a593Smuzhiyun #ifdef CONFIG_NUMA
4555*4882a593Smuzhiyun void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4556*4882a593Smuzhiyun int node, unsigned long caller)
4557*4882a593Smuzhiyun {
4558*4882a593Smuzhiyun struct kmem_cache *s;
4559*4882a593Smuzhiyun void *ret;
4560*4882a593Smuzhiyun
4561*4882a593Smuzhiyun if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4562*4882a593Smuzhiyun ret = kmalloc_large_node(size, gfpflags, node);
4563*4882a593Smuzhiyun
4564*4882a593Smuzhiyun trace_kmalloc_node(caller, ret,
4565*4882a593Smuzhiyun size, PAGE_SIZE << get_order(size),
4566*4882a593Smuzhiyun gfpflags, node);
4567*4882a593Smuzhiyun
4568*4882a593Smuzhiyun return ret;
4569*4882a593Smuzhiyun }
4570*4882a593Smuzhiyun
4571*4882a593Smuzhiyun s = kmalloc_slab(size, gfpflags);
4572*4882a593Smuzhiyun
4573*4882a593Smuzhiyun if (unlikely(ZERO_OR_NULL_PTR(s)))
4574*4882a593Smuzhiyun return s;
4575*4882a593Smuzhiyun
4576*4882a593Smuzhiyun ret = slab_alloc_node(s, gfpflags, node, caller, size);
4577*4882a593Smuzhiyun
4578*4882a593Smuzhiyun /* Honor the call site pointer we received. */
4579*4882a593Smuzhiyun trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4580*4882a593Smuzhiyun
4581*4882a593Smuzhiyun return ret;
4582*4882a593Smuzhiyun }
4583*4882a593Smuzhiyun EXPORT_SYMBOL(__kmalloc_node_track_caller);
4584*4882a593Smuzhiyun #endif
4585*4882a593Smuzhiyun
4586*4882a593Smuzhiyun #ifdef CONFIG_SLUB_SYSFS
4587*4882a593Smuzhiyun static int count_inuse(struct page *page)
4588*4882a593Smuzhiyun {
4589*4882a593Smuzhiyun return page->inuse;
4590*4882a593Smuzhiyun }
4591*4882a593Smuzhiyun
4592*4882a593Smuzhiyun static int count_total(struct page *page)
4593*4882a593Smuzhiyun {
4594*4882a593Smuzhiyun return page->objects;
4595*4882a593Smuzhiyun }
4596*4882a593Smuzhiyun #endif
4597*4882a593Smuzhiyun
4598*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
4599*4882a593Smuzhiyun static void validate_slab(struct kmem_cache *s, struct page *page)
4600*4882a593Smuzhiyun {
4601*4882a593Smuzhiyun void *p;
4602*4882a593Smuzhiyun void *addr = page_address(page);
4603*4882a593Smuzhiyun unsigned long *map;
4604*4882a593Smuzhiyun
4605*4882a593Smuzhiyun slab_lock(page);
4606*4882a593Smuzhiyun
4607*4882a593Smuzhiyun if (!check_slab(s, page) || !on_freelist(s, page, NULL))
4608*4882a593Smuzhiyun goto unlock;
4609*4882a593Smuzhiyun
4610*4882a593Smuzhiyun /* Now we know that a valid freelist exists */
4611*4882a593Smuzhiyun map = get_map(s, page);
4612*4882a593Smuzhiyun for_each_object(p, s, addr, page->objects) {
4613*4882a593Smuzhiyun u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
4614*4882a593Smuzhiyun SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
4615*4882a593Smuzhiyun
4616*4882a593Smuzhiyun if (!check_object(s, page, p, val))
4617*4882a593Smuzhiyun break;
4618*4882a593Smuzhiyun }
4619*4882a593Smuzhiyun put_map(map);
4620*4882a593Smuzhiyun unlock:
4621*4882a593Smuzhiyun slab_unlock(page);
4622*4882a593Smuzhiyun }
4623*4882a593Smuzhiyun
4624*4882a593Smuzhiyun static int validate_slab_node(struct kmem_cache *s,
4625*4882a593Smuzhiyun struct kmem_cache_node *n)
4626*4882a593Smuzhiyun {
4627*4882a593Smuzhiyun unsigned long count = 0;
4628*4882a593Smuzhiyun struct page *page;
4629*4882a593Smuzhiyun unsigned long flags;
4630*4882a593Smuzhiyun
4631*4882a593Smuzhiyun spin_lock_irqsave(&n->list_lock, flags);
4632*4882a593Smuzhiyun
4633*4882a593Smuzhiyun list_for_each_entry(page, &n->partial, slab_list) {
4634*4882a593Smuzhiyun validate_slab(s, page);
4635*4882a593Smuzhiyun count++;
4636*4882a593Smuzhiyun }
4637*4882a593Smuzhiyun if (count != n->nr_partial)
4638*4882a593Smuzhiyun pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
4639*4882a593Smuzhiyun s->name, count, n->nr_partial);
4640*4882a593Smuzhiyun
4641*4882a593Smuzhiyun if (!(s->flags & SLAB_STORE_USER))
4642*4882a593Smuzhiyun goto out;
4643*4882a593Smuzhiyun
4644*4882a593Smuzhiyun list_for_each_entry(page, &n->full, slab_list) {
4645*4882a593Smuzhiyun validate_slab(s, page);
4646*4882a593Smuzhiyun count++;
4647*4882a593Smuzhiyun }
4648*4882a593Smuzhiyun if (count != atomic_long_read(&n->nr_slabs))
4649*4882a593Smuzhiyun pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
4650*4882a593Smuzhiyun s->name, count, atomic_long_read(&n->nr_slabs));
4651*4882a593Smuzhiyun
4652*4882a593Smuzhiyun out:
4653*4882a593Smuzhiyun spin_unlock_irqrestore(&n->list_lock, flags);
4654*4882a593Smuzhiyun return count;
4655*4882a593Smuzhiyun }
4656*4882a593Smuzhiyun
4657*4882a593Smuzhiyun static long validate_slab_cache(struct kmem_cache *s)
4658*4882a593Smuzhiyun {
4659*4882a593Smuzhiyun int node;
4660*4882a593Smuzhiyun unsigned long count = 0;
4661*4882a593Smuzhiyun struct kmem_cache_node *n;
4662*4882a593Smuzhiyun
4663*4882a593Smuzhiyun flush_all(s);
4664*4882a593Smuzhiyun for_each_kmem_cache_node(s, node, n)
4665*4882a593Smuzhiyun count += validate_slab_node(s, n);
4666*4882a593Smuzhiyun
4667*4882a593Smuzhiyun return count;
4668*4882a593Smuzhiyun }
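/*
 * validate_slab_cache() first drains the per-cpu slabs with flush_all() so
 * that every slab is reachable from the node lists, then walks each node's
 * partial list (and, for SLAB_STORE_USER caches, the full list) under
 * n->list_lock. The return value is the number of slabs examined; any
 * inconsistencies are reported via pr_err() by the helpers above.
 */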
4669*4882a593Smuzhiyun
4670*4882a593Smuzhiyun #ifdef CONFIG_DEBUG_FS
4671*4882a593Smuzhiyun /*
4672*4882a593Smuzhiyun * Generate lists of code addresses where slabcache objects are allocated
4673*4882a593Smuzhiyun * and freed.
4674*4882a593Smuzhiyun */
4675*4882a593Smuzhiyun
4676*4882a593Smuzhiyun struct location {
4677*4882a593Smuzhiyun unsigned long count;
4678*4882a593Smuzhiyun unsigned long addr;
4679*4882a593Smuzhiyun long long sum_time;
4680*4882a593Smuzhiyun long min_time;
4681*4882a593Smuzhiyun long max_time;
4682*4882a593Smuzhiyun long min_pid;
4683*4882a593Smuzhiyun long max_pid;
4684*4882a593Smuzhiyun DECLARE_BITMAP(cpus, NR_CPUS);
4685*4882a593Smuzhiyun nodemask_t nodes;
4686*4882a593Smuzhiyun };
4687*4882a593Smuzhiyun
4688*4882a593Smuzhiyun struct loc_track {
4689*4882a593Smuzhiyun unsigned long max;
4690*4882a593Smuzhiyun unsigned long count;
4691*4882a593Smuzhiyun struct location *loc;
4692*4882a593Smuzhiyun loff_t idx;
4693*4882a593Smuzhiyun };
4694*4882a593Smuzhiyun
4695*4882a593Smuzhiyun static struct dentry *slab_debugfs_root;
4696*4882a593Smuzhiyun
4697*4882a593Smuzhiyun static void free_loc_track(struct loc_track *t)
4698*4882a593Smuzhiyun {
4699*4882a593Smuzhiyun if (t->max)
4700*4882a593Smuzhiyun free_pages((unsigned long)t->loc,
4701*4882a593Smuzhiyun get_order(sizeof(struct location) * t->max));
4702*4882a593Smuzhiyun }
4703*4882a593Smuzhiyun
4704*4882a593Smuzhiyun static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4705*4882a593Smuzhiyun {
4706*4882a593Smuzhiyun struct location *l;
4707*4882a593Smuzhiyun int order;
4708*4882a593Smuzhiyun
4709*4882a593Smuzhiyun order = get_order(sizeof(struct location) * max);
4710*4882a593Smuzhiyun
4711*4882a593Smuzhiyun l = (void *)__get_free_pages(flags, order);
4712*4882a593Smuzhiyun if (!l)
4713*4882a593Smuzhiyun return 0;
4714*4882a593Smuzhiyun
4715*4882a593Smuzhiyun if (t->count) {
4716*4882a593Smuzhiyun memcpy(l, t->loc, sizeof(struct location) * t->count);
4717*4882a593Smuzhiyun free_loc_track(t);
4718*4882a593Smuzhiyun }
4719*4882a593Smuzhiyun t->max = max;
4720*4882a593Smuzhiyun t->loc = l;
4721*4882a593Smuzhiyun return 1;
4722*4882a593Smuzhiyun }
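/*
 * The location table grows by copy-and-double: add_location() below calls
 * alloc_loc_track(t, 2 * t->max, GFP_ATOMIC) once the table is full. If
 * that atomic allocation fails, the new location is simply dropped rather
 * than blocking while node list locks are held.
 */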
4723*4882a593Smuzhiyun
4724*4882a593Smuzhiyun static int add_location(struct loc_track *t, struct kmem_cache *s,
4725*4882a593Smuzhiyun const struct track *track)
4726*4882a593Smuzhiyun {
4727*4882a593Smuzhiyun long start, end, pos;
4728*4882a593Smuzhiyun struct location *l;
4729*4882a593Smuzhiyun unsigned long caddr;
4730*4882a593Smuzhiyun unsigned long age = jiffies - track->when;
4731*4882a593Smuzhiyun
4732*4882a593Smuzhiyun start = -1;
4733*4882a593Smuzhiyun end = t->count;
4734*4882a593Smuzhiyun
4735*4882a593Smuzhiyun for ( ; ; ) {
4736*4882a593Smuzhiyun pos = start + (end - start + 1) / 2;
4737*4882a593Smuzhiyun
4738*4882a593Smuzhiyun /*
4739*4882a593Smuzhiyun * There is nothing at "end". If we end up there
4740*4882a593Smuzhiyun * we need to insert the new element before "end".
4741*4882a593Smuzhiyun */
4742*4882a593Smuzhiyun if (pos == end)
4743*4882a593Smuzhiyun break;
4744*4882a593Smuzhiyun
4745*4882a593Smuzhiyun caddr = t->loc[pos].addr;
4746*4882a593Smuzhiyun if (track->addr == caddr) {
4747*4882a593Smuzhiyun
4748*4882a593Smuzhiyun l = &t->loc[pos];
4749*4882a593Smuzhiyun l->count++;
4750*4882a593Smuzhiyun if (track->when) {
4751*4882a593Smuzhiyun l->sum_time += age;
4752*4882a593Smuzhiyun if (age < l->min_time)
4753*4882a593Smuzhiyun l->min_time = age;
4754*4882a593Smuzhiyun if (age > l->max_time)
4755*4882a593Smuzhiyun l->max_time = age;
4756*4882a593Smuzhiyun
4757*4882a593Smuzhiyun if (track->pid < l->min_pid)
4758*4882a593Smuzhiyun l->min_pid = track->pid;
4759*4882a593Smuzhiyun if (track->pid > l->max_pid)
4760*4882a593Smuzhiyun l->max_pid = track->pid;
4761*4882a593Smuzhiyun
4762*4882a593Smuzhiyun cpumask_set_cpu(track->cpu,
4763*4882a593Smuzhiyun to_cpumask(l->cpus));
4764*4882a593Smuzhiyun }
4765*4882a593Smuzhiyun node_set(page_to_nid(virt_to_page(track)), l->nodes);
4766*4882a593Smuzhiyun return 1;
4767*4882a593Smuzhiyun }
4768*4882a593Smuzhiyun
4769*4882a593Smuzhiyun if (track->addr < caddr)
4770*4882a593Smuzhiyun end = pos;
4771*4882a593Smuzhiyun else
4772*4882a593Smuzhiyun start = pos;
4773*4882a593Smuzhiyun }
4774*4882a593Smuzhiyun
4775*4882a593Smuzhiyun /*
4776*4882a593Smuzhiyun * Not found. Insert new tracking element.
4777*4882a593Smuzhiyun */
4778*4882a593Smuzhiyun if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4779*4882a593Smuzhiyun return 0;
4780*4882a593Smuzhiyun
4781*4882a593Smuzhiyun l = t->loc + pos;
4782*4882a593Smuzhiyun if (pos < t->count)
4783*4882a593Smuzhiyun memmove(l + 1, l,
4784*4882a593Smuzhiyun (t->count - pos) * sizeof(struct location));
4785*4882a593Smuzhiyun t->count++;
4786*4882a593Smuzhiyun l->count = 1;
4787*4882a593Smuzhiyun l->addr = track->addr;
4788*4882a593Smuzhiyun l->sum_time = age;
4789*4882a593Smuzhiyun l->min_time = age;
4790*4882a593Smuzhiyun l->max_time = age;
4791*4882a593Smuzhiyun l->min_pid = track->pid;
4792*4882a593Smuzhiyun l->max_pid = track->pid;
4793*4882a593Smuzhiyun cpumask_clear(to_cpumask(l->cpus));
4794*4882a593Smuzhiyun cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4795*4882a593Smuzhiyun nodes_clear(l->nodes);
4796*4882a593Smuzhiyun node_set(page_to_nid(virt_to_page(track)), l->nodes);
4797*4882a593Smuzhiyun return 1;
4798*4882a593Smuzhiyun }
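/*
 * add_location() keeps t->loc sorted by track->addr: the binary search above
 * either finds an existing entry, whose statistics are updated in place, or
 * the insertion point, where the tail of the array is shifted up by one
 * location with memmove(). Lookups are O(log n) per object, insertions O(n)
 * in the worst case.
 */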
4799*4882a593Smuzhiyun
4800*4882a593Smuzhiyun static void process_slab(struct loc_track *t, struct kmem_cache *s,
4801*4882a593Smuzhiyun struct page *page, enum track_item alloc,
4802*4882a593Smuzhiyun unsigned long *obj_map)
4803*4882a593Smuzhiyun {
4804*4882a593Smuzhiyun void *addr = page_address(page);
4805*4882a593Smuzhiyun void *p;
4806*4882a593Smuzhiyun
4807*4882a593Smuzhiyun __fill_map(obj_map, s, page);
4808*4882a593Smuzhiyun
4809*4882a593Smuzhiyun for_each_object(p, s, addr, page->objects)
4810*4882a593Smuzhiyun if (!test_bit(__obj_to_index(s, addr, p), obj_map))
4811*4882a593Smuzhiyun add_location(t, s, get_track(s, p, alloc));
4812*4882a593Smuzhiyun }
4813*4882a593Smuzhiyun #endif /* CONFIG_DEBUG_FS */
4814*4882a593Smuzhiyun #endif /* CONFIG_SLUB_DEBUG */
4815*4882a593Smuzhiyun
4816*4882a593Smuzhiyun #ifdef SLUB_RESILIENCY_TEST
4817*4882a593Smuzhiyun static void __init resiliency_test(void)
4818*4882a593Smuzhiyun {
4819*4882a593Smuzhiyun u8 *p;
4820*4882a593Smuzhiyun int type = KMALLOC_NORMAL;
4821*4882a593Smuzhiyun
4822*4882a593Smuzhiyun BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4823*4882a593Smuzhiyun
4824*4882a593Smuzhiyun pr_err("SLUB resiliency testing\n");
4825*4882a593Smuzhiyun pr_err("-----------------------\n");
4826*4882a593Smuzhiyun pr_err("A. Corruption after allocation\n");
4827*4882a593Smuzhiyun
4828*4882a593Smuzhiyun p = kzalloc(16, GFP_KERNEL);
4829*4882a593Smuzhiyun p[16] = 0x12;
4830*4882a593Smuzhiyun pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
4831*4882a593Smuzhiyun p + 16);
4832*4882a593Smuzhiyun
4833*4882a593Smuzhiyun validate_slab_cache(kmalloc_caches[type][4]);
4834*4882a593Smuzhiyun
4835*4882a593Smuzhiyun /* Hmmm... The next two are dangerous */
4836*4882a593Smuzhiyun p = kzalloc(32, GFP_KERNEL);
4837*4882a593Smuzhiyun p[32 + sizeof(void *)] = 0x34;
4838*4882a593Smuzhiyun pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
4839*4882a593Smuzhiyun p);
4840*4882a593Smuzhiyun pr_err("If allocated object is overwritten then not detectable\n\n");
4841*4882a593Smuzhiyun
4842*4882a593Smuzhiyun validate_slab_cache(kmalloc_caches[type][5]);
4843*4882a593Smuzhiyun p = kzalloc(64, GFP_KERNEL);
4844*4882a593Smuzhiyun p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4845*4882a593Smuzhiyun *p = 0x56;
4846*4882a593Smuzhiyun pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4847*4882a593Smuzhiyun p);
4848*4882a593Smuzhiyun pr_err("If allocated object is overwritten then not detectable\n\n");
4849*4882a593Smuzhiyun validate_slab_cache(kmalloc_caches[type][6]);
4850*4882a593Smuzhiyun
4851*4882a593Smuzhiyun pr_err("\nB. Corruption after free\n");
4852*4882a593Smuzhiyun p = kzalloc(128, GFP_KERNEL);
4853*4882a593Smuzhiyun kfree(p);
4854*4882a593Smuzhiyun *p = 0x78;
4855*4882a593Smuzhiyun pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4856*4882a593Smuzhiyun validate_slab_cache(kmalloc_caches[type][7]);
4857*4882a593Smuzhiyun
4858*4882a593Smuzhiyun p = kzalloc(256, GFP_KERNEL);
4859*4882a593Smuzhiyun kfree(p);
4860*4882a593Smuzhiyun p[50] = 0x9a;
4861*4882a593Smuzhiyun pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
4862*4882a593Smuzhiyun validate_slab_cache(kmalloc_caches[type][8]);
4863*4882a593Smuzhiyun
4864*4882a593Smuzhiyun p = kzalloc(512, GFP_KERNEL);
4865*4882a593Smuzhiyun kfree(p);
4866*4882a593Smuzhiyun p[512] = 0xab;
4867*4882a593Smuzhiyun pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4868*4882a593Smuzhiyun validate_slab_cache(kmalloc_caches[type][9]);
4869*4882a593Smuzhiyun }
4870*4882a593Smuzhiyun #else
4871*4882a593Smuzhiyun #ifdef CONFIG_SLUB_SYSFS
4872*4882a593Smuzhiyun static void resiliency_test(void) {};
4873*4882a593Smuzhiyun #endif
4874*4882a593Smuzhiyun #endif /* SLUB_RESILIENCY_TEST */
4875*4882a593Smuzhiyun
4876*4882a593Smuzhiyun #ifdef CONFIG_SLUB_SYSFS
4877*4882a593Smuzhiyun enum slab_stat_type {
4878*4882a593Smuzhiyun SL_ALL, /* All slabs */
4879*4882a593Smuzhiyun SL_PARTIAL, /* Only partially allocated slabs */
4880*4882a593Smuzhiyun SL_CPU, /* Only slabs used for cpu caches */
4881*4882a593Smuzhiyun SL_OBJECTS, /* Determine allocated objects not slabs */
4882*4882a593Smuzhiyun SL_TOTAL /* Determine object capacity not slabs */
4883*4882a593Smuzhiyun };
4884*4882a593Smuzhiyun
4885*4882a593Smuzhiyun #define SO_ALL (1 << SL_ALL)
4886*4882a593Smuzhiyun #define SO_PARTIAL (1 << SL_PARTIAL)
4887*4882a593Smuzhiyun #define SO_CPU (1 << SL_CPU)
4888*4882a593Smuzhiyun #define SO_OBJECTS (1 << SL_OBJECTS)
4889*4882a593Smuzhiyun #define SO_TOTAL (1 << SL_TOTAL)
4890*4882a593Smuzhiyun
4891*4882a593Smuzhiyun #ifdef CONFIG_MEMCG
4892*4882a593Smuzhiyun static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);
4893*4882a593Smuzhiyun
4894*4882a593Smuzhiyun static int __init setup_slub_memcg_sysfs(char *str)
4895*4882a593Smuzhiyun {
4896*4882a593Smuzhiyun int v;
4897*4882a593Smuzhiyun
4898*4882a593Smuzhiyun if (get_option(&str, &v) > 0)
4899*4882a593Smuzhiyun memcg_sysfs_enabled = v;
4900*4882a593Smuzhiyun
4901*4882a593Smuzhiyun return 1;
4902*4882a593Smuzhiyun }
4903*4882a593Smuzhiyun
4904*4882a593Smuzhiyun __setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
4905*4882a593Smuzhiyun #endif
4906*4882a593Smuzhiyun
4907*4882a593Smuzhiyun static ssize_t show_slab_objects(struct kmem_cache *s,
4908*4882a593Smuzhiyun char *buf, unsigned long flags)
4909*4882a593Smuzhiyun {
4910*4882a593Smuzhiyun unsigned long total = 0;
4911*4882a593Smuzhiyun int node;
4912*4882a593Smuzhiyun int x;
4913*4882a593Smuzhiyun unsigned long *nodes;
4914*4882a593Smuzhiyun
4915*4882a593Smuzhiyun nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
4916*4882a593Smuzhiyun if (!nodes)
4917*4882a593Smuzhiyun return -ENOMEM;
4918*4882a593Smuzhiyun
4919*4882a593Smuzhiyun if (flags & SO_CPU) {
4920*4882a593Smuzhiyun int cpu;
4921*4882a593Smuzhiyun
4922*4882a593Smuzhiyun for_each_possible_cpu(cpu) {
4923*4882a593Smuzhiyun struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4924*4882a593Smuzhiyun cpu);
4925*4882a593Smuzhiyun int node;
4926*4882a593Smuzhiyun struct page *page;
4927*4882a593Smuzhiyun
4928*4882a593Smuzhiyun page = READ_ONCE(c->page);
4929*4882a593Smuzhiyun if (!page)
4930*4882a593Smuzhiyun continue;
4931*4882a593Smuzhiyun
4932*4882a593Smuzhiyun node = page_to_nid(page);
4933*4882a593Smuzhiyun if (flags & SO_TOTAL)
4934*4882a593Smuzhiyun x = page->objects;
4935*4882a593Smuzhiyun else if (flags & SO_OBJECTS)
4936*4882a593Smuzhiyun x = page->inuse;
4937*4882a593Smuzhiyun else
4938*4882a593Smuzhiyun x = 1;
4939*4882a593Smuzhiyun
4940*4882a593Smuzhiyun total += x;
4941*4882a593Smuzhiyun nodes[node] += x;
4942*4882a593Smuzhiyun
4943*4882a593Smuzhiyun page = slub_percpu_partial_read_once(c);
4944*4882a593Smuzhiyun if (page) {
4945*4882a593Smuzhiyun node = page_to_nid(page);
4946*4882a593Smuzhiyun if (flags & SO_TOTAL)
4947*4882a593Smuzhiyun WARN_ON_ONCE(1);
4948*4882a593Smuzhiyun else if (flags & SO_OBJECTS)
4949*4882a593Smuzhiyun WARN_ON_ONCE(1);
4950*4882a593Smuzhiyun else
4951*4882a593Smuzhiyun x = page->pages;
4952*4882a593Smuzhiyun total += x;
4953*4882a593Smuzhiyun nodes[node] += x;
4954*4882a593Smuzhiyun }
4955*4882a593Smuzhiyun }
4956*4882a593Smuzhiyun }
4957*4882a593Smuzhiyun
4958*4882a593Smuzhiyun /*
4959*4882a593Smuzhiyun * It is not possible to take "mem_hotplug_lock" here, as "kernfs_mutex" is
4960*4882a593Smuzhiyun * already held and doing so would conflict with the existing lock order:
4961*4882a593Smuzhiyun *
4962*4882a593Smuzhiyun * mem_hotplug_lock->slab_mutex->kernfs_mutex
4963*4882a593Smuzhiyun *
4964*4882a593Smuzhiyun * We don't really need mem_hotplug_lock (to hold off
4965*4882a593Smuzhiyun * slab_mem_going_offline_callback) here because slab's memory hot
4966*4882a593Smuzhiyun * unplug code doesn't destroy the kmem_cache->node[] data.
4967*4882a593Smuzhiyun */
4968*4882a593Smuzhiyun
4969*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
4970*4882a593Smuzhiyun if (flags & SO_ALL) {
4971*4882a593Smuzhiyun struct kmem_cache_node *n;
4972*4882a593Smuzhiyun
4973*4882a593Smuzhiyun for_each_kmem_cache_node(s, node, n) {
4974*4882a593Smuzhiyun
4975*4882a593Smuzhiyun if (flags & SO_TOTAL)
4976*4882a593Smuzhiyun x = atomic_long_read(&n->total_objects);
4977*4882a593Smuzhiyun else if (flags & SO_OBJECTS)
4978*4882a593Smuzhiyun x = atomic_long_read(&n->total_objects) -
4979*4882a593Smuzhiyun count_partial(n, count_free);
4980*4882a593Smuzhiyun else
4981*4882a593Smuzhiyun x = atomic_long_read(&n->nr_slabs);
4982*4882a593Smuzhiyun total += x;
4983*4882a593Smuzhiyun nodes[node] += x;
4984*4882a593Smuzhiyun }
4985*4882a593Smuzhiyun
4986*4882a593Smuzhiyun } else
4987*4882a593Smuzhiyun #endif
4988*4882a593Smuzhiyun if (flags & SO_PARTIAL) {
4989*4882a593Smuzhiyun struct kmem_cache_node *n;
4990*4882a593Smuzhiyun
4991*4882a593Smuzhiyun for_each_kmem_cache_node(s, node, n) {
4992*4882a593Smuzhiyun if (flags & SO_TOTAL)
4993*4882a593Smuzhiyun x = count_partial(n, count_total);
4994*4882a593Smuzhiyun else if (flags & SO_OBJECTS)
4995*4882a593Smuzhiyun x = count_partial(n, count_inuse);
4996*4882a593Smuzhiyun else
4997*4882a593Smuzhiyun x = n->nr_partial;
4998*4882a593Smuzhiyun total += x;
4999*4882a593Smuzhiyun nodes[node] += x;
5000*4882a593Smuzhiyun }
5001*4882a593Smuzhiyun }
5002*4882a593Smuzhiyun x = sprintf(buf, "%lu", total);
5003*4882a593Smuzhiyun #ifdef CONFIG_NUMA
5004*4882a593Smuzhiyun for (node = 0; node < nr_node_ids; node++)
5005*4882a593Smuzhiyun if (nodes[node])
5006*4882a593Smuzhiyun x += sprintf(buf + x, " N%d=%lu",
5007*4882a593Smuzhiyun node, nodes[node]);
5008*4882a593Smuzhiyun #endif
5009*4882a593Smuzhiyun kfree(nodes);
5010*4882a593Smuzhiyun return x + sprintf(buf + x, "\n");
5011*4882a593Smuzhiyun }
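/*
 * The resulting buffer has the form "<total> N<node>=<count> ...", e.g.
 * (illustrative values only):
 *
 *	4096 N0=3072 N1=1024
 *
 * The per-node fields are emitted only on CONFIG_NUMA kernels and only for
 * nodes with a non-zero count.
 */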
5012*4882a593Smuzhiyun
5013*4882a593Smuzhiyun #define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
5014*4882a593Smuzhiyun #define to_slab(n) container_of(n, struct kmem_cache, kobj)
5015*4882a593Smuzhiyun
5016*4882a593Smuzhiyun struct slab_attribute {
5017*4882a593Smuzhiyun struct attribute attr;
5018*4882a593Smuzhiyun ssize_t (*show)(struct kmem_cache *s, char *buf);
5019*4882a593Smuzhiyun ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
5020*4882a593Smuzhiyun };
5021*4882a593Smuzhiyun
5022*4882a593Smuzhiyun #define SLAB_ATTR_RO(_name) \
5023*4882a593Smuzhiyun static struct slab_attribute _name##_attr = \
5024*4882a593Smuzhiyun __ATTR(_name, 0400, _name##_show, NULL)
5025*4882a593Smuzhiyun
5026*4882a593Smuzhiyun #define SLAB_ATTR(_name) \
5027*4882a593Smuzhiyun static struct slab_attribute _name##_attr = \
5028*4882a593Smuzhiyun __ATTR(_name, 0600, _name##_show, _name##_store)
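/*
 * For example, SLAB_ATTR_RO(slab_size) below expands to a read-only (0400)
 * slab_size_attr backed by slab_size_show() alone, while SLAB_ATTR(min_partial)
 * produces a 0600 attribute wired to both min_partial_show() and
 * min_partial_store().
 */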
5029*4882a593Smuzhiyun
5030*4882a593Smuzhiyun static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
5031*4882a593Smuzhiyun {
5032*4882a593Smuzhiyun return sprintf(buf, "%u\n", s->size);
5033*4882a593Smuzhiyun }
5034*4882a593Smuzhiyun SLAB_ATTR_RO(slab_size);
5035*4882a593Smuzhiyun
5036*4882a593Smuzhiyun static ssize_t align_show(struct kmem_cache *s, char *buf)
5037*4882a593Smuzhiyun {
5038*4882a593Smuzhiyun return sprintf(buf, "%u\n", s->align);
5039*4882a593Smuzhiyun }
5040*4882a593Smuzhiyun SLAB_ATTR_RO(align);
5041*4882a593Smuzhiyun
5042*4882a593Smuzhiyun static ssize_t object_size_show(struct kmem_cache *s, char *buf)
5043*4882a593Smuzhiyun {
5044*4882a593Smuzhiyun return sprintf(buf, "%u\n", s->object_size);
5045*4882a593Smuzhiyun }
5046*4882a593Smuzhiyun SLAB_ATTR_RO(object_size);
5047*4882a593Smuzhiyun
5048*4882a593Smuzhiyun static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
5049*4882a593Smuzhiyun {
5050*4882a593Smuzhiyun return sprintf(buf, "%u\n", oo_objects(s->oo));
5051*4882a593Smuzhiyun }
5052*4882a593Smuzhiyun SLAB_ATTR_RO(objs_per_slab);
5053*4882a593Smuzhiyun
5054*4882a593Smuzhiyun static ssize_t order_show(struct kmem_cache *s, char *buf)
5055*4882a593Smuzhiyun {
5056*4882a593Smuzhiyun return sprintf(buf, "%u\n", oo_order(s->oo));
5057*4882a593Smuzhiyun }
5058*4882a593Smuzhiyun SLAB_ATTR_RO(order);
5059*4882a593Smuzhiyun
5060*4882a593Smuzhiyun static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
5061*4882a593Smuzhiyun {
5062*4882a593Smuzhiyun return sprintf(buf, "%lu\n", s->min_partial);
5063*4882a593Smuzhiyun }
5064*4882a593Smuzhiyun
5065*4882a593Smuzhiyun static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
5066*4882a593Smuzhiyun size_t length)
5067*4882a593Smuzhiyun {
5068*4882a593Smuzhiyun unsigned long min;
5069*4882a593Smuzhiyun int err;
5070*4882a593Smuzhiyun
5071*4882a593Smuzhiyun err = kstrtoul(buf, 10, &min);
5072*4882a593Smuzhiyun if (err)
5073*4882a593Smuzhiyun return err;
5074*4882a593Smuzhiyun
5075*4882a593Smuzhiyun set_min_partial(s, min);
5076*4882a593Smuzhiyun return length;
5077*4882a593Smuzhiyun }
5078*4882a593Smuzhiyun SLAB_ATTR(min_partial);
5079*4882a593Smuzhiyun
5080*4882a593Smuzhiyun static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
5081*4882a593Smuzhiyun {
5082*4882a593Smuzhiyun return sprintf(buf, "%u\n", slub_cpu_partial(s));
5083*4882a593Smuzhiyun }
5084*4882a593Smuzhiyun
5085*4882a593Smuzhiyun static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
5086*4882a593Smuzhiyun size_t length)
5087*4882a593Smuzhiyun {
5088*4882a593Smuzhiyun unsigned int objects;
5089*4882a593Smuzhiyun int err;
5090*4882a593Smuzhiyun
5091*4882a593Smuzhiyun err = kstrtouint(buf, 10, &objects);
5092*4882a593Smuzhiyun if (err)
5093*4882a593Smuzhiyun return err;
5094*4882a593Smuzhiyun if (objects && !kmem_cache_has_cpu_partial(s))
5095*4882a593Smuzhiyun return -EINVAL;
5096*4882a593Smuzhiyun
5097*4882a593Smuzhiyun slub_set_cpu_partial(s, objects);
5098*4882a593Smuzhiyun flush_all(s);
5099*4882a593Smuzhiyun return length;
5100*4882a593Smuzhiyun }
5101*4882a593Smuzhiyun SLAB_ATTR(cpu_partial);
5102*4882a593Smuzhiyun
5103*4882a593Smuzhiyun static ssize_t ctor_show(struct kmem_cache *s, char *buf)
5104*4882a593Smuzhiyun {
5105*4882a593Smuzhiyun if (!s->ctor)
5106*4882a593Smuzhiyun return 0;
5107*4882a593Smuzhiyun return sprintf(buf, "%pS\n", s->ctor);
5108*4882a593Smuzhiyun }
5109*4882a593Smuzhiyun SLAB_ATTR_RO(ctor);
5110*4882a593Smuzhiyun
5111*4882a593Smuzhiyun static ssize_t aliases_show(struct kmem_cache *s, char *buf)
5112*4882a593Smuzhiyun {
5113*4882a593Smuzhiyun return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
5114*4882a593Smuzhiyun }
5115*4882a593Smuzhiyun SLAB_ATTR_RO(aliases);
5116*4882a593Smuzhiyun
5117*4882a593Smuzhiyun static ssize_t partial_show(struct kmem_cache *s, char *buf)
5118*4882a593Smuzhiyun {
5119*4882a593Smuzhiyun return show_slab_objects(s, buf, SO_PARTIAL);
5120*4882a593Smuzhiyun }
5121*4882a593Smuzhiyun SLAB_ATTR_RO(partial);
5122*4882a593Smuzhiyun
5123*4882a593Smuzhiyun static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5124*4882a593Smuzhiyun {
5125*4882a593Smuzhiyun return show_slab_objects(s, buf, SO_CPU);
5126*4882a593Smuzhiyun }
5127*4882a593Smuzhiyun SLAB_ATTR_RO(cpu_slabs);
5128*4882a593Smuzhiyun
5129*4882a593Smuzhiyun static ssize_t objects_show(struct kmem_cache *s, char *buf)
5130*4882a593Smuzhiyun {
5131*4882a593Smuzhiyun return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
5132*4882a593Smuzhiyun }
5133*4882a593Smuzhiyun SLAB_ATTR_RO(objects);
5134*4882a593Smuzhiyun
5135*4882a593Smuzhiyun static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5136*4882a593Smuzhiyun {
5137*4882a593Smuzhiyun return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5138*4882a593Smuzhiyun }
5139*4882a593Smuzhiyun SLAB_ATTR_RO(objects_partial);
5140*4882a593Smuzhiyun
5141*4882a593Smuzhiyun static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5142*4882a593Smuzhiyun {
5143*4882a593Smuzhiyun int objects = 0;
5144*4882a593Smuzhiyun int pages = 0;
5145*4882a593Smuzhiyun int cpu;
5146*4882a593Smuzhiyun int len;
5147*4882a593Smuzhiyun
5148*4882a593Smuzhiyun for_each_online_cpu(cpu) {
5149*4882a593Smuzhiyun struct page *page;
5150*4882a593Smuzhiyun
5151*4882a593Smuzhiyun page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5152*4882a593Smuzhiyun
5153*4882a593Smuzhiyun if (page) {
5154*4882a593Smuzhiyun pages += page->pages;
5155*4882a593Smuzhiyun objects += page->pobjects;
5156*4882a593Smuzhiyun }
5157*4882a593Smuzhiyun }
5158*4882a593Smuzhiyun
5159*4882a593Smuzhiyun len = sprintf(buf, "%d(%d)", objects, pages);
5160*4882a593Smuzhiyun
5161*4882a593Smuzhiyun #ifdef CONFIG_SMP
5162*4882a593Smuzhiyun for_each_online_cpu(cpu) {
5163*4882a593Smuzhiyun struct page *page;
5164*4882a593Smuzhiyun
5165*4882a593Smuzhiyun page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5166*4882a593Smuzhiyun
5167*4882a593Smuzhiyun if (page && len < PAGE_SIZE - 20)
5168*4882a593Smuzhiyun len += sprintf(buf + len, " C%d=%d(%d)", cpu,
5169*4882a593Smuzhiyun page->pobjects, page->pages);
5170*4882a593Smuzhiyun }
5171*4882a593Smuzhiyun #endif
5172*4882a593Smuzhiyun return len + sprintf(buf + len, "\n");
5173*4882a593Smuzhiyun }
5174*4882a593Smuzhiyun SLAB_ATTR_RO(slabs_cpu_partial);
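/*
 * slabs_cpu_partial reads as "<objects>(<pages>)" followed, on SMP, by a
 * per-cpu breakdown for cpus that currently hold partial slabs, e.g.
 * (illustrative values only):
 *
 *	120(5) C0=24(1) C2=96(4)
 */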
5175*4882a593Smuzhiyun
5176*4882a593Smuzhiyun static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5177*4882a593Smuzhiyun {
5178*4882a593Smuzhiyun return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5179*4882a593Smuzhiyun }
5180*4882a593Smuzhiyun SLAB_ATTR_RO(reclaim_account);
5181*4882a593Smuzhiyun
5182*4882a593Smuzhiyun static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5183*4882a593Smuzhiyun {
5184*4882a593Smuzhiyun return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5185*4882a593Smuzhiyun }
5186*4882a593Smuzhiyun SLAB_ATTR_RO(hwcache_align);
5187*4882a593Smuzhiyun
5188*4882a593Smuzhiyun #ifdef CONFIG_ZONE_DMA
5189*4882a593Smuzhiyun static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5190*4882a593Smuzhiyun {
5191*4882a593Smuzhiyun return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5192*4882a593Smuzhiyun }
5193*4882a593Smuzhiyun SLAB_ATTR_RO(cache_dma);
5194*4882a593Smuzhiyun #endif
5195*4882a593Smuzhiyun
5196*4882a593Smuzhiyun static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5197*4882a593Smuzhiyun {
5198*4882a593Smuzhiyun return sprintf(buf, "%u\n", s->usersize);
5199*4882a593Smuzhiyun }
5200*4882a593Smuzhiyun SLAB_ATTR_RO(usersize);
5201*4882a593Smuzhiyun
5202*4882a593Smuzhiyun static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5203*4882a593Smuzhiyun {
5204*4882a593Smuzhiyun return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
5205*4882a593Smuzhiyun }
5206*4882a593Smuzhiyun SLAB_ATTR_RO(destroy_by_rcu);
5207*4882a593Smuzhiyun
5208*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
5209*4882a593Smuzhiyun static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5210*4882a593Smuzhiyun {
5211*4882a593Smuzhiyun return show_slab_objects(s, buf, SO_ALL);
5212*4882a593Smuzhiyun }
5213*4882a593Smuzhiyun SLAB_ATTR_RO(slabs);
5214*4882a593Smuzhiyun
5215*4882a593Smuzhiyun static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5216*4882a593Smuzhiyun {
5217*4882a593Smuzhiyun return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5218*4882a593Smuzhiyun }
5219*4882a593Smuzhiyun SLAB_ATTR_RO(total_objects);
5220*4882a593Smuzhiyun
5221*4882a593Smuzhiyun static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5222*4882a593Smuzhiyun {
5223*4882a593Smuzhiyun return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5224*4882a593Smuzhiyun }
5225*4882a593Smuzhiyun SLAB_ATTR_RO(sanity_checks);
5226*4882a593Smuzhiyun
5227*4882a593Smuzhiyun static ssize_t trace_show(struct kmem_cache *s, char *buf)
5228*4882a593Smuzhiyun {
5229*4882a593Smuzhiyun return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5230*4882a593Smuzhiyun }
5231*4882a593Smuzhiyun SLAB_ATTR_RO(trace);
5232*4882a593Smuzhiyun
5233*4882a593Smuzhiyun static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5234*4882a593Smuzhiyun {
5235*4882a593Smuzhiyun return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5236*4882a593Smuzhiyun }
5237*4882a593Smuzhiyun
5238*4882a593Smuzhiyun SLAB_ATTR_RO(red_zone);
5239*4882a593Smuzhiyun
5240*4882a593Smuzhiyun static ssize_t poison_show(struct kmem_cache *s, char *buf)
5241*4882a593Smuzhiyun {
5242*4882a593Smuzhiyun return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
5243*4882a593Smuzhiyun }
5244*4882a593Smuzhiyun
5245*4882a593Smuzhiyun SLAB_ATTR_RO(poison);
5246*4882a593Smuzhiyun
5247*4882a593Smuzhiyun static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5248*4882a593Smuzhiyun {
5249*4882a593Smuzhiyun return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5250*4882a593Smuzhiyun }
5251*4882a593Smuzhiyun
5252*4882a593Smuzhiyun SLAB_ATTR_RO(store_user);
5253*4882a593Smuzhiyun
5254*4882a593Smuzhiyun static ssize_t validate_show(struct kmem_cache *s, char *buf)
5255*4882a593Smuzhiyun {
5256*4882a593Smuzhiyun return 0;
5257*4882a593Smuzhiyun }
5258*4882a593Smuzhiyun
5259*4882a593Smuzhiyun static ssize_t validate_store(struct kmem_cache *s,
5260*4882a593Smuzhiyun const char *buf, size_t length)
5261*4882a593Smuzhiyun {
5262*4882a593Smuzhiyun int ret = -EINVAL;
5263*4882a593Smuzhiyun
5264*4882a593Smuzhiyun if (buf[0] == '1') {
5265*4882a593Smuzhiyun ret = validate_slab_cache(s);
5266*4882a593Smuzhiyun if (ret >= 0)
5267*4882a593Smuzhiyun ret = length;
5268*4882a593Smuzhiyun }
5269*4882a593Smuzhiyun return ret;
5270*4882a593Smuzhiyun }
5271*4882a593Smuzhiyun SLAB_ATTR(validate);
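/*
 * Illustrative usage, assuming sysfs is mounted at /sys: writing a string
 * that starts with '1' to /sys/kernel/slab/<cache>/validate runs
 * validate_slab_cache() and reports problems to the kernel log; any other
 * input returns -EINVAL, and reading the file returns nothing.
 */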
5272*4882a593Smuzhiyun
5273*4882a593Smuzhiyun #endif /* CONFIG_SLUB_DEBUG */
5274*4882a593Smuzhiyun
5275*4882a593Smuzhiyun #ifdef CONFIG_FAILSLAB
5276*4882a593Smuzhiyun static ssize_t failslab_show(struct kmem_cache *s, char *buf)
5277*4882a593Smuzhiyun {
5278*4882a593Smuzhiyun return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
5279*4882a593Smuzhiyun }
5280*4882a593Smuzhiyun SLAB_ATTR_RO(failslab);
5281*4882a593Smuzhiyun #endif
5282*4882a593Smuzhiyun
5283*4882a593Smuzhiyun static ssize_t shrink_show(struct kmem_cache *s, char *buf)
5284*4882a593Smuzhiyun {
5285*4882a593Smuzhiyun return 0;
5286*4882a593Smuzhiyun }
5287*4882a593Smuzhiyun
5288*4882a593Smuzhiyun static ssize_t shrink_store(struct kmem_cache *s,
5289*4882a593Smuzhiyun const char *buf, size_t length)
5290*4882a593Smuzhiyun {
5291*4882a593Smuzhiyun if (buf[0] == '1')
5292*4882a593Smuzhiyun kmem_cache_shrink(s);
5293*4882a593Smuzhiyun else
5294*4882a593Smuzhiyun return -EINVAL;
5295*4882a593Smuzhiyun return length;
5296*4882a593Smuzhiyun }
5297*4882a593Smuzhiyun SLAB_ATTR(shrink);
5298*4882a593Smuzhiyun
5299*4882a593Smuzhiyun #ifdef CONFIG_NUMA
5300*4882a593Smuzhiyun static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
5301*4882a593Smuzhiyun {
5302*4882a593Smuzhiyun return sprintf(buf, "%u\n", s->remote_node_defrag_ratio / 10);
5303*4882a593Smuzhiyun }
5304*4882a593Smuzhiyun
5305*4882a593Smuzhiyun static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5306*4882a593Smuzhiyun const char *buf, size_t length)
5307*4882a593Smuzhiyun {
5308*4882a593Smuzhiyun unsigned int ratio;
5309*4882a593Smuzhiyun int err;
5310*4882a593Smuzhiyun
5311*4882a593Smuzhiyun err = kstrtouint(buf, 10, &ratio);
5312*4882a593Smuzhiyun if (err)
5313*4882a593Smuzhiyun return err;
5314*4882a593Smuzhiyun if (ratio > 100)
5315*4882a593Smuzhiyun return -ERANGE;
5316*4882a593Smuzhiyun
5317*4882a593Smuzhiyun s->remote_node_defrag_ratio = ratio * 10;
5318*4882a593Smuzhiyun
5319*4882a593Smuzhiyun return length;
5320*4882a593Smuzhiyun }
5321*4882a593Smuzhiyun SLAB_ATTR(remote_node_defrag_ratio);
5322*4882a593Smuzhiyun #endif
5323*4882a593Smuzhiyun
5324*4882a593Smuzhiyun #ifdef CONFIG_SLUB_STATS
5325*4882a593Smuzhiyun static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5326*4882a593Smuzhiyun {
5327*4882a593Smuzhiyun unsigned long sum = 0;
5328*4882a593Smuzhiyun int cpu;
5329*4882a593Smuzhiyun int len;
5330*4882a593Smuzhiyun int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);
5331*4882a593Smuzhiyun
5332*4882a593Smuzhiyun if (!data)
5333*4882a593Smuzhiyun return -ENOMEM;
5334*4882a593Smuzhiyun
5335*4882a593Smuzhiyun for_each_online_cpu(cpu) {
5336*4882a593Smuzhiyun unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5337*4882a593Smuzhiyun
5338*4882a593Smuzhiyun data[cpu] = x;
5339*4882a593Smuzhiyun sum += x;
5340*4882a593Smuzhiyun }
5341*4882a593Smuzhiyun
5342*4882a593Smuzhiyun len = sprintf(buf, "%lu", sum);
5343*4882a593Smuzhiyun
5344*4882a593Smuzhiyun #ifdef CONFIG_SMP
5345*4882a593Smuzhiyun for_each_online_cpu(cpu) {
5346*4882a593Smuzhiyun if (data[cpu] && len < PAGE_SIZE - 20)
5347*4882a593Smuzhiyun len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
5348*4882a593Smuzhiyun }
5349*4882a593Smuzhiyun #endif
5350*4882a593Smuzhiyun kfree(data);
5351*4882a593Smuzhiyun return len + sprintf(buf + len, "\n");
5352*4882a593Smuzhiyun }
5353*4882a593Smuzhiyun
5354*4882a593Smuzhiyun static void clear_stat(struct kmem_cache *s, enum stat_item si)
5355*4882a593Smuzhiyun {
5356*4882a593Smuzhiyun int cpu;
5357*4882a593Smuzhiyun
5358*4882a593Smuzhiyun for_each_online_cpu(cpu)
5359*4882a593Smuzhiyun per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5360*4882a593Smuzhiyun }
5361*4882a593Smuzhiyun
5362*4882a593Smuzhiyun #define STAT_ATTR(si, text) \
5363*4882a593Smuzhiyun static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5364*4882a593Smuzhiyun { \
5365*4882a593Smuzhiyun return show_stat(s, buf, si); \
5366*4882a593Smuzhiyun } \
5367*4882a593Smuzhiyun static ssize_t text##_store(struct kmem_cache *s, \
5368*4882a593Smuzhiyun const char *buf, size_t length) \
5369*4882a593Smuzhiyun { \
5370*4882a593Smuzhiyun if (buf[0] != '0') \
5371*4882a593Smuzhiyun return -EINVAL; \
5372*4882a593Smuzhiyun clear_stat(s, si); \
5373*4882a593Smuzhiyun return length; \
5374*4882a593Smuzhiyun } \
5375*4882a593Smuzhiyun SLAB_ATTR(text); \
5376*4882a593Smuzhiyun
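/*
 * Each STAT_ATTR() use below creates a 0600 sysfs file: reading it prints
 * the counter summed over all online cpus (plus a per-cpu breakdown on SMP),
 * and writing a string starting with '0' clears the counter on every online
 * cpu. For example, STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath) generates
 * alloc_fastpath_show()/alloc_fastpath_store() wrapped by
 * SLAB_ATTR(alloc_fastpath).
 */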
5377*4882a593Smuzhiyun STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5378*4882a593Smuzhiyun STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5379*4882a593Smuzhiyun STAT_ATTR(FREE_FASTPATH, free_fastpath);
5380*4882a593Smuzhiyun STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5381*4882a593Smuzhiyun STAT_ATTR(FREE_FROZEN, free_frozen);
5382*4882a593Smuzhiyun STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5383*4882a593Smuzhiyun STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5384*4882a593Smuzhiyun STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5385*4882a593Smuzhiyun STAT_ATTR(ALLOC_SLAB, alloc_slab);
5386*4882a593Smuzhiyun STAT_ATTR(ALLOC_REFILL, alloc_refill);
5387*4882a593Smuzhiyun STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5388*4882a593Smuzhiyun STAT_ATTR(FREE_SLAB, free_slab);
5389*4882a593Smuzhiyun STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5390*4882a593Smuzhiyun STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5391*4882a593Smuzhiyun STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5392*4882a593Smuzhiyun STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5393*4882a593Smuzhiyun STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5394*4882a593Smuzhiyun STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5395*4882a593Smuzhiyun STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5396*4882a593Smuzhiyun STAT_ATTR(ORDER_FALLBACK, order_fallback);
5397*4882a593Smuzhiyun STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5398*4882a593Smuzhiyun STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5399*4882a593Smuzhiyun STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5400*4882a593Smuzhiyun STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5401*4882a593Smuzhiyun STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
5402*4882a593Smuzhiyun STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5403*4882a593Smuzhiyun #endif /* CONFIG_SLUB_STATS */
5404*4882a593Smuzhiyun
5405*4882a593Smuzhiyun static struct attribute *slab_attrs[] = {
5406*4882a593Smuzhiyun &slab_size_attr.attr,
5407*4882a593Smuzhiyun &object_size_attr.attr,
5408*4882a593Smuzhiyun &objs_per_slab_attr.attr,
5409*4882a593Smuzhiyun &order_attr.attr,
5410*4882a593Smuzhiyun &min_partial_attr.attr,
5411*4882a593Smuzhiyun &cpu_partial_attr.attr,
5412*4882a593Smuzhiyun &objects_attr.attr,
5413*4882a593Smuzhiyun &objects_partial_attr.attr,
5414*4882a593Smuzhiyun &partial_attr.attr,
5415*4882a593Smuzhiyun &cpu_slabs_attr.attr,
5416*4882a593Smuzhiyun &ctor_attr.attr,
5417*4882a593Smuzhiyun &aliases_attr.attr,
5418*4882a593Smuzhiyun &align_attr.attr,
5419*4882a593Smuzhiyun &hwcache_align_attr.attr,
5420*4882a593Smuzhiyun &reclaim_account_attr.attr,
5421*4882a593Smuzhiyun &destroy_by_rcu_attr.attr,
5422*4882a593Smuzhiyun &shrink_attr.attr,
5423*4882a593Smuzhiyun &slabs_cpu_partial_attr.attr,
5424*4882a593Smuzhiyun #ifdef CONFIG_SLUB_DEBUG
5425*4882a593Smuzhiyun &total_objects_attr.attr,
5426*4882a593Smuzhiyun &slabs_attr.attr,
5427*4882a593Smuzhiyun &sanity_checks_attr.attr,
5428*4882a593Smuzhiyun &trace_attr.attr,
5429*4882a593Smuzhiyun &red_zone_attr.attr,
5430*4882a593Smuzhiyun &poison_attr.attr,
5431*4882a593Smuzhiyun &store_user_attr.attr,
5432*4882a593Smuzhiyun &validate_attr.attr,
5433*4882a593Smuzhiyun #endif
5434*4882a593Smuzhiyun #ifdef CONFIG_ZONE_DMA
5435*4882a593Smuzhiyun &cache_dma_attr.attr,
5436*4882a593Smuzhiyun #endif
5437*4882a593Smuzhiyun #ifdef CONFIG_NUMA
5438*4882a593Smuzhiyun &remote_node_defrag_ratio_attr.attr,
5439*4882a593Smuzhiyun #endif
5440*4882a593Smuzhiyun #ifdef CONFIG_SLUB_STATS
5441*4882a593Smuzhiyun &alloc_fastpath_attr.attr,
5442*4882a593Smuzhiyun &alloc_slowpath_attr.attr,
5443*4882a593Smuzhiyun &free_fastpath_attr.attr,
5444*4882a593Smuzhiyun &free_slowpath_attr.attr,
5445*4882a593Smuzhiyun &free_frozen_attr.attr,
5446*4882a593Smuzhiyun &free_add_partial_attr.attr,
5447*4882a593Smuzhiyun &free_remove_partial_attr.attr,
5448*4882a593Smuzhiyun &alloc_from_partial_attr.attr,
5449*4882a593Smuzhiyun &alloc_slab_attr.attr,
5450*4882a593Smuzhiyun &alloc_refill_attr.attr,
5451*4882a593Smuzhiyun &alloc_node_mismatch_attr.attr,
5452*4882a593Smuzhiyun &free_slab_attr.attr,
5453*4882a593Smuzhiyun &cpuslab_flush_attr.attr,
5454*4882a593Smuzhiyun &deactivate_full_attr.attr,
5455*4882a593Smuzhiyun &deactivate_empty_attr.attr,
5456*4882a593Smuzhiyun &deactivate_to_head_attr.attr,
5457*4882a593Smuzhiyun &deactivate_to_tail_attr.attr,
5458*4882a593Smuzhiyun &deactivate_remote_frees_attr.attr,
5459*4882a593Smuzhiyun &deactivate_bypass_attr.attr,
5460*4882a593Smuzhiyun &order_fallback_attr.attr,
5461*4882a593Smuzhiyun &cmpxchg_double_fail_attr.attr,
5462*4882a593Smuzhiyun &cmpxchg_double_cpu_fail_attr.attr,
5463*4882a593Smuzhiyun &cpu_partial_alloc_attr.attr,
5464*4882a593Smuzhiyun &cpu_partial_free_attr.attr,
5465*4882a593Smuzhiyun &cpu_partial_node_attr.attr,
5466*4882a593Smuzhiyun &cpu_partial_drain_attr.attr,
5467*4882a593Smuzhiyun #endif
5468*4882a593Smuzhiyun #ifdef CONFIG_FAILSLAB
5469*4882a593Smuzhiyun &failslab_attr.attr,
5470*4882a593Smuzhiyun #endif
5471*4882a593Smuzhiyun &usersize_attr.attr,
5472*4882a593Smuzhiyun
5473*4882a593Smuzhiyun NULL
5474*4882a593Smuzhiyun };
5475*4882a593Smuzhiyun
5476*4882a593Smuzhiyun static const struct attribute_group slab_attr_group = {
5477*4882a593Smuzhiyun .attrs = slab_attrs,
5478*4882a593Smuzhiyun };
5479*4882a593Smuzhiyun
5480*4882a593Smuzhiyun static ssize_t slab_attr_show(struct kobject *kobj,
5481*4882a593Smuzhiyun struct attribute *attr,
5482*4882a593Smuzhiyun char *buf)
5483*4882a593Smuzhiyun {
5484*4882a593Smuzhiyun struct slab_attribute *attribute;
5485*4882a593Smuzhiyun struct kmem_cache *s;
5486*4882a593Smuzhiyun int err;
5487*4882a593Smuzhiyun
5488*4882a593Smuzhiyun attribute = to_slab_attr(attr);
5489*4882a593Smuzhiyun s = to_slab(kobj);
5490*4882a593Smuzhiyun
5491*4882a593Smuzhiyun if (!attribute->show)
5492*4882a593Smuzhiyun return -EIO;
5493*4882a593Smuzhiyun
5494*4882a593Smuzhiyun err = attribute->show(s, buf);
5495*4882a593Smuzhiyun
5496*4882a593Smuzhiyun return err;
5497*4882a593Smuzhiyun }
5498*4882a593Smuzhiyun
5499*4882a593Smuzhiyun static ssize_t slab_attr_store(struct kobject *kobj,
5500*4882a593Smuzhiyun struct attribute *attr,
5501*4882a593Smuzhiyun const char *buf, size_t len)
5502*4882a593Smuzhiyun {
5503*4882a593Smuzhiyun struct slab_attribute *attribute;
5504*4882a593Smuzhiyun struct kmem_cache *s;
5505*4882a593Smuzhiyun int err;
5506*4882a593Smuzhiyun
5507*4882a593Smuzhiyun attribute = to_slab_attr(attr);
5508*4882a593Smuzhiyun s = to_slab(kobj);
5509*4882a593Smuzhiyun
5510*4882a593Smuzhiyun if (!attribute->store)
5511*4882a593Smuzhiyun return -EIO;
5512*4882a593Smuzhiyun
5513*4882a593Smuzhiyun err = attribute->store(s, buf, len);
5514*4882a593Smuzhiyun return err;
5515*4882a593Smuzhiyun }
5516*4882a593Smuzhiyun
5517*4882a593Smuzhiyun static void kmem_cache_release(struct kobject *k)
5518*4882a593Smuzhiyun {
5519*4882a593Smuzhiyun slab_kmem_cache_release(to_slab(k));
5520*4882a593Smuzhiyun }
5521*4882a593Smuzhiyun
5522*4882a593Smuzhiyun static const struct sysfs_ops slab_sysfs_ops = {
5523*4882a593Smuzhiyun .show = slab_attr_show,
5524*4882a593Smuzhiyun .store = slab_attr_store,
5525*4882a593Smuzhiyun };
5526*4882a593Smuzhiyun
5527*4882a593Smuzhiyun static struct kobj_type slab_ktype = {
5528*4882a593Smuzhiyun .sysfs_ops = &slab_sysfs_ops,
5529*4882a593Smuzhiyun .release = kmem_cache_release,
5530*4882a593Smuzhiyun };
5531*4882a593Smuzhiyun
5532*4882a593Smuzhiyun static struct kset *slab_kset;
5533*4882a593Smuzhiyun
5534*4882a593Smuzhiyun static inline struct kset *cache_kset(struct kmem_cache *s)
5535*4882a593Smuzhiyun {
5536*4882a593Smuzhiyun return slab_kset;
5537*4882a593Smuzhiyun }
5538*4882a593Smuzhiyun
5539*4882a593Smuzhiyun #define ID_STR_LENGTH 64
5540*4882a593Smuzhiyun
5541*4882a593Smuzhiyun /* Create a unique string id for a slab cache:
5542*4882a593Smuzhiyun *
5543*4882a593Smuzhiyun * Format :[flags-]size
5544*4882a593Smuzhiyun */
5545*4882a593Smuzhiyun static char *create_unique_id(struct kmem_cache *s)
5546*4882a593Smuzhiyun {
5547*4882a593Smuzhiyun char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5548*4882a593Smuzhiyun char *p = name;
5549*4882a593Smuzhiyun
5550*4882a593Smuzhiyun if (!name)
5551*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
5552*4882a593Smuzhiyun
5553*4882a593Smuzhiyun *p++ = ':';
5554*4882a593Smuzhiyun /*
5555*4882a593Smuzhiyun * First flags affecting slabcache operations. We will only
5556*4882a593Smuzhiyun * get here for aliasable slabs so we do not need to support
5557*4882a593Smuzhiyun * too many flags. The flags here must cover all flags that
5558*4882a593Smuzhiyun * are matched during merging to guarantee that the id is
5559*4882a593Smuzhiyun * unique.
5560*4882a593Smuzhiyun */
5561*4882a593Smuzhiyun if (s->flags & SLAB_CACHE_DMA)
5562*4882a593Smuzhiyun *p++ = 'd';
5563*4882a593Smuzhiyun if (s->flags & SLAB_CACHE_DMA32)
5564*4882a593Smuzhiyun *p++ = 'D';
5565*4882a593Smuzhiyun if (s->flags & SLAB_RECLAIM_ACCOUNT)
5566*4882a593Smuzhiyun *p++ = 'a';
5567*4882a593Smuzhiyun if (s->flags & SLAB_CONSISTENCY_CHECKS)
5568*4882a593Smuzhiyun *p++ = 'F';
5569*4882a593Smuzhiyun if (s->flags & SLAB_ACCOUNT)
5570*4882a593Smuzhiyun *p++ = 'A';
5571*4882a593Smuzhiyun if (p != name + 1)
5572*4882a593Smuzhiyun *p++ = '-';
5573*4882a593Smuzhiyun p += sprintf(p, "%07u", s->size);
5574*4882a593Smuzhiyun
5575*4882a593Smuzhiyun BUG_ON(p > name + ID_STR_LENGTH - 1);
5576*4882a593Smuzhiyun return name;
5577*4882a593Smuzhiyun }
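/*
 * Example (illustrative only): a mergeable cache created with SLAB_ACCOUNT
 * and s->size == 192 gets the id ":A-0000192". That string becomes the
 * kobject name under /sys/kernel/slab, and the human-readable cache names
 * are added as symlink aliases pointing at it.
 */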
5578*4882a593Smuzhiyun
5579*4882a593Smuzhiyun static int sysfs_slab_add(struct kmem_cache *s)
5580*4882a593Smuzhiyun {
5581*4882a593Smuzhiyun int err;
5582*4882a593Smuzhiyun const char *name;
5583*4882a593Smuzhiyun struct kset *kset = cache_kset(s);
5584*4882a593Smuzhiyun int unmergeable = slab_unmergeable(s);
5585*4882a593Smuzhiyun
5586*4882a593Smuzhiyun if (!kset) {
5587*4882a593Smuzhiyun kobject_init(&s->kobj, &slab_ktype);
5588*4882a593Smuzhiyun return 0;
5589*4882a593Smuzhiyun }
5590*4882a593Smuzhiyun
5591*4882a593Smuzhiyun if (!unmergeable && disable_higher_order_debug &&
5592*4882a593Smuzhiyun (slub_debug & DEBUG_METADATA_FLAGS))
5593*4882a593Smuzhiyun unmergeable = 1;
5594*4882a593Smuzhiyun
5595*4882a593Smuzhiyun if (unmergeable) {
5596*4882a593Smuzhiyun /*
5597*4882a593Smuzhiyun * Slabcache can never be merged so we can use the name proper.
5598*4882a593Smuzhiyun * This is typically the case for debug situations. In that
5599*4882a593Smuzhiyun * case we can catch duplicate names easily.
5600*4882a593Smuzhiyun */
5601*4882a593Smuzhiyun sysfs_remove_link(&slab_kset->kobj, s->name);
5602*4882a593Smuzhiyun name = s->name;
5603*4882a593Smuzhiyun } else {
5604*4882a593Smuzhiyun /*
5605*4882a593Smuzhiyun * Create a unique name for the slab as a target
5606*4882a593Smuzhiyun * for the symlinks.
5607*4882a593Smuzhiyun */
5608*4882a593Smuzhiyun name = create_unique_id(s);
5609*4882a593Smuzhiyun if (IS_ERR(name))
5610*4882a593Smuzhiyun return PTR_ERR(name);
5611*4882a593Smuzhiyun }
5612*4882a593Smuzhiyun
5613*4882a593Smuzhiyun s->kobj.kset = kset;
5614*4882a593Smuzhiyun err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
5615*4882a593Smuzhiyun if (err)
5616*4882a593Smuzhiyun goto out;
5617*4882a593Smuzhiyun
5618*4882a593Smuzhiyun err = sysfs_create_group(&s->kobj, &slab_attr_group);
5619*4882a593Smuzhiyun if (err)
5620*4882a593Smuzhiyun goto out_del_kobj;
5621*4882a593Smuzhiyun
5622*4882a593Smuzhiyun if (!unmergeable) {
5623*4882a593Smuzhiyun /* Setup first alias */
5624*4882a593Smuzhiyun sysfs_slab_alias(s, s->name);
5625*4882a593Smuzhiyun }
5626*4882a593Smuzhiyun out:
5627*4882a593Smuzhiyun if (!unmergeable)
5628*4882a593Smuzhiyun kfree(name);
5629*4882a593Smuzhiyun return err;
5630*4882a593Smuzhiyun out_del_kobj:
5631*4882a593Smuzhiyun kobject_del(&s->kobj);
5632*4882a593Smuzhiyun goto out;
5633*4882a593Smuzhiyun }
5634*4882a593Smuzhiyun
5635*4882a593Smuzhiyun void sysfs_slab_unlink(struct kmem_cache *s)
5636*4882a593Smuzhiyun {
5637*4882a593Smuzhiyun if (slab_state >= FULL)
5638*4882a593Smuzhiyun kobject_del(&s->kobj);
5639*4882a593Smuzhiyun }
5640*4882a593Smuzhiyun
5641*4882a593Smuzhiyun void sysfs_slab_release(struct kmem_cache *s)
5642*4882a593Smuzhiyun {
5643*4882a593Smuzhiyun if (slab_state >= FULL)
5644*4882a593Smuzhiyun kobject_put(&s->kobj);
5645*4882a593Smuzhiyun }
5646*4882a593Smuzhiyun
5647*4882a593Smuzhiyun /*
5648*4882a593Smuzhiyun * Need to buffer aliases during bootup until sysfs becomes
5649*4882a593Smuzhiyun * available lest we lose that information.
5650*4882a593Smuzhiyun */
5651*4882a593Smuzhiyun struct saved_alias {
5652*4882a593Smuzhiyun struct kmem_cache *s;
5653*4882a593Smuzhiyun const char *name;
5654*4882a593Smuzhiyun struct saved_alias *next;
5655*4882a593Smuzhiyun };
5656*4882a593Smuzhiyun
5657*4882a593Smuzhiyun static struct saved_alias *alias_list;
5658*4882a593Smuzhiyun
5659*4882a593Smuzhiyun static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5660*4882a593Smuzhiyun {
5661*4882a593Smuzhiyun struct saved_alias *al;
5662*4882a593Smuzhiyun
5663*4882a593Smuzhiyun if (slab_state == FULL) {
5664*4882a593Smuzhiyun /*
5665*4882a593Smuzhiyun * If we have a leftover link then remove it.
5666*4882a593Smuzhiyun */
5667*4882a593Smuzhiyun sysfs_remove_link(&slab_kset->kobj, name);
5668*4882a593Smuzhiyun return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5669*4882a593Smuzhiyun }
5670*4882a593Smuzhiyun
5671*4882a593Smuzhiyun al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5672*4882a593Smuzhiyun if (!al)
5673*4882a593Smuzhiyun return -ENOMEM;
5674*4882a593Smuzhiyun
5675*4882a593Smuzhiyun al->s = s;
5676*4882a593Smuzhiyun al->name = name;
5677*4882a593Smuzhiyun al->next = alias_list;
5678*4882a593Smuzhiyun alias_list = al;
5679*4882a593Smuzhiyun return 0;
5680*4882a593Smuzhiyun }
5681*4882a593Smuzhiyun
5682*4882a593Smuzhiyun static int __init slab_sysfs_init(void)
5683*4882a593Smuzhiyun {
5684*4882a593Smuzhiyun struct kmem_cache *s;
5685*4882a593Smuzhiyun int err;
5686*4882a593Smuzhiyun
5687*4882a593Smuzhiyun mutex_lock(&slab_mutex);
5688*4882a593Smuzhiyun
5689*4882a593Smuzhiyun slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
5690*4882a593Smuzhiyun if (!slab_kset) {
5691*4882a593Smuzhiyun mutex_unlock(&slab_mutex);
5692*4882a593Smuzhiyun pr_err("Cannot register slab subsystem.\n");
5693*4882a593Smuzhiyun return -ENOSYS;
5694*4882a593Smuzhiyun }
5695*4882a593Smuzhiyun
5696*4882a593Smuzhiyun slab_state = FULL;
5697*4882a593Smuzhiyun
5698*4882a593Smuzhiyun list_for_each_entry(s, &slab_caches, list) {
5699*4882a593Smuzhiyun err = sysfs_slab_add(s);
5700*4882a593Smuzhiyun if (err)
5701*4882a593Smuzhiyun pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
5702*4882a593Smuzhiyun s->name);
5703*4882a593Smuzhiyun }
5704*4882a593Smuzhiyun
5705*4882a593Smuzhiyun while (alias_list) {
5706*4882a593Smuzhiyun struct saved_alias *al = alias_list;
5707*4882a593Smuzhiyun
5708*4882a593Smuzhiyun alias_list = alias_list->next;
5709*4882a593Smuzhiyun err = sysfs_slab_alias(al->s, al->name);
5710*4882a593Smuzhiyun if (err)
5711*4882a593Smuzhiyun pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
5712*4882a593Smuzhiyun al->name);
5713*4882a593Smuzhiyun kfree(al);
5714*4882a593Smuzhiyun }
5715*4882a593Smuzhiyun
5716*4882a593Smuzhiyun mutex_unlock(&slab_mutex);
5717*4882a593Smuzhiyun resiliency_test();
5718*4882a593Smuzhiyun return 0;
5719*4882a593Smuzhiyun }
5720*4882a593Smuzhiyun
5721*4882a593Smuzhiyun __initcall(slab_sysfs_init);
5722*4882a593Smuzhiyun #endif /* CONFIG_SLUB_SYSFS */
5723*4882a593Smuzhiyun
5724*4882a593Smuzhiyun #if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
5725*4882a593Smuzhiyun static int slab_debugfs_show(struct seq_file *seq, void *v)
5726*4882a593Smuzhiyun {
5727*4882a593Smuzhiyun struct loc_track *t = seq->private;
5728*4882a593Smuzhiyun struct location *l;
5729*4882a593Smuzhiyun unsigned long idx;
5730*4882a593Smuzhiyun
5731*4882a593Smuzhiyun idx = (unsigned long) t->idx;
5732*4882a593Smuzhiyun if (idx < t->count) {
5733*4882a593Smuzhiyun l = &t->loc[idx];
5734*4882a593Smuzhiyun
5735*4882a593Smuzhiyun seq_printf(seq, "%7ld ", l->count);
5736*4882a593Smuzhiyun
5737*4882a593Smuzhiyun if (l->addr)
5738*4882a593Smuzhiyun seq_printf(seq, "%pS", (void *)l->addr);
5739*4882a593Smuzhiyun else
5740*4882a593Smuzhiyun seq_puts(seq, "<not-available>");
5741*4882a593Smuzhiyun
5742*4882a593Smuzhiyun if (l->sum_time != l->min_time) {
5743*4882a593Smuzhiyun seq_printf(seq, " age=%ld/%llu/%ld",
5744*4882a593Smuzhiyun l->min_time, div_u64(l->sum_time, l->count),
5745*4882a593Smuzhiyun l->max_time);
5746*4882a593Smuzhiyun } else
5747*4882a593Smuzhiyun seq_printf(seq, " age=%ld", l->min_time);
5748*4882a593Smuzhiyun
5749*4882a593Smuzhiyun if (l->min_pid != l->max_pid)
5750*4882a593Smuzhiyun seq_printf(seq, " pid=%ld-%ld", l->min_pid, l->max_pid);
5751*4882a593Smuzhiyun else
5752*4882a593Smuzhiyun seq_printf(seq, " pid=%ld",
5753*4882a593Smuzhiyun l->min_pid);
5754*4882a593Smuzhiyun
5755*4882a593Smuzhiyun if (num_online_cpus() > 1 && !cpumask_empty(to_cpumask(l->cpus)))
5756*4882a593Smuzhiyun seq_printf(seq, " cpus=%*pbl",
5757*4882a593Smuzhiyun cpumask_pr_args(to_cpumask(l->cpus)));
5758*4882a593Smuzhiyun
5759*4882a593Smuzhiyun if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
5760*4882a593Smuzhiyun seq_printf(seq, " nodes=%*pbl",
5761*4882a593Smuzhiyun nodemask_pr_args(&l->nodes));
5762*4882a593Smuzhiyun
5763*4882a593Smuzhiyun seq_puts(seq, "\n");
5764*4882a593Smuzhiyun }
5765*4882a593Smuzhiyun
5766*4882a593Smuzhiyun if (!idx && !t->count)
5767*4882a593Smuzhiyun seq_puts(seq, "No data\n");
5768*4882a593Smuzhiyun
5769*4882a593Smuzhiyun return 0;
5770*4882a593Smuzhiyun }
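/*
 * Each record printed above has the form (illustrative values, any symbol):
 *
 *	   1234 kmem_cache_alloc+0x9a/0x1b0 age=10/5000/70123 pid=1-960 cpus=0-3 nodes=0
 *
 * i.e. hit count, call site, min/avg/max object age in jiffies and the
 * pid range; the range forms collapse to a single value when min == max,
 * and the cpus/nodes masks only appear on multi-cpu/multi-node systems.
 */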
5771*4882a593Smuzhiyun
5772*4882a593Smuzhiyun static void slab_debugfs_stop(struct seq_file *seq, void *v)
5773*4882a593Smuzhiyun {
5774*4882a593Smuzhiyun }
5775*4882a593Smuzhiyun
5776*4882a593Smuzhiyun static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
5777*4882a593Smuzhiyun {
5778*4882a593Smuzhiyun struct loc_track *t = seq->private;
5779*4882a593Smuzhiyun
5780*4882a593Smuzhiyun t->idx = ++(*ppos);
5781*4882a593Smuzhiyun if (*ppos <= t->count)
5782*4882a593Smuzhiyun return ppos;
5783*4882a593Smuzhiyun
5784*4882a593Smuzhiyun return NULL;
5785*4882a593Smuzhiyun }
5786*4882a593Smuzhiyun
5787*4882a593Smuzhiyun static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
5788*4882a593Smuzhiyun {
5789*4882a593Smuzhiyun struct loc_track *t = seq->private;
5790*4882a593Smuzhiyun
5791*4882a593Smuzhiyun t->idx = *ppos;
5792*4882a593Smuzhiyun return ppos;
5793*4882a593Smuzhiyun }
5794*4882a593Smuzhiyun
5795*4882a593Smuzhiyun static const struct seq_operations slab_debugfs_sops = {
5796*4882a593Smuzhiyun .start = slab_debugfs_start,
5797*4882a593Smuzhiyun .next = slab_debugfs_next,
5798*4882a593Smuzhiyun .stop = slab_debugfs_stop,
5799*4882a593Smuzhiyun .show = slab_debugfs_show,
5800*4882a593Smuzhiyun };
5801*4882a593Smuzhiyun
5802*4882a593Smuzhiyun static int slab_debug_trace_open(struct inode *inode, struct file *filep)
5803*4882a593Smuzhiyun {
5804*4882a593Smuzhiyun
5805*4882a593Smuzhiyun struct kmem_cache_node *n;
5806*4882a593Smuzhiyun enum track_item alloc;
5807*4882a593Smuzhiyun int node;
5808*4882a593Smuzhiyun struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops,
5809*4882a593Smuzhiyun sizeof(struct loc_track));
5810*4882a593Smuzhiyun struct kmem_cache *s = file_inode(filep)->i_private;
5811*4882a593Smuzhiyun unsigned long *obj_map;
5812*4882a593Smuzhiyun
5813*4882a593Smuzhiyun if (!t)
5814*4882a593Smuzhiyun return -ENOMEM;
5815*4882a593Smuzhiyun
5816*4882a593Smuzhiyun obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
5817*4882a593Smuzhiyun if (!obj_map) {
5818*4882a593Smuzhiyun seq_release_private(inode, filep);
5819*4882a593Smuzhiyun return -ENOMEM;
5820*4882a593Smuzhiyun }
5821*4882a593Smuzhiyun
5822*4882a593Smuzhiyun if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
5823*4882a593Smuzhiyun alloc = TRACK_ALLOC;
5824*4882a593Smuzhiyun else
5825*4882a593Smuzhiyun alloc = TRACK_FREE;
5826*4882a593Smuzhiyun
5827*4882a593Smuzhiyun if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
5828*4882a593Smuzhiyun bitmap_free(obj_map);
5829*4882a593Smuzhiyun seq_release_private(inode, filep);
5830*4882a593Smuzhiyun return -ENOMEM;
5831*4882a593Smuzhiyun }
5832*4882a593Smuzhiyun
5833*4882a593Smuzhiyun /* Push back cpu slabs */
5834*4882a593Smuzhiyun flush_all(s);
5835*4882a593Smuzhiyun
5836*4882a593Smuzhiyun for_each_kmem_cache_node(s, node, n) {
5837*4882a593Smuzhiyun unsigned long flags;
5838*4882a593Smuzhiyun struct page *page;
5839*4882a593Smuzhiyun
5840*4882a593Smuzhiyun if (!atomic_long_read(&n->nr_slabs))
5841*4882a593Smuzhiyun continue;
5842*4882a593Smuzhiyun
5843*4882a593Smuzhiyun spin_lock_irqsave(&n->list_lock, flags);
5844*4882a593Smuzhiyun list_for_each_entry(page, &n->partial, slab_list)
5845*4882a593Smuzhiyun process_slab(t, s, page, alloc, obj_map);
5846*4882a593Smuzhiyun list_for_each_entry(page, &n->full, slab_list)
5847*4882a593Smuzhiyun process_slab(t, s, page, alloc, obj_map);
5848*4882a593Smuzhiyun spin_unlock_irqrestore(&n->list_lock, flags);
5849*4882a593Smuzhiyun }
5850*4882a593Smuzhiyun
5851*4882a593Smuzhiyun bitmap_free(obj_map);
5852*4882a593Smuzhiyun return 0;
5853*4882a593Smuzhiyun }
5854*4882a593Smuzhiyun
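/* Drop the loc_track table built at open time, then release the seq_file. */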
static int slab_debug_trace_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct loc_track *t = seq->private;

	free_loc_track(t);
	return seq_release_private(inode, file);
}

static const struct file_operations slab_debugfs_fops = {
	.open    = slab_debug_trace_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = slab_debug_trace_release,
};

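/*
 * Create <debugfs>/slab/<cache-name>/ with read-only alloc_traces and
 * free_traces files for a cache that stores user tracking information.
 */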
static void debugfs_slab_add(struct kmem_cache *s)
{
	struct dentry *slab_cache_dir;

	if (unlikely(!slab_debugfs_root))
		return;

	slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root);

	debugfs_create_file("alloc_traces", 0400,
			    slab_cache_dir, s, &slab_debugfs_fops);

	debugfs_create_file("free_traces", 0400,
			    slab_cache_dir, s, &slab_debugfs_fops);
}

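/* Remove a cache's directory from <debugfs>/slab when the cache goes away. */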
void debugfs_slab_release(struct kmem_cache *s)
{
	debugfs_remove_recursive(debugfs_lookup(s->name, slab_debugfs_root));
}

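/*
 * Create the <debugfs>/slab root and add entries for any caches that were
 * created with SLAB_STORE_USER before this initcall ran.
 */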
static int __init slab_debugfs_init(void)
{
	struct kmem_cache *s;

	slab_debugfs_root = debugfs_create_dir("slab", NULL);

	list_for_each_entry(s, &slab_caches, list)
		if (s->flags & SLAB_STORE_USER)
			debugfs_slab_add(s);

	return 0;
}
__initcall(slab_debugfs_init);
#endif
/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLUB_DEBUG
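/*
 * Sum slab and object counts over all nodes. Free objects are counted by
 * walking the per-node partial lists only, so objects that are free in
 * per-cpu slabs still show up as active.
 */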
void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
{
	unsigned long nr_slabs = 0;
	unsigned long nr_objs = 0;
	unsigned long nr_free = 0;
	int node;
	struct kmem_cache_node *n;

	for_each_kmem_cache_node(s, node, n) {
		nr_slabs += node_nr_slabs(n);
		nr_objs += node_nr_objs(n);
		nr_free += count_partial(n, count_free);
	}

	sinfo->active_objs = nr_objs - nr_free;
	sinfo->num_objs = nr_objs;
	sinfo->active_slabs = nr_slabs;
	sinfo->num_slabs = nr_slabs;
	sinfo->objects_per_slab = oo_objects(s->oo);
	sinfo->cache_order = oo_order(s->oo);
}
EXPORT_SYMBOL_GPL(get_slabinfo);

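/* SLUB publishes its statistics through sysfs rather than /proc/slabinfo. */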
void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
{
}

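/*
 * Writing to /proc/slabinfo is how SLAB adjusts its per-cpu queue tunables;
 * SLUB has no equivalent tunables here, so the write is rejected.
 */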
ssize_t slabinfo_write(struct file *file, const char __user *buffer,
		       size_t count, loff_t *ppos)
{
	return -EIO;
}
#endif /* CONFIG_SLUB_DEBUG */