// SPDX-License-Identifier: GPL-2.0
/*
 * Slab allocator functions that are independent of the allocator strategy
 *
 * (C) 2012 Christoph Lameter <cl@linux.com>
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/cache.h>
#include <linux/compiler.h>
#include <linux/kfence.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <linux/kasan.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>

#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>
#undef CREATE_TRACE_POINTS
#include <trace/hooks/mm.h>
#include "internal.h"

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

#ifdef CONFIG_HARDENED_USERCOPY
bool usercopy_fallback __ro_after_init =
		IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK);
module_param(usercopy_fallback, bool, 0400);
MODULE_PARM_DESC(usercopy_fallback,
		"WARN instead of reject usercopy whitelist violations");
#endif

static LIST_HEAD(slab_caches_to_rcu_destroy);
static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
		    slab_caches_to_rcu_destroy_workfn);

/*
 * Set of flags that will prevent slab merging
 */
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB | kasan_never_merge())

#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
			 SLAB_CACHE_DMA32 | SLAB_ACCOUNT)

/*
 * Merge control. If this is set then no merging of slab caches will occur.
 */
static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT);

static int __init setup_slab_nomerge(char *str)
{
	slab_nomerge = true;
	return 1;
}

#ifdef CONFIG_SLUB
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
#endif

__setup("slab_nomerge", setup_slab_nomerge);
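
/*
 * Illustrative boot usage (hypothetical command line, not part of this
 * file): cache merging can be disabled by passing "slab_nomerge" (or, when
 * SLUB is the allocator, the alias "slub_nomerge") to the kernel, e.g.:
 *
 *	console=ttyS0 root=/dev/vda1 slab_nomerge
 */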

/*
 * Determine the size of a slab object
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
	return s->object_size;
}
EXPORT_SYMBOL(kmem_cache_size);

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, unsigned int size)
{
	if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) {
		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
		return -EINVAL;
	}

	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
	return 0;
}
#else
static inline int kmem_cache_sanity_check(const char *name, unsigned int size)
{
	return 0;
}
#endif

void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		if (s)
			kmem_cache_free(s, p[i]);
		else
			kfree(p[i]);
	}
}

int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
								void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		void *x = p[i] = kmem_cache_alloc(s, flags);
		if (!x) {
			__kmem_cache_free_bulk(s, i, p);
			return 0;
		}
	}
	return i;
}
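
/*
 * Minimal usage sketch of the public bulk API these helpers back
 * (hypothetical cache and object count, not part of this file):
 *
 *	void *objs[16];
 *
 *	if (kmem_cache_alloc_bulk(my_cache, GFP_KERNEL, ARRAY_SIZE(objs), objs)) {
 *		... use objs[0] .. objs[15] ...
 *		kmem_cache_free_bulk(my_cache, ARRAY_SIZE(objs), objs);
 *	}
 */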

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
static unsigned int calculate_alignment(slab_flags_t flags,
		unsigned int align, unsigned int size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If that is greater, then use it.
	 */
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned int ralign;

		ralign = cache_line_size();
		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	align = max(align, arch_slab_minalign());

	return ALIGN(align, sizeof(void *));
}
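
/*
 * Worked example (assuming a 64-byte cache line and an arch_slab_minalign()
 * no larger than 32): a 24-byte object created with SLAB_HWCACHE_ALIGN and
 * no explicit alignment halves ralign from 64 to 32 (24 <= 32 but 24 > 16),
 * so the object ends up 32-byte aligned rather than on a full cache line.
 */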

/*
 * Find a mergeable slab cache
 */
int slab_unmergeable(struct kmem_cache *s)
{
	if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
		return 1;

	if (s->ctor)
		return 1;

	if (s->usersize)
		return 1;

	/*
	 * We may have set a slab to be unmergeable during bootstrap.
	 */
	if (s->refcount < 0)
		return 1;

	return 0;
}

struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
		slab_flags_t flags, const char *name, void (*ctor)(void *))
{
	struct kmem_cache *s;

	if (slab_nomerge)
		return NULL;

	if (ctor)
		return NULL;

	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);
	flags = kmem_cache_flags(size, flags, name);

	if (flags & SLAB_NEVER_MERGE)
		return NULL;

	list_for_each_entry_reverse(s, &slab_caches, list) {
		if (slab_unmergeable(s))
			continue;

		if (size > s->size)
			continue;

		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
			continue;
		/*
		 * Check if alignment is compatible.
		 * Courtesy of Adrian Drzewiecki
		 */
		if ((s->size & ~(align - 1)) != s->size)
			continue;

		if (s->size - size >= sizeof(void *))
			continue;

		if (IS_ENABLED(CONFIG_SLAB) && align &&
			(align > s->align || s->align % align))
			continue;

		return s;
	}
	return NULL;
}

static struct kmem_cache *create_cache(const char *name,
		unsigned int object_size, unsigned int align,
		slab_flags_t flags, unsigned int useroffset,
		unsigned int usersize, void (*ctor)(void *),
		struct kmem_cache *root_cache)
{
	struct kmem_cache *s;
	int err;

	if (WARN_ON(useroffset + usersize > object_size))
		useroffset = usersize = 0;

	err = -ENOMEM;
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!s)
		goto out;

	s->name = name;
	s->size = s->object_size = object_size;
	s->align = align;
	s->ctor = ctor;
	s->useroffset = useroffset;
	s->usersize = usersize;

	err = __kmem_cache_create(s, flags);
	if (err)
		goto out_free_cache;

	s->refcount = 1;
	list_add(&s->list, &slab_caches);
out:
	if (err)
		return ERR_PTR(err);
	return s;

out_free_cache:
	kmem_cache_free(kmem_cache, s);
	goto out;
}

/**
 * kmem_cache_create_usercopy - Create a cache with a region suitable
 * for copying to userspace
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @useroffset: Usercopy region offset
 * @usersize: Usercopy region size
 * @ctor: A constructor for the objects.
 *
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 *
 * Return: a pointer to the cache on success, NULL on failure.
 */
struct kmem_cache *
kmem_cache_create_usercopy(const char *name,
		  unsigned int size, unsigned int align,
		  slab_flags_t flags,
		  unsigned int useroffset, unsigned int usersize,
		  void (*ctor)(void *))
{
	struct kmem_cache *s = NULL;
	const char *cache_name;
	int err;

	get_online_cpus();
	get_online_mems();

#ifdef CONFIG_SLUB_DEBUG
	/*
	 * If no slub_debug was enabled globally, the static key is not yet
	 * enabled by setup_slub_debug(). Enable it if the cache is being
	 * created with any of the debugging flags passed explicitly.
	 */
	if (flags & SLAB_DEBUG_FLAGS)
		static_branch_enable(&slub_debug_enabled);
#endif

	mutex_lock(&slab_mutex);

	err = kmem_cache_sanity_check(name, size);
	if (err) {
		goto out_unlock;
	}

	/* Refuse requests with allocator specific flags */
	if (flags & ~SLAB_FLAGS_PERMITTED) {
		err = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Some allocators will constrain the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;

	/* Fail closed on bad usersize or useroffset values. */
	if (WARN_ON(!usersize && useroffset) ||
	    WARN_ON(size < usersize || size - usersize < useroffset))
		usersize = useroffset = 0;

	if (!usersize)
		s = __kmem_cache_alias(name, size, align, flags, ctor);
	if (s)
		goto out_unlock;

	cache_name = kstrdup_const(name, GFP_KERNEL);
	if (!cache_name) {
		err = -ENOMEM;
		goto out_unlock;
	}

	s = create_cache(cache_name, size,
			 calculate_alignment(flags, align, size),
			 flags, useroffset, usersize, ctor, NULL);
	if (IS_ERR(s)) {
		err = PTR_ERR(s);
		kfree_const(cache_name);
	}

out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();

	if (err) {
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			pr_warn("kmem_cache_create(%s) failed with error %d\n",
				name, err);
			dump_stack();
		}
		return NULL;
	}
	return s;
}
EXPORT_SYMBOL(kmem_cache_create_usercopy);
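
/*
 * Minimal usage sketch (hypothetical structure and field names, not part of
 * this file): whitelist only the part of an object that may legitimately be
 * copied to/from userspace, here the "data" buffer inside struct foo.
 *
 *	foo_cachep = kmem_cache_create_usercopy("foo", sizeof(struct foo),
 *						0, SLAB_HWCACHE_ALIGN,
 *						offsetof(struct foo, data),
 *						sizeof_field(struct foo, data),
 *						NULL);
 */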

/**
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 *
 * Return: a pointer to the cache on success, NULL on failure.
 */
struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
		slab_flags_t flags, void (*ctor)(void *))
{
	return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
					  ctor);
}
EXPORT_SYMBOL(kmem_cache_create);
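
/*
 * Minimal lifecycle sketch (hypothetical names, not part of this file):
 *
 *	static struct kmem_cache *foo_cachep;
 *
 *	foo_cachep = kmem_cache_create("foo", sizeof(struct foo),
 *				       0, SLAB_HWCACHE_ALIGN, NULL);
 *	obj = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cachep, obj);
 *	kmem_cache_destroy(foo_cachep);
 */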

static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
{
	LIST_HEAD(to_destroy);
	struct kmem_cache *s, *s2;

	/*
	 * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
	 * @slab_caches_to_rcu_destroy list.  The slab pages are freed
	 * through RCU and the associated kmem_cache is dereferenced
	 * while freeing the pages, so the kmem_caches should be freed only
	 * after the pending RCU operations are finished.  As rcu_barrier()
	 * is a pretty slow operation, we batch all pending destructions
	 * asynchronously.
	 */
	mutex_lock(&slab_mutex);
	list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
	mutex_unlock(&slab_mutex);

	if (list_empty(&to_destroy))
		return;

	rcu_barrier();

	list_for_each_entry_safe(s, s2, &to_destroy, list) {
		debugfs_slab_release(s);
		kfence_shutdown_cache(s);
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_release(s);
#else
		slab_kmem_cache_release(s);
#endif
	}
}

static int shutdown_cache(struct kmem_cache *s)
{
	/* free asan quarantined objects */
	kasan_cache_shutdown(s);

	if (__kmem_cache_shutdown(s) != 0)
		return -EBUSY;

	list_del(&s->list);

	if (s->flags & SLAB_TYPESAFE_BY_RCU) {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_unlink(s);
#endif
		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
		schedule_work(&slab_caches_to_rcu_destroy_work);
	} else {
		kfence_shutdown_cache(s);
		debugfs_slab_release(s);
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_unlink(s);
		sysfs_slab_release(s);
#else
		slab_kmem_cache_release(s);
#endif
	}

	return 0;
}

void slab_kmem_cache_release(struct kmem_cache *s)
{
	__kmem_cache_release(s);
	kfree_const(s->name);
	kmem_cache_free(kmem_cache, s);
}

void kmem_cache_destroy(struct kmem_cache *s)
{
	int err;

	if (unlikely(!s))
		return;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	s->refcount--;
	if (s->refcount)
		goto out_unlock;

	err = shutdown_cache(s);
	if (err) {
		pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
		       s->name);
		dump_stack();
	}
out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);

/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Releases as many slabs as possible for a cache.
 * To help debugging, a zero exit status indicates all slabs were released.
 *
 * Return: %0 if all slabs were released, non-zero otherwise
 */
int kmem_cache_shrink(struct kmem_cache *cachep)
{
	int ret;

	get_online_cpus();
	get_online_mems();
	kasan_cache_shrink(cachep);
	ret = __kmem_cache_shrink(cachep);
	put_online_mems();
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);

bool slab_is_available(void)
{
	return slab_state >= UP;
}

#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name,
		unsigned int size, slab_flags_t flags,
		unsigned int useroffset, unsigned int usersize)
{
	int err;
	unsigned int align = ARCH_KMALLOC_MINALIGN;

	s->name = name;
	s->size = s->object_size = size;

	/*
	 * For power of two sizes, guarantee natural alignment for kmalloc
	 * caches, regardless of SL*B debugging options.
	 */
	if (is_power_of_2(size))
		align = max(align, size);
	s->align = calculate_alignment(flags, align, size);

	s->useroffset = useroffset;
	s->usersize = usersize;

	err = __kmem_cache_create(s, flags);

	if (err)
		panic("Creation of kmalloc slab %s size=%u failed. Reason %d\n",
					name, size, err);

	s->refcount = -1;	/* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name,
		unsigned int size, slab_flags_t flags,
		unsigned int useroffset, unsigned int usersize)
{
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

	if (!s)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(s, name, size, flags, useroffset, usersize);
	kasan_cache_create_kmalloc(s);
	list_add(&s->list, &slab_caches);
	s->refcount = 1;
	return s;
}

struct kmem_cache *
kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
{ /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ };
EXPORT_SYMBOL(kmalloc_caches);

/*
 * Conversion table for small slab sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
static u8 size_index[24] __ro_after_init = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};

static inline unsigned int size_index_elem(unsigned int bytes)
{
	return (bytes - 1) / 8;
}
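
/*
 * Worked example: a 100-byte request maps to size_index_elem(100) = 12 and
 * size_index[12] = 7, i.e. the kmalloc-128 cache; a 300-byte request is
 * above 192, so its index is fls(300 - 1) = 9, i.e. kmalloc-512.
 */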

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
	unsigned int index;
	struct kmem_cache *s = NULL;

	if (size <= 192) {
		if (!size)
			return ZERO_SIZE_PTR;

		index = size_index[size_index_elem(size)];
	} else {
		if (WARN_ON_ONCE(size > KMALLOC_MAX_CACHE_SIZE))
			return NULL;
		index = fls(size - 1);
	}

	trace_android_vh_kmalloc_slab(index, flags, &s);
	if (s)
		return s;

	return kmalloc_caches[kmalloc_type(flags)][index];
}

#ifdef CONFIG_ZONE_DMA
#define INIT_KMALLOC_INFO(__size, __short_size)			\
{								\
	.name[KMALLOC_NORMAL]  = "kmalloc-" #__short_size,	\
	.name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size,	\
	.name[KMALLOC_DMA]     = "dma-kmalloc-" #__short_size,	\
	.size = __size,						\
}
#else
#define INIT_KMALLOC_INFO(__size, __short_size)			\
{								\
	.name[KMALLOC_NORMAL]  = "kmalloc-" #__short_size,	\
	.name[KMALLOC_RECLAIM] = "kmalloc-rcl-" #__short_size,	\
	.size = __size,						\
}
#endif

/*
 * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
 * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
 * kmalloc-67108864.
 */
const struct kmalloc_info_struct kmalloc_info[] __initconst = {
	INIT_KMALLOC_INFO(0, 0),
	INIT_KMALLOC_INFO(96, 96),
	INIT_KMALLOC_INFO(192, 192),
	INIT_KMALLOC_INFO(8, 8),
	INIT_KMALLOC_INFO(16, 16),
	INIT_KMALLOC_INFO(32, 32),
	INIT_KMALLOC_INFO(64, 64),
	INIT_KMALLOC_INFO(128, 128),
	INIT_KMALLOC_INFO(256, 256),
	INIT_KMALLOC_INFO(512, 512),
	INIT_KMALLOC_INFO(1024, 1k),
	INIT_KMALLOC_INFO(2048, 2k),
	INIT_KMALLOC_INFO(4096, 4k),
	INIT_KMALLOC_INFO(8192, 8k),
	INIT_KMALLOC_INFO(16384, 16k),
	INIT_KMALLOC_INFO(32768, 32k),
	INIT_KMALLOC_INFO(65536, 64k),
	INIT_KMALLOC_INFO(131072, 128k),
	INIT_KMALLOC_INFO(262144, 256k),
	INIT_KMALLOC_INFO(524288, 512k),
	INIT_KMALLOC_INFO(1048576, 1M),
	INIT_KMALLOC_INFO(2097152, 2M),
	INIT_KMALLOC_INFO(4194304, 4M),
	INIT_KMALLOC_INFO(8388608, 8M),
	INIT_KMALLOC_INFO(16777216, 16M),
	INIT_KMALLOC_INFO(33554432, 32M),
	INIT_KMALLOC_INFO(67108864, 64M)
};

/*
 * Patch up the size_index table if we have strange large alignment
 * requirements for the kmalloc array. This is only the case for
 * MIPS it seems. The standard arches will not generate any code here.
 *
 * Largest permitted alignment is 256 bytes due to the way we
 * handle the index determination for the smaller caches.
 *
 * Make sure that nothing crazy happens if someone starts tinkering
 * around with ARCH_KMALLOC_MINALIGN
 */
void __init setup_kmalloc_cache_index_table(void)
{
	unsigned int i;

	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		unsigned int elem = size_index_elem(i);

		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE >= 64) {
		/*
		 * The 96 byte size cache is not used if the alignment
		 * is 64 byte.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;

	}

	if (KMALLOC_MIN_SIZE >= 128) {
		/*
		 * The 192 byte sized cache is not used if the alignment
		 * is 128 byte. Redirect kmalloc to use the 256 byte cache
		 * instead.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}
}

static void __init
new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
{
	if (type == KMALLOC_RECLAIM)
		flags |= SLAB_RECLAIM_ACCOUNT;

	kmalloc_caches[type][idx] = create_kmalloc_cache(
					kmalloc_info[idx].name[type],
					kmalloc_info[idx].size, flags, 0,
					kmalloc_info[idx].size);
}

/*
 * Create the kmalloc array. Some of the regular kmalloc arrays
 * may already have been created because they were needed to
 * enable allocations for slab creation.
 */
void __init create_kmalloc_caches(slab_flags_t flags)
{
	int i;
	enum kmalloc_cache_type type;

	for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
		for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
			if (!kmalloc_caches[type][i])
				new_kmalloc_cache(i, type, flags);

			/*
			 * Caches that are not a power of two in size (the
			 * 96 and 192 byte caches) have to be created
			 * immediately after the earlier power of two caches.
			 */
			if (KMALLOC_MIN_SIZE <= 32 && i == 6 &&
					!kmalloc_caches[type][1])
				new_kmalloc_cache(1, type, flags);
			if (KMALLOC_MIN_SIZE <= 64 && i == 7 &&
					!kmalloc_caches[type][2])
				new_kmalloc_cache(2, type, flags);
		}
	}

	/* Kmalloc array is now usable */
	slab_state = UP;

#ifdef CONFIG_ZONE_DMA
	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];

		if (s) {
			kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
				kmalloc_info[i].name[KMALLOC_DMA],
				kmalloc_info[i].size,
				SLAB_CACHE_DMA | flags, 0,
				kmalloc_info[i].size);
		}
	}
#endif
}
#endif /* !CONFIG_SLOB */

gfp_t kmalloc_fix_flags(gfp_t flags)
{
	gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;

	flags &= ~GFP_SLAB_BUG_MASK;
	pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
			invalid_mask, &invalid_mask, flags, &flags);
	dump_stack();

	return flags;
}

/*
 * To avoid unnecessary overhead, we pass through large allocation requests
 * directly to the page allocator. We use __GFP_COMP, because we will need to
 * know the allocation order to free the pages properly in kfree.
 */
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = NULL;
	struct page *page;

	if (unlikely(flags & GFP_SLAB_BUG_MASK))
		flags = kmalloc_fix_flags(flags);

	flags |= __GFP_COMP;
	page = alloc_pages(flags, order);
	if (likely(page)) {
		ret = page_address(page);
		mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
				      PAGE_SIZE << order);
	}
	ret = kasan_kmalloc_large(ret, size, flags);
	/* As ret might get tagged, call kmemleak hook after KASAN. */
	kmemleak_alloc(ret, size, 1, flags);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order);
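
/*
 * Illustrative path (assuming SLUB with 4 KiB pages, where
 * KMALLOC_MAX_CACHE_SIZE is two pages): a request such as
 *
 *	buf = kmalloc(256 * 1024, GFP_KERNEL);
 *
 * is too large for any kmalloc cache and reaches kmalloc_order() via
 * kmalloc_large(), allocating an order-6 compound page; kfree(buf) later
 * reads the compound order to return the pages to the page allocator.
 */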

#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = kmalloc_order(size, flags, order);
	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif

#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Randomize a generic freelist */
static void freelist_randomize(struct rnd_state *state, unsigned int *list,
			       unsigned int count)
{
	unsigned int rand;
	unsigned int i;

	for (i = 0; i < count; i++)
		list[i] = i;

	/* Fisher-Yates shuffle */
	for (i = count - 1; i > 0; i--) {
		rand = prandom_u32_state(state);
		rand %= (i + 1);
		swap(list[i], list[rand]);
	}
}

/* Create a random sequence per cache */
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
				    gfp_t gfp)
{
	struct rnd_state state;

	if (count < 2 || cachep->random_seq)
		return 0;

	cachep->random_seq = kcalloc(count, sizeof(unsigned int), gfp);
	if (!cachep->random_seq)
		return -ENOMEM;

	/* Get best entropy at this stage of boot */
	prandom_seed_state(&state, get_random_long());

	freelist_randomize(&state, cachep->random_seq, count);
	return 0;
}

/* Destroy the per-cache random freelist sequence */
void cache_random_seq_destroy(struct kmem_cache *cachep)
{
	kfree(cachep->random_seq);
	cachep->random_seq = NULL;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (0600)
#else
#define SLABINFO_RIGHTS (0400)
#endif

static void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif
	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}

void *slab_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&slab_mutex);
	return seq_list_start(&slab_caches, *pos);
}

void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_caches, pos);
}

void slab_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}

static void cache_show(struct kmem_cache *s, struct seq_file *m)
{
	struct slabinfo sinfo;

	memset(&sinfo, 0, sizeof(sinfo));
	get_slabinfo(s, &sinfo);

	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
		   s->name, sinfo.active_objs, sinfo.num_objs, s->size,
		   sinfo.objects_per_slab, (1 << sinfo.cache_order));

	seq_printf(m, " : tunables %4u %4u %4u",
		   sinfo.limit, sinfo.batchcount, sinfo.shared);
	seq_printf(m, " : slabdata %6lu %6lu %6lu",
		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
	slabinfo_show_stats(m, s);
	seq_putc(m, '\n');
}

static int slab_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);

	if (p == slab_caches.next)
		print_slabinfo_header(m);
	cache_show(s, m);
	return 0;
}

void dump_unreclaimable_slab(void)
{
	struct kmem_cache *s, *s2;
	struct slabinfo sinfo;

	/*
	 * Acquiring slab_mutex here is risky because we don't want to
	 * sleep in the OOM path, yet traversing the list without holding
	 * the mutex risks a crash. Use mutex_trylock() to protect the
	 * traversal and dump nothing if the mutex cannot be taken.
	 */
	if (!mutex_trylock(&slab_mutex)) {
		pr_warn("excessive unreclaimable slab but cannot dump stats\n");
		return;
	}

	pr_info("Unreclaimable slab info:\n");
	pr_info("Name                      Used          Total\n");

	list_for_each_entry_safe(s, s2, &slab_caches, list) {
		if (s->flags & SLAB_RECLAIM_ACCOUNT)
			continue;

		get_slabinfo(s, &sinfo);

		if (sinfo.num_objs > 0)
			pr_info("%-17s %10luKB %10luKB\n", s->name,
				(sinfo.active_objs * s->size) / 1024,
				(sinfo.num_objs * s->size) / 1024);
	}
	mutex_unlock(&slab_mutex);
}

#if defined(CONFIG_MEMCG_KMEM)
int memcg_slab_show(struct seq_file *m, void *p)
{
	/*
	 * Deprecated.
	 * Please, take a look at tools/cgroup/slabinfo.py .
	 */
	return 0;
}
#endif

/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
static const struct seq_operations slabinfo_op = {
	.start = slab_start,
	.next = slab_next,
	.stop = slab_stop,
	.show = slab_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}

static const struct proc_ops slabinfo_proc_ops = {
	.proc_flags	= PROC_ENTRY_PERMANENT,
	.proc_open	= slabinfo_open,
	.proc_read	= seq_read,
	.proc_write	= slabinfo_write,
	.proc_lseek	= seq_lseek,
	.proc_release	= seq_release,
};

static int __init slab_proc_init(void)
{
	proc_create("slabinfo", SLABINFO_RIGHTS, NULL, &slabinfo_proc_ops);
	return 0;
}
module_init(slab_proc_init);

#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */

static __always_inline void *__do_krealloc(const void *p, size_t new_size,
					   gfp_t flags)
{
	void *ret;
	size_t ks;

	/* Don't use instrumented ksize to allow precise KASAN poisoning. */
	if (likely(!ZERO_OR_NULL_PTR(p))) {
		if (!kasan_check_byte(p))
			return NULL;
		ks = kfence_ksize(p) ?: __ksize(p);
	} else
		ks = 0;

	/* If the object still fits, repoison it precisely. */
	if (ks >= new_size) {
		p = kasan_krealloc((void *)p, new_size, flags);
		return (void *)p;
	}

	ret = kmalloc_track_caller(new_size, flags);
	if (ret && p) {
		/* Disable KASAN checks as the object's redzone is accessed. */
		kasan_disable_current();
		memcpy(ret, kasan_reset_tag(p), ks);
		kasan_enable_current();
	}

	return ret;
}

/**
 * krealloc - reallocate memory. The contents will remain unchanged.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * The contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes. If @p is %NULL, krealloc()
 * behaves exactly like kmalloc(). If @new_size is 0 and @p is not a
 * %NULL pointer, the object pointed to is freed.
 *
 * Return: pointer to the allocated memory or %NULL in case of error
 */
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
	void *ret;

	if (unlikely(!new_size)) {
		kfree(p);
		return ZERO_SIZE_PTR;
	}

	ret = __do_krealloc(p, new_size, flags);
	if (ret && kasan_reset_tag(p) != kasan_reset_tag(ret))
		kfree(p);

	return ret;
}
EXPORT_SYMBOL(krealloc);
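
/*
 * Minimal usage sketch (hypothetical buffer, not part of this file): the old
 * pointer must not be overwritten before the result is checked, or the
 * original allocation would leak on failure.
 *
 *	new_buf = krealloc(buf, new_len, GFP_KERNEL);
 *	if (!new_buf)
 *		return -ENOMEM;		// buf is still valid and still owned
 *	buf = new_buf;
 */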

/**
 * kfree_sensitive - Clear sensitive information in memory before freeing
 * @p: object to free memory of
 *
 * The memory of the object @p points to is zeroed before freed.
 * If @p is %NULL, kfree_sensitive() does nothing.
 *
 * Note: this function zeroes the whole allocated buffer which can be a good
 * deal bigger than the requested buffer size passed to kmalloc(). So be
 * careful when using this function in performance sensitive code.
 */
void kfree_sensitive(const void *p)
{
	size_t ks;
	void *mem = (void *)p;

	ks = ksize(mem);
	if (ks)
		memzero_explicit(mem, ks);
	kfree(mem);
}
EXPORT_SYMBOL(kfree_sensitive);
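
/*
 * Minimal usage sketch (hypothetical key material, not part of this file):
 * free a buffer holding secrets so the whole underlying allocation is wiped
 * before the memory can be reused.
 *
 *	key = kmalloc(key_len, GFP_KERNEL);
 *	...
 *	kfree_sensitive(key);		// instead of kfree(key)
 */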

/**
 * ksize - get the actual amount of memory allocated for a given object
 * @objp: Pointer to the object
 *
 * kmalloc may internally round up allocations and return more memory
 * than requested. ksize() can be used to determine the actual amount of
 * memory allocated. The caller may use this additional memory, even though
 * a smaller amount of memory was initially specified with the kmalloc call.
 * The caller must guarantee that objp points to a valid object previously
 * allocated with either kmalloc() or kmem_cache_alloc(). The object
 * must not be freed during the duration of the call.
 *
 * Return: size of the actual memory used by @objp in bytes
 */
size_t ksize(const void *objp)
{
	size_t size;

	/*
	 * We need to first check that the pointer to the object is valid, and
	 * only then unpoison the memory. The report printed from ksize() is
	 * more useful than one printed later, when the behaviour could be
	 * undefined due to a potential use-after-free or double-free.
	 *
	 * We use kasan_check_byte(), which is supported for the hardware
	 * tag-based KASAN mode, unlike kasan_check_read/write().
	 *
	 * If the pointed to memory is invalid, we return 0 to avoid users of
	 * ksize() writing to and potentially corrupting the memory region.
	 *
	 * We want to perform the check before __ksize(), to avoid potentially
	 * crashing in __ksize() due to accessing invalid metadata.
	 */
	if (unlikely(ZERO_OR_NULL_PTR(objp)) || !kasan_check_byte(objp))
		return 0;

	size = kfence_ksize(objp) ?: __ksize(objp);
	/*
	 * We assume that ksize callers could use whole allocated area,
	 * so we need to unpoison this area.
	 */
	kasan_unpoison_range(objp, size);
	return size;
}
EXPORT_SYMBOL(ksize);
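
/*
 * Minimal usage sketch (hypothetical buffer, not part of this file): grow
 * into the slack that kmalloc() already rounded the allocation up to,
 * avoiding a reallocation when the rounded size is sufficient.
 *
 *	buf = kmalloc(100, GFP_KERNEL);	// likely served by kmalloc-128
 *	if (buf && ksize(buf) >= 120)
 *		;	// the bytes beyond the requested 100 may be used
 */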

/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);

int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
	if (__should_failslab(s, gfpflags))
		return -ENOMEM;
	return 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);