Lines Matching +full:page +full:- +full:size
1 // SPDX-License-Identifier: GPL-2.0
7 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
9 * Major cleanup, different bufctl logic, per-cpu arrays
17 * Pub: Prentice Hall ISBN 0-13-101908-2
19 * The Slab Allocator: An Object-Caching Kernel Memory Allocator
26 * page long) and always contiguous), and each slab contains multiple
48 * Each cache has a short per-cpu head array, most allocs
54 * The c_cpuarray may not be read with enabled local interrupts -
61 * The per-cpu arrays are never accessed from the wrong cpu, no locking,
62 * and local interrupts are disabled so slab code is preempt-safe.
63 * The non-constant members are protected with a per-cache irq spinlock.
65 * Many thanks to Mark Hemment, who wrote another per-cpu slab patch
66 * in 2000 - many ideas in the current implementation are derived from
71 * 11 April '97. Started multi-threading - markhe
72 * The global cache-chain is protected by the mutex 'slab_mutex'.
73 * The mutex is only needed when accessing/extending the cache-chain, which
114 #include <linux/fault-inject.h>
126 #include <asm/page.h>
135 * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
138 * STATS - 1 to collect stats for /proc/slabinfo.
141 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
171 #define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)
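A worked check of the macro above, as a standalone sketch: assuming freelist_idx_t is a one-byte type (the FREELIST_BYTE_INDEX case), the expression evaluates to (1 << 8) - 1, i.e. at most 255 objects per slab.

#include <stdio.h>

typedef unsigned char freelist_idx_t;	/* assumption: byte-sized freelist index */
#define BITS_PER_BYTE 8
#define SLAB_OBJ_MAX_NUM \
	((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)

int main(void)
{
	/* (1 << 1 * 8) - 1 = 255 objects per slab at most. */
	printf("SLAB_OBJ_MAX_NUM = %d\n", SLAB_OBJ_MAX_NUM);
	return 0;
}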
177 * - LIFO ordering, to hand out cache-warm objects from _alloc
178 * - reduce the number of linked list operations
179 * - reduce spinlock operations
181 * The limit is stored in the per-cpu structure to reduce the data cache
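The per-cpu head array described above behaves like a small LIFO stack of free objects. A minimal user-space sketch with hypothetical names (not the kernel's struct array_cache): allocation pops the most recently freed, cache-warm object; free pushes until the limit is hit, at which point the real allocator flushes a batch back to the shared lists.

#include <stddef.h>

#define TOY_AC_LIMIT 16			/* illustrative limit */

struct toy_array_cache {		/* hypothetical, for illustration only */
	unsigned int avail;		/* objects currently cached */
	unsigned int limit;		/* flush beyond this point */
	void *entry[TOY_AC_LIMIT];	/* LIFO stack of free objects */
};

/* Pop the most recently freed (cache-warm) object, or NULL if empty. */
static void *toy_alloc(struct toy_array_cache *ac)
{
	return ac->avail ? ac->entry[--ac->avail] : NULL;
}

/* Push a freed object; returns -1 when full (a real cache would then
 * move a batch of objects back to the per-node lists). */
static int toy_free(struct toy_array_cache *ac, void *objp)
{
	if (ac->avail >= ac->limit)
		return -1;
	ac->entry[ac->avail++] = objp;
	return 0;
}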
221 struct kmem_cache_node *n, struct page *page,
229 INIT_LIST_HEAD(&parent->slabs_full); in kmem_cache_node_init()
230 INIT_LIST_HEAD(&parent->slabs_partial); in kmem_cache_node_init()
231 INIT_LIST_HEAD(&parent->slabs_free); in kmem_cache_node_init()
232 parent->total_slabs = 0; in kmem_cache_node_init()
233 parent->free_slabs = 0; in kmem_cache_node_init()
234 parent->shared = NULL; in kmem_cache_node_init()
235 parent->alien = NULL; in kmem_cache_node_init()
236 parent->colour_next = 0; in kmem_cache_node_init()
237 spin_lock_init(&parent->list_lock); in kmem_cache_node_init()
238 parent->free_objects = 0; in kmem_cache_node_init()
239 parent->free_touched = 0; in kmem_cache_node_init()
245 list_splice(&get_node(cachep, nodeid)->slab, listp); \
250 MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \
251 MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
252 MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
257 #define OBJFREELIST_SLAB(x) ((x)->flags & CFLGS_OBJFREELIST_SLAB)
258 #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
272 #define STATS_INC_ACTIVE(x) ((x)->num_active++)
273 #define STATS_DEC_ACTIVE(x) ((x)->num_active--)
274 #define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
275 #define STATS_INC_GROWN(x) ((x)->grown++)
276 #define STATS_ADD_REAPED(x,y) ((x)->reaped += (y))
279 if ((x)->num_active > (x)->high_mark) \
280 (x)->high_mark = (x)->num_active; \
282 #define STATS_INC_ERR(x) ((x)->errors++)
283 #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
284 #define STATS_INC_NODEFREES(x) ((x)->node_frees++)
285 #define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++)
288 if ((x)->max_freeable < i) \
289 (x)->max_freeable = i; \
291 #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
292 #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
293 #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
294 #define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
318 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
321 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
323 * cachep->obj_offset: The real object.
324 * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
325 * cachep->size - 1* BYTES_PER_WORD: last caller address
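To make the layout above concrete, a small sketch (assumptions: 64-bit build with 8-byte words, SLAB_RED_ZONE and SLAB_STORE_USER both enabled, and an illustrative 64-byte object; the names are mine, mirroring the dbg_* helpers that follow):

#include <stdio.h>

#define BYTES_PER_WORD	sizeof(void *)			/* 8 on 64-bit */
#define REDZONE_WORD	sizeof(unsigned long long)	/* 8 */

int main(void)
{
	size_t obj_offset = REDZONE_WORD;	/* padding holds red zone 1 */
	size_t object_size = 64;		/* illustrative */
	/* red zone 1 + object + red zone 2 + last-caller word */
	size_t size = obj_offset + object_size + REDZONE_WORD + BYTES_PER_WORD;

	printf("red zone 1 at %zu\n", obj_offset - REDZONE_WORD);	/* 0  */
	printf("object     at %zu\n", obj_offset);			/* 8  */
	printf("red zone 2 at %zu\n", size - 2 * BYTES_PER_WORD);	/* 72 */
	printf("user word  at %zu\n", size - 1 * BYTES_PER_WORD);	/* 80 */
	return 0;
}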
330 return cachep->obj_offset; in obj_offset()
335 BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); in dbg_redzone1()
336 return (unsigned long long*) (objp + obj_offset(cachep) - in dbg_redzone1()
342 BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); in dbg_redzone2()
343 if (cachep->flags & SLAB_STORE_USER) in dbg_redzone2()
344 return (unsigned long long *)(objp + cachep->size - in dbg_redzone2()
345 sizeof(unsigned long long) - in dbg_redzone2()
347 return (unsigned long long *) (objp + cachep->size - in dbg_redzone2()
353 BUG_ON(!(cachep->flags & SLAB_STORE_USER)); in dbg_userword()
354 return (void **)(objp + cachep->size - BYTES_PER_WORD); in dbg_userword()
375 static inline void *index_to_obj(struct kmem_cache *cache, struct page *page, in index_to_obj() argument
378 return page->s_mem + cache->size * idx; in index_to_obj()
387 .size = sizeof(struct kmem_cache),
395 return this_cpu_ptr(cachep->cpu_cache); in cpu_cache_get()
399 * Calculate the number of objects and left-over bytes for a given buffer size.
412 * - @buffer_size bytes for each object in cache_estimate()
413 * - One freelist_idx_t for each object in cache_estimate()
416 * freelist will be at the end of slab page. The objects will be in cache_estimate()
420 * alignment will already be calculated into the size. Because in cache_estimate()
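A hedged sketch of the estimate being described: with an on-slab freelist each object also pays for one freelist_idx_t, while off-slab/objfreelist slabs divide the whole slab by the object size. Names and the page size are illustrative.

#include <stddef.h>

typedef unsigned char freelist_idx_t;	/* assumption: byte-sized index */
#define TOY_PAGE_SIZE 4096UL		/* illustrative page size */

/* Estimate objects per slab and the left-over bytes for a given order. */
static unsigned int toy_cache_estimate(unsigned long gfporder, size_t buffer_size,
				       int freelist_off_slab, size_t *left_over)
{
	size_t slab_size = TOY_PAGE_SIZE << gfporder;
	unsigned int num;

	if (freelist_off_slab) {
		num = slab_size / buffer_size;
		*left_over = slab_size % buffer_size;
	} else {
		/* each object also needs one on-slab freelist index */
		num = slab_size / (buffer_size + sizeof(freelist_idx_t));
		*left_over = slab_size % (buffer_size + sizeof(freelist_idx_t));
	}
	return num;
}

For example, 128-byte objects at order 0 with an on-slab freelist give 4096 / 129 = 31 objects and 97 bytes left over for colouring.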
443 function, cachep->name, msg); in __slab_error()
469 min(slab_max_order, MAX_ORDER - 1); in slab_max_order_setup()
515 if (reap_work->work.func == NULL) { in start_cpu_timer()
526 ac->avail = 0; in init_arraycache()
527 ac->limit = limit; in init_arraycache()
528 ac->batchcount = batch; in init_arraycache()
529 ac->touched = 0; in init_arraycache()
553 struct page *page, void *objp) in cache_free_pfmemalloc() argument
559 page_node = page_to_nid(page); in cache_free_pfmemalloc()
562 spin_lock(&n->list_lock); in cache_free_pfmemalloc()
564 spin_unlock(&n->list_lock); in cache_free_pfmemalloc()
579 int nr = min3(from->avail, max, to->limit - to->avail); in transfer_objects()
584 memcpy(to->entry + to->avail, from->entry + from->avail - nr, in transfer_objects()
587 from->avail -= nr; in transfer_objects()
588 to->avail += nr; in transfer_objects()
592 /* &alien->lock must be held by alien callers. */
595 /* Avoid trivial double-free. */ in __free_one()
597 WARN_ON_ONCE(ac->avail > 0 && ac->entry[ac->avail - 1] == objp)) in __free_one()
599 ac->entry[ac->avail++] = objp; in __free_one()
653 init_arraycache(&alc->ac, entries, batch); in __alloc_alien_cache()
654 spin_lock_init(&alc->lock); in __alloc_alien_cache()
675 for (i--; i >= 0; i--) in alloc_alien_cache()
701 if (ac->avail) { in __drain_alien_cache()
702 spin_lock(&n->list_lock); in __drain_alien_cache()
708 if (n->shared) in __drain_alien_cache()
709 transfer_objects(n->shared, ac, ac->limit); in __drain_alien_cache()
711 free_block(cachep, ac->entry, ac->avail, node, list); in __drain_alien_cache()
712 ac->avail = 0; in __drain_alien_cache()
713 spin_unlock(&n->list_lock); in __drain_alien_cache()
724 if (n->alien) { in reap_alien()
725 struct alien_cache *alc = n->alien[node]; in reap_alien()
729 ac = &alc->ac; in reap_alien()
730 if (ac->avail && spin_trylock_irq(&alc->lock)) { in reap_alien()
734 spin_unlock_irq(&alc->lock); in reap_alien()
754 ac = &alc->ac; in drain_alien_cache()
755 spin_lock_irqsave(&alc->lock, flags); in drain_alien_cache()
757 spin_unlock_irqrestore(&alc->lock, flags); in drain_alien_cache()
773 if (n->alien && n->alien[page_node]) { in __cache_free_alien()
774 alien = n->alien[page_node]; in __cache_free_alien()
775 ac = &alien->ac; in __cache_free_alien()
776 spin_lock(&alien->lock); in __cache_free_alien()
777 if (unlikely(ac->avail == ac->limit)) { in __cache_free_alien()
782 spin_unlock(&alien->lock); in __cache_free_alien()
786 spin_lock(&n->list_lock); in __cache_free_alien()
788 spin_unlock(&n->list_lock); in __cache_free_alien()
829 spin_lock_irq(&n->list_lock); in init_cache_node()
830 n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + in init_cache_node()
831 cachep->num; in init_cache_node()
832 spin_unlock_irq(&n->list_lock); in init_cache_node()
839 return -ENOMEM; in init_cache_node()
842 n->next_reap = jiffies + REAPTIMEOUT_NODE + in init_cache_node()
845 n->free_limit = in init_cache_node()
846 (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; in init_cache_node()
853 cachep->node[node] = n; in init_cache_node()
861 * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
862 * will be allocated off-node since memory is not yet online for the new node.
886 int ret = -ENOMEM; in setup_kmem_cache_node()
894 new_alien = alloc_alien_cache(node, cachep->limit, gfp); in setup_kmem_cache_node()
899 if (cachep->shared) { in setup_kmem_cache_node()
901 cachep->shared * cachep->batchcount, 0xbaadf00d, gfp); in setup_kmem_cache_node()
911 spin_lock_irq(&n->list_lock); in setup_kmem_cache_node()
912 if (n->shared && force_change) { in setup_kmem_cache_node()
913 free_block(cachep, n->shared->entry, in setup_kmem_cache_node()
914 n->shared->avail, node, &list); in setup_kmem_cache_node()
915 n->shared->avail = 0; in setup_kmem_cache_node()
918 if (!n->shared || force_change) { in setup_kmem_cache_node()
919 old_shared = n->shared; in setup_kmem_cache_node()
920 n->shared = new_shared; in setup_kmem_cache_node()
924 if (!n->alien) { in setup_kmem_cache_node()
925 n->alien = new_alien; in setup_kmem_cache_node()
929 spin_unlock_irq(&n->list_lock); in setup_kmem_cache_node()
933 * To protect lockless access to n->shared during irq disabled context. in setup_kmem_cache_node()
934 * If n->shared isn't NULL in irq disabled context, accessing to it is in setup_kmem_cache_node()
935 * guaranteed to be valid until irq is re-enabled, because it will be in setup_kmem_cache_node()
968 spin_lock_irq(&n->list_lock); in cpuup_canceled()
971 n->free_limit -= cachep->batchcount; in cpuup_canceled()
974 nc = per_cpu_ptr(cachep->cpu_cache, cpu); in cpuup_canceled()
975 free_block(cachep, nc->entry, nc->avail, node, &list); in cpuup_canceled()
976 nc->avail = 0; in cpuup_canceled()
979 spin_unlock_irq(&n->list_lock); in cpuup_canceled()
983 shared = n->shared; in cpuup_canceled()
985 free_block(cachep, shared->entry, in cpuup_canceled()
986 shared->avail, node, &list); in cpuup_canceled()
987 n->shared = NULL; in cpuup_canceled()
990 alien = n->alien; in cpuup_canceled()
991 n->alien = NULL; in cpuup_canceled()
993 spin_unlock_irq(&n->list_lock); in cpuup_canceled()
1046 return -ENOMEM; in cpuup_prepare()
1100 * Drains freelist for a node on each slab cache, used for memory hot-remove.
1101 * Returns -EBUSY if all objects cannot be drained so that the node is not
1120 if (!list_empty(&n->slabs_full) || in drain_cache_node_node()
1121 !list_empty(&n->slabs_partial)) { in drain_cache_node_node()
1122 ret = -EBUSY; in drain_cache_node_node()
1136 nid = mnb->status_change_nid; in slab_memory_callback()
1177 spin_lock_init(&ptr->list_lock); in init_list()
1180 cachep->node[nodeid] = ptr; in init_list()
1185 * size of kmem_cache_node.
1192 cachep->node[node] = &init_kmem_cache_node[index + node]; in set_up_node()
1193 cachep->node[node]->next_reap = jiffies + in set_up_node()
1200 * Initialisation. Called after the page allocator has been initialised and
1216 * Fragmentation resistance on low memory - only use bigger in kmem_cache_init()
1217 * page orders on machines with more than 32MB of memory if in kmem_cache_init()
1246 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids in kmem_cache_init()
1252 list_add(&kmem_cache->list, &slab_caches); in kmem_cache_init()
1261 kmalloc_info[INDEX_NODE].size, in kmem_cache_init()
1263 kmalloc_info[INDEX_NODE].size); in kmem_cache_init()
1317 * Register the timers that return unneeded pages to the page allocator in cpucache_init()
1342 pr_warn(" cache: %s, object size: %d, order: %d\n", in slab_out_of_memory()
1343 cachep->name, cachep->size, cachep->gfporder); in slab_out_of_memory()
1348 spin_lock_irqsave(&n->list_lock, flags); in slab_out_of_memory()
1349 total_slabs = n->total_slabs; in slab_out_of_memory()
1350 free_slabs = n->free_slabs; in slab_out_of_memory()
1351 free_objs = n->free_objects; in slab_out_of_memory()
1352 spin_unlock_irqrestore(&n->list_lock, flags); in slab_out_of_memory()
1355 node, total_slabs - free_slabs, total_slabs, in slab_out_of_memory()
1356 (total_slabs * cachep->num) - free_objs, in slab_out_of_memory()
1357 total_slabs * cachep->num); in slab_out_of_memory()
1363 * Interface to system's page allocator. No need to hold the
1364 * kmem_cache_node ->list_lock.
1370 static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, in kmem_getpages()
1373 struct page *page; in kmem_getpages() local
1375 flags |= cachep->allocflags; in kmem_getpages()
1377 page = __alloc_pages_node(nodeid, flags, cachep->gfporder); in kmem_getpages()
1378 if (!page) { in kmem_getpages()
1383 account_slab_page(page, cachep->gfporder, cachep); in kmem_getpages()
1384 __SetPageSlab(page); in kmem_getpages()
1386 if (sk_memalloc_socks() && page_is_pfmemalloc(page)) in kmem_getpages()
1387 SetPageSlabPfmemalloc(page); in kmem_getpages()
1389 return page; in kmem_getpages()
1393 * Interface to system's page release.
1395 static void kmem_freepages(struct kmem_cache *cachep, struct page *page) in kmem_freepages() argument
1397 int order = cachep->gfporder; in kmem_freepages()
1399 BUG_ON(!PageSlab(page)); in kmem_freepages()
1400 __ClearPageSlabPfmemalloc(page); in kmem_freepages()
1401 __ClearPageSlab(page); in kmem_freepages()
1402 page_mapcount_reset(page); in kmem_freepages()
1403 page->mapping = NULL; in kmem_freepages()
1405 if (current->reclaim_state) in kmem_freepages()
1406 current->reclaim_state->reclaimed_slab += 1 << order; in kmem_freepages()
1407 unaccount_slab_page(page, order, cachep); in kmem_freepages()
1408 __free_pages(page, order); in kmem_freepages()
1414 struct page *page; in kmem_rcu_free() local
1416 page = container_of(head, struct page, rcu_head); in kmem_rcu_free()
1417 cachep = page->slab_cache; in kmem_rcu_free()
1419 kmem_freepages(cachep, page); in kmem_rcu_free()
1426 (cachep->size % PAGE_SIZE) == 0) in is_debug_pagealloc_cache()
1438 __kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map); in slab_kernel_map()
1449 int size = cachep->object_size; in poison_obj() local
1452 memset(addr, val, size); in poison_obj()
1453 *(unsigned char *)(addr + size - 1) = POISON_END; in poison_obj()
1474 if (!(error & (error - 1))) { in dump_line()
1490 int i, size; in print_objinfo() local
1493 if (cachep->flags & SLAB_RED_ZONE) { in print_objinfo()
1499 if (cachep->flags & SLAB_STORE_USER) in print_objinfo()
1502 size = cachep->object_size; in print_objinfo()
1503 for (i = 0; i < size && lines; i += 16, lines--) { in print_objinfo()
1506 if (i + limit > size) in print_objinfo()
1507 limit = size - i; in print_objinfo()
1515 int size, i; in check_poison_obj() local
1522 size = cachep->object_size; in check_poison_obj()
1524 for (i = 0; i < size; i++) { in check_poison_obj()
1526 if (i == size - 1) in check_poison_obj()
1534 print_tainted(), cachep->name, in check_poison_obj()
1535 realobj, size); in check_poison_obj()
1541 if (i + limit > size) in check_poison_obj()
1542 limit = size - i; in check_poison_obj()
1555 struct page *page = virt_to_head_page(objp); in check_poison_obj() local
1558 objnr = obj_to_index(cachep, page, objp); in check_poison_obj()
1560 objp = index_to_obj(cachep, page, objnr - 1); in check_poison_obj()
1562 pr_err("Prev obj: start=%px, len=%d\n", realobj, size); in check_poison_obj()
1565 if (objnr + 1 < cachep->num) { in check_poison_obj()
1566 objp = index_to_obj(cachep, page, objnr + 1); in check_poison_obj()
1568 pr_err("Next obj: start=%px, len=%d\n", realobj, size); in check_poison_obj()
1577 struct page *page) in slab_destroy_debugcheck() argument
1581 if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) { in slab_destroy_debugcheck()
1582 poison_obj(cachep, page->freelist - obj_offset(cachep), in slab_destroy_debugcheck()
1586 for (i = 0; i < cachep->num; i++) { in slab_destroy_debugcheck()
1587 void *objp = index_to_obj(cachep, page, i); in slab_destroy_debugcheck()
1589 if (cachep->flags & SLAB_POISON) { in slab_destroy_debugcheck()
1593 if (cachep->flags & SLAB_RED_ZONE) { in slab_destroy_debugcheck()
1603 struct page *page) in slab_destroy_debugcheck() argument
1609 * slab_destroy - destroy and release all objects in a slab
1611 * @page: page pointer being destroyed
1613 * Destroy all the objs in a slab page, and release the mem back to the system.
1614 * Before calling, the slab page must have been unlinked from the cache. The
1615 * kmem_cache_node ->list_lock is not held/needed.
1617 static void slab_destroy(struct kmem_cache *cachep, struct page *page) in slab_destroy() argument
1621 freelist = page->freelist; in slab_destroy()
1622 slab_destroy_debugcheck(cachep, page); in slab_destroy()
1623 if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU)) in slab_destroy()
1624 call_rcu(&page->rcu_head, kmem_rcu_free); in slab_destroy()
1626 kmem_freepages(cachep, page); in slab_destroy()
1630 * although actual page can be freed in rcu context in slab_destroy()
1633 kmem_cache_free(cachep->freelist_cache, freelist); in slab_destroy()
1637 * Update the size of the caches before calling slabs_destroy as it may
1642 struct page *page, *n; in slabs_destroy() local
1644 list_for_each_entry_safe(page, n, list, slab_list) { in slabs_destroy()
1645 list_del(&page->slab_list); in slabs_destroy()
1646 slab_destroy(cachep, page); in slabs_destroy()
1651 * calculate_slab_order - calculate size (page order) of slabs
1653 * @size: size of objects to be created in this cache.
1660 * towards high-order requests, this should be changed.
1662 * Return: number of left-over bytes in a slab
1665 size_t size, slab_flags_t flags) in calculate_slab_order() argument
1674 num = cache_estimate(gfporder, size, flags, &remainder); in calculate_slab_order()
1699 if (freelist_cache->size > cachep->size / 2) in calculate_slab_order()
1703 /* Found something acceptable - save it away */ in calculate_slab_order()
1704 cachep->num = num; in calculate_slab_order()
1705 cachep->gfporder = gfporder; in calculate_slab_order()
1709 * A VFS-reclaimable slab tends to have most allocations in calculate_slab_order()
1711 * higher-order pages when we are unable to shrink dcache. in calculate_slab_order()
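The search that calculate_slab_order() performs can be sketched as follows (simplified: the real code also honours slab_max_order, the off-slab freelist cost and the reclaimable-slab case noted above; the "at most 1/8 of the slab wasted" bound used here is an assumption for illustration).

#include <stddef.h>

#define TOY_PAGE_SIZE	4096UL		/* illustrative */
#define TOY_MAX_ORDER	10		/* illustrative upper bound */

/* Pick the smallest page order where at least one object fits and the
 * wasted tail is no more than 1/8 of the slab. Returns -1 on failure. */
static int toy_slab_order(size_t size, unsigned int *objs_out)
{
	int order;

	for (order = 0; order <= TOY_MAX_ORDER; order++) {
		size_t slab_size = TOY_PAGE_SIZE << order;
		unsigned int num = slab_size / size;
		size_t left_over = slab_size - (size_t)num * size;

		if (!num)
			continue;		/* object does not fit yet */
		if (left_over * 8 <= slab_size) {
			*objs_out = num;
			return order;		/* acceptable fragmentation */
		}
	}
	return -1;
}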
1736 size_t size; in alloc_kmem_cache_cpus() local
1739 size = sizeof(void *) * entries + sizeof(struct array_cache); in alloc_kmem_cache_cpus()
1740 cpu_cache = __alloc_percpu(size, sizeof(void *)); in alloc_kmem_cache_cpus()
1758 cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1); in setup_cpu_cache()
1759 if (!cachep->cpu_cache) in setup_cpu_cache()
1772 cachep->node[node] = kmalloc_node( in setup_cpu_cache()
1774 BUG_ON(!cachep->node[node]); in setup_cpu_cache()
1775 kmem_cache_node_init(cachep->node[node]); in setup_cpu_cache()
1779 cachep->node[numa_mem_id()]->next_reap = in setup_cpu_cache()
1783 cpu_cache_get(cachep)->avail = 0; in setup_cpu_cache()
1784 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; in setup_cpu_cache()
1785 cpu_cache_get(cachep)->batchcount = 1; in setup_cpu_cache()
1786 cpu_cache_get(cachep)->touched = 0; in setup_cpu_cache()
1787 cachep->batchcount = 1; in setup_cpu_cache()
1788 cachep->limit = BOOT_CPUCACHE_ENTRIES; in setup_cpu_cache()
1799 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align, in __kmem_cache_alias() argument
1804 cachep = find_mergeable(size, align, flags, name, ctor); in __kmem_cache_alias()
1806 cachep->refcount++; in __kmem_cache_alias()
1812 cachep->object_size = max_t(int, cachep->object_size, size); in __kmem_cache_alias()
1818 size_t size, slab_flags_t flags) in set_objfreelist_slab_cache() argument
1822 cachep->num = 0; in set_objfreelist_slab_cache()
1825 * If slab auto-initialization on free is enabled, store the freelist in set_objfreelist_slab_cache()
1826 * off-slab, so that its contents don't end up in one of the allocated in set_objfreelist_slab_cache()
1832 if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU) in set_objfreelist_slab_cache()
1835 left = calculate_slab_order(cachep, size, in set_objfreelist_slab_cache()
1837 if (!cachep->num) in set_objfreelist_slab_cache()
1840 if (cachep->num * sizeof(freelist_idx_t) > cachep->object_size) in set_objfreelist_slab_cache()
1843 cachep->colour = left / cachep->colour_off; in set_objfreelist_slab_cache()
1849 size_t size, slab_flags_t flags) in set_off_slab_cache() argument
1853 cachep->num = 0; in set_off_slab_cache()
1856 * Always use on-slab management when SLAB_NOLEAKTRACE in set_off_slab_cache()
1863 * Size is large, assume best to place the slab management obj in set_off_slab_cache()
1864 * off-slab (should allow better packing of objs). in set_off_slab_cache()
1866 left = calculate_slab_order(cachep, size, flags | CFLGS_OFF_SLAB); in set_off_slab_cache()
1867 if (!cachep->num) in set_off_slab_cache()
1871 * If the slab has been placed off-slab, and we have enough space then in set_off_slab_cache()
1872 * move it on-slab. This is at the expense of any extra colouring. in set_off_slab_cache()
1874 if (left >= cachep->num * sizeof(freelist_idx_t)) in set_off_slab_cache()
1877 cachep->colour = left / cachep->colour_off; in set_off_slab_cache()
1883 size_t size, slab_flags_t flags) in set_on_slab_cache() argument
1887 cachep->num = 0; in set_on_slab_cache()
1889 left = calculate_slab_order(cachep, size, flags); in set_on_slab_cache()
1890 if (!cachep->num) in set_on_slab_cache()
1893 cachep->colour = left / cachep->colour_off; in set_on_slab_cache()
1899 * __kmem_cache_create - Create a cache.
1909 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
1912 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
1915 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
1926 unsigned int size = cachep->size; in __kmem_cache_create() local
1932 * large objects, if the increased size would increase the object size in __kmem_cache_create()
1936 if (size < 4096 || fls(size - 1) == fls(size - 1 + REDZONE_ALIGN + in __kmem_cache_create()
1945 * Check that size is in terms of words. This is needed to avoid in __kmem_cache_create()
1947 * sure any on-slab bufctl's are also correctly aligned. in __kmem_cache_create()
1949 size = ALIGN(size, BYTES_PER_WORD); in __kmem_cache_create()
1954 * aligned, by adjusting the object size accordingly. */ in __kmem_cache_create()
1955 size = ALIGN(size, REDZONE_ALIGN); in __kmem_cache_create()
1959 if (ralign < cachep->align) { in __kmem_cache_create()
1960 ralign = cachep->align; in __kmem_cache_create()
1968 cachep->align = ralign; in __kmem_cache_create()
1969 cachep->colour_off = cache_line_size(); in __kmem_cache_create()
1971 if (cachep->colour_off < cachep->align) in __kmem_cache_create()
1972 cachep->colour_off = cachep->align; in __kmem_cache_create()
1982 * Both debugging options require word-alignment which is calculated in __kmem_cache_create()
1987 cachep->obj_offset += sizeof(unsigned long long); in __kmem_cache_create()
1988 size += 2 * sizeof(unsigned long long); in __kmem_cache_create()
1996 size += REDZONE_ALIGN; in __kmem_cache_create()
1998 size += BYTES_PER_WORD; in __kmem_cache_create()
2002 kasan_cache_create(cachep, &size, &flags); in __kmem_cache_create()
2004 size = ALIGN(size, cachep->align); in __kmem_cache_create()
2009 if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE) in __kmem_cache_create()
2010 size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align); in __kmem_cache_create()
2014 * To activate debug pagealloc, off-slab management is necessary in __kmem_cache_create()
2017 * to check size >= 256. It guarantees that all necessary small in __kmem_cache_create()
2021 size >= 256 && cachep->object_size > cache_line_size()) { in __kmem_cache_create()
2022 if (size < PAGE_SIZE || size % PAGE_SIZE == 0) { in __kmem_cache_create()
2023 size_t tmp_size = ALIGN(size, PAGE_SIZE); in __kmem_cache_create()
2027 cachep->obj_offset += tmp_size - size; in __kmem_cache_create()
2028 size = tmp_size; in __kmem_cache_create()
2035 if (set_objfreelist_slab_cache(cachep, size, flags)) { in __kmem_cache_create()
2040 if (set_off_slab_cache(cachep, size, flags)) { in __kmem_cache_create()
2045 if (set_on_slab_cache(cachep, size, flags)) in __kmem_cache_create()
2048 return -E2BIG; in __kmem_cache_create()
2051 cachep->freelist_size = cachep->num * sizeof(freelist_idx_t); in __kmem_cache_create()
2052 cachep->flags = flags; in __kmem_cache_create()
2053 cachep->allocflags = __GFP_COMP; in __kmem_cache_create()
2055 cachep->allocflags |= GFP_DMA; in __kmem_cache_create()
2057 cachep->allocflags |= GFP_DMA32; in __kmem_cache_create()
2059 cachep->allocflags |= __GFP_RECLAIMABLE; in __kmem_cache_create()
2060 cachep->size = size; in __kmem_cache_create()
2061 cachep->reciprocal_buffer_size = reciprocal_value(size); in __kmem_cache_create()
2070 (cachep->flags & SLAB_POISON) && in __kmem_cache_create()
2072 cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); in __kmem_cache_create()
2076 cachep->freelist_cache = in __kmem_cache_create()
2077 kmalloc_slab(cachep->freelist_size, 0u); in __kmem_cache_create()
2109 assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock); in check_spinlock_acquired()
2117 assert_spin_locked(&get_node(cachep, node)->list_lock); in check_spinlock_acquired_node()
2134 if (!ac || !ac->avail) in drain_array_locked()
2137 tofree = free_all ? ac->avail : (ac->limit + 4) / 5; in drain_array_locked()
2138 if (tofree > ac->avail) in drain_array_locked()
2139 tofree = (ac->avail + 1) / 2; in drain_array_locked()
2141 free_block(cachep, ac->entry, tofree, node, list); in drain_array_locked()
2142 ac->avail -= tofree; in drain_array_locked()
2143 memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail); in drain_array_locked()
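As a worked example of the drain above: with limit = 120 and avail = 30, a periodic (non-full) drain frees (120 + 4) / 5 = 24 objects and memmoves the remaining 6 to the front; if only 10 objects were available, tofree would be clamped to (10 + 1) / 2 = 5.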
2157 spin_lock(&n->list_lock); in do_drain()
2158 free_block(cachep, ac->entry, ac->avail, node, &list); in do_drain()
2159 spin_unlock(&n->list_lock); in do_drain()
2160 ac->avail = 0; in do_drain()
2173 if (n->alien) in drain_cpu_caches()
2174 drain_alien_cache(cachep, n->alien); in drain_cpu_caches()
2177 spin_lock_irq(&n->list_lock); in drain_cpu_caches()
2178 drain_array_locked(cachep, n->shared, node, true, &list); in drain_cpu_caches()
2179 spin_unlock_irq(&n->list_lock); in drain_cpu_caches()
2196 struct page *page; in drain_freelist() local
2199 while (nr_freed < tofree && !list_empty(&n->slabs_free)) { in drain_freelist()
2201 spin_lock_irq(&n->list_lock); in drain_freelist()
2202 p = n->slabs_free.prev; in drain_freelist()
2203 if (p == &n->slabs_free) { in drain_freelist()
2204 spin_unlock_irq(&n->list_lock); in drain_freelist()
2208 page = list_entry(p, struct page, slab_list); in drain_freelist()
2209 list_del(&page->slab_list); in drain_freelist()
2210 n->free_slabs--; in drain_freelist()
2211 n->total_slabs--; in drain_freelist()
2216 n->free_objects -= cache->num; in drain_freelist()
2217 spin_unlock_irq(&n->list_lock); in drain_freelist()
2218 slab_destroy(cache, page); in drain_freelist()
2231 if (!list_empty(&n->slabs_full) || in __kmem_cache_empty()
2232 !list_empty(&n->slabs_partial)) in __kmem_cache_empty()
2249 ret += !list_empty(&n->slabs_full) || in __kmem_cache_shrink()
2250 !list_empty(&n->slabs_partial); in __kmem_cache_shrink()
2267 free_percpu(cachep->cpu_cache); in __kmem_cache_release()
2271 kfree(n->shared); in __kmem_cache_release()
2272 free_alien_cache(n->alien); in __kmem_cache_release()
2274 cachep->node[i] = NULL; in __kmem_cache_release()
2281 * For a slab cache when the slab descriptor is off-slab, the
2284 * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point.
2286 * in turn looks up in the kmalloc_{dma,}_caches for the desired-size one.
2287 * This is a "chicken-and-egg" problem.
2289 * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
2293 struct page *page, int colour_off, in alloc_slabmgmt() argument
2297 void *addr = page_address(page); in alloc_slabmgmt()
2299 page->s_mem = addr + colour_off; in alloc_slabmgmt()
2300 page->active = 0; in alloc_slabmgmt()
2305 /* Slab management obj is off-slab. */ in alloc_slabmgmt()
2306 freelist = kmem_cache_alloc_node(cachep->freelist_cache, in alloc_slabmgmt()
2310 freelist = addr + (PAGE_SIZE << cachep->gfporder) - in alloc_slabmgmt()
2311 cachep->freelist_size; in alloc_slabmgmt()
2317 static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx) in get_free_obj() argument
2319 return ((freelist_idx_t *)page->freelist)[idx]; in get_free_obj()
2322 static inline void set_free_obj(struct page *page, in set_free_obj() argument
2325 ((freelist_idx_t *)(page->freelist))[idx] = val; in set_free_obj()
2328 static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page) in cache_init_objs_debug() argument
2333 for (i = 0; i < cachep->num; i++) { in cache_init_objs_debug()
2334 void *objp = index_to_obj(cachep, page, i); in cache_init_objs_debug()
2336 if (cachep->flags & SLAB_STORE_USER) in cache_init_objs_debug()
2339 if (cachep->flags & SLAB_RED_ZONE) { in cache_init_objs_debug()
2348 if (cachep->ctor && !(cachep->flags & SLAB_POISON)) { in cache_init_objs_debug()
2351 cachep->ctor(objp + obj_offset(cachep)); in cache_init_objs_debug()
2356 if (cachep->flags & SLAB_RED_ZONE) { in cache_init_objs_debug()
2363 if (cachep->flags & SLAB_POISON) { in cache_init_objs_debug()
2384 * return true if the pre-computed list is available, false otherwise.
2396 /* Use a random state if the pre-computed list is not available */ in freelist_state_initialize()
2397 if (!cachep->random_seq) { in freelist_state_initialize()
2398 prandom_seed_state(&state->rnd_state, rand); in freelist_state_initialize()
2401 state->list = cachep->random_seq; in freelist_state_initialize()
2402 state->count = count; in freelist_state_initialize()
2403 state->pos = rand % count; in freelist_state_initialize()
2412 if (state->pos >= state->count) in next_random_slot()
2413 state->pos = 0; in next_random_slot()
2414 return state->list[state->pos++]; in next_random_slot()
2418 static void swap_free_obj(struct page *page, unsigned int a, unsigned int b) in swap_free_obj() argument
2420 swap(((freelist_idx_t *)page->freelist)[a], in swap_free_obj()
2421 ((freelist_idx_t *)page->freelist)[b]); in swap_free_obj()
2425 * Shuffle the freelist initialization state based on pre-computed lists.
2428 static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page) in shuffle_freelist() argument
2430 unsigned int objfreelist = 0, i, rand, count = cachep->num; in shuffle_freelist()
2442 objfreelist = count - 1; in shuffle_freelist()
2445 page->freelist = index_to_obj(cachep, page, objfreelist) + in shuffle_freelist()
2447 count--; in shuffle_freelist()
2452 * Later use a pre-computed list for speed. in shuffle_freelist()
2456 set_free_obj(page, i, i); in shuffle_freelist()
2458 /* Fisher-Yates shuffle */ in shuffle_freelist()
2459 for (i = count - 1; i > 0; i--) { in shuffle_freelist()
2462 swap_free_obj(page, i, rand); in shuffle_freelist()
2466 set_free_obj(page, i, next_random_slot(&state)); in shuffle_freelist()
2470 set_free_obj(page, cachep->num - 1, objfreelist); in shuffle_freelist()
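The Fisher-Yates pass above, shown on a plain index array as a standalone sketch (the kernel variant draws randomness from a pre-computed list or a seeded state rather than rand(), and may reserve the last slot for the objfreelist case):

#include <stdlib.h>

/* Fill 'freelist' with 0..count-1 and shuffle it in place. */
static void toy_shuffle_freelist(unsigned char *freelist, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		freelist[i] = (unsigned char)i;		/* identity order */

	if (count < 2)
		return;

	/* Fisher-Yates: swap slot i with a random slot in [0, i]. */
	for (i = count - 1; i > 0; i--) {
		unsigned int j = rand() % (i + 1);
		unsigned char tmp = freelist[i];

		freelist[i] = freelist[j];
		freelist[j] = tmp;
	}
}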
2476 struct page *page) in shuffle_freelist() argument
2483 struct page *page) in cache_init_objs() argument
2489 cache_init_objs_debug(cachep, page); in cache_init_objs()
2492 shuffled = shuffle_freelist(cachep, page); in cache_init_objs()
2495 page->freelist = index_to_obj(cachep, page, cachep->num - 1) + in cache_init_objs()
2499 for (i = 0; i < cachep->num; i++) { in cache_init_objs()
2500 objp = index_to_obj(cachep, page, i); in cache_init_objs()
2504 if (DEBUG == 0 && cachep->ctor) { in cache_init_objs()
2506 cachep->ctor(objp); in cache_init_objs()
2511 set_free_obj(page, i, i); in cache_init_objs()
2515 static void *slab_get_obj(struct kmem_cache *cachep, struct page *page) in slab_get_obj() argument
2519 objp = index_to_obj(cachep, page, get_free_obj(page, page->active)); in slab_get_obj()
2520 page->active++; in slab_get_obj()
2526 struct page *page, void *objp) in slab_put_obj() argument
2528 unsigned int objnr = obj_to_index(cachep, page, objp); in slab_put_obj()
2533 for (i = page->active; i < cachep->num; i++) { in slab_put_obj()
2534 if (get_free_obj(page, i) == objnr) { in slab_put_obj()
2536 cachep->name, objp); in slab_put_obj()
2541 page->active--; in slab_put_obj()
2542 if (!page->freelist) in slab_put_obj()
2543 page->freelist = objp + obj_offset(cachep); in slab_put_obj()
2545 set_free_obj(page, page->active, objnr); in slab_put_obj()
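The two helpers above amount to stack operations on an index-based freelist, with index_to_obj() translating an index into s_mem + size * idx. A toy version with hypothetical names:

#include <stddef.h>

struct toy_slab {			/* hypothetical, for illustration only */
	char *s_mem;			/* address of the first object */
	size_t size;			/* object stride within the slab */
	unsigned int active;		/* objects currently handed out */
	unsigned char freelist[256];	/* indices of free objects */
};

/* Hand out the next free object: the index stored at slot 'active'. */
static void *toy_get_obj(struct toy_slab *slab)
{
	return slab->s_mem + slab->size * slab->freelist[slab->active++];
}

/* Return object number 'objnr': push it back as the new top of the freelist. */
static void toy_put_obj(struct toy_slab *slab, unsigned int objnr)
{
	slab->freelist[--slab->active] = (unsigned char)objnr;
}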
2553 static void slab_map_pages(struct kmem_cache *cache, struct page *page, in slab_map_pages() argument
2556 page->slab_cache = cache; in slab_map_pages()
2557 page->freelist = freelist; in slab_map_pages()
2564 static struct page *cache_grow_begin(struct kmem_cache *cachep, in cache_grow_begin()
2572 struct page *page; in cache_grow_begin() local
2581 WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO)); in cache_grow_begin()
2589 * Get mem for the objs. Attempt to allocate a physical page from in cache_grow_begin()
2592 page = kmem_getpages(cachep, local_flags, nodeid); in cache_grow_begin()
2593 if (!page) in cache_grow_begin()
2596 page_node = page_to_nid(page); in cache_grow_begin()
2600 n->colour_next++; in cache_grow_begin()
2601 if (n->colour_next >= cachep->colour) in cache_grow_begin()
2602 n->colour_next = 0; in cache_grow_begin()
2604 offset = n->colour_next; in cache_grow_begin()
2605 if (offset >= cachep->colour) in cache_grow_begin()
2608 offset *= cachep->colour_off; in cache_grow_begin()
2612 * page_address() in the latter returns a non-tagged pointer, in cache_grow_begin()
2615 kasan_poison_slab(page); in cache_grow_begin()
2618 freelist = alloc_slabmgmt(cachep, page, offset, in cache_grow_begin()
2623 slab_map_pages(cachep, page, freelist); in cache_grow_begin()
2625 cache_init_objs(cachep, page); in cache_grow_begin()
2630 return page; in cache_grow_begin()
2633 kmem_freepages(cachep, page); in cache_grow_begin()
2640 static void cache_grow_end(struct kmem_cache *cachep, struct page *page) in cache_grow_end() argument
2647 if (!page) in cache_grow_end()
2650 INIT_LIST_HEAD(&page->slab_list); in cache_grow_end()
2651 n = get_node(cachep, page_to_nid(page)); in cache_grow_end()
2653 spin_lock(&n->list_lock); in cache_grow_end()
2654 n->total_slabs++; in cache_grow_end()
2655 if (!page->active) { in cache_grow_end()
2656 list_add_tail(&page->slab_list, &n->slabs_free); in cache_grow_end()
2657 n->free_slabs++; in cache_grow_end()
2659 fixup_slab_list(cachep, n, page, &list); in cache_grow_end()
2662 n->free_objects += cachep->num - page->active; in cache_grow_end()
2663 spin_unlock(&n->list_lock); in cache_grow_end()
2672 * - detect bad pointers.
2673 * - POISON/RED_ZONE checking
2710 struct page *page; in cache_free_debugcheck() local
2714 objp -= obj_offset(cachep); in cache_free_debugcheck()
2716 page = virt_to_head_page(objp); in cache_free_debugcheck()
2718 if (cachep->flags & SLAB_RED_ZONE) { in cache_free_debugcheck()
2723 if (cachep->flags & SLAB_STORE_USER) in cache_free_debugcheck()
2726 objnr = obj_to_index(cachep, page, objp); in cache_free_debugcheck()
2728 BUG_ON(objnr >= cachep->num); in cache_free_debugcheck()
2729 BUG_ON(objp != index_to_obj(cachep, page, objnr)); in cache_free_debugcheck()
2731 if (cachep->flags & SLAB_POISON) { in cache_free_debugcheck()
2751 objp = next - obj_offset(cachep); in fixup_objfreelist_debug()
2759 struct kmem_cache_node *n, struct page *page, in fixup_slab_list() argument
2763 list_del(&page->slab_list); in fixup_slab_list()
2764 if (page->active == cachep->num) { in fixup_slab_list()
2765 list_add(&page->slab_list, &n->slabs_full); in fixup_slab_list()
2769 if (cachep->flags & SLAB_POISON) { in fixup_slab_list()
2770 void **objp = page->freelist; in fixup_slab_list()
2776 page->freelist = NULL; in fixup_slab_list()
2779 list_add(&page->slab_list, &n->slabs_partial); in fixup_slab_list()
2782 /* Try to find a non-pfmemalloc slab if needed */
2783 static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n, in get_valid_first_slab()
2784 struct page *page, bool pfmemalloc) in get_valid_first_slab() argument
2786 if (!page) in get_valid_first_slab()
2790 return page; in get_valid_first_slab()
2792 if (!PageSlabPfmemalloc(page)) in get_valid_first_slab()
2793 return page; in get_valid_first_slab()
2796 if (n->free_objects > n->free_limit) { in get_valid_first_slab()
2797 ClearPageSlabPfmemalloc(page); in get_valid_first_slab()
2798 return page; in get_valid_first_slab()
2802 list_del(&page->slab_list); in get_valid_first_slab()
2803 if (!page->active) { in get_valid_first_slab()
2804 list_add_tail(&page->slab_list, &n->slabs_free); in get_valid_first_slab()
2805 n->free_slabs++; in get_valid_first_slab()
2807 list_add_tail(&page->slab_list, &n->slabs_partial); in get_valid_first_slab()
2809 list_for_each_entry(page, &n->slabs_partial, slab_list) { in get_valid_first_slab()
2810 if (!PageSlabPfmemalloc(page)) in get_valid_first_slab()
2811 return page; in get_valid_first_slab()
2814 n->free_touched = 1; in get_valid_first_slab()
2815 list_for_each_entry(page, &n->slabs_free, slab_list) { in get_valid_first_slab()
2816 if (!PageSlabPfmemalloc(page)) { in get_valid_first_slab()
2817 n->free_slabs--; in get_valid_first_slab()
2818 return page; in get_valid_first_slab()
2825 static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc) in get_first_slab()
2827 struct page *page; in get_first_slab() local
2829 assert_spin_locked(&n->list_lock); in get_first_slab()
2830 page = list_first_entry_or_null(&n->slabs_partial, struct page, in get_first_slab()
2832 if (!page) { in get_first_slab()
2833 n->free_touched = 1; in get_first_slab()
2834 page = list_first_entry_or_null(&n->slabs_free, struct page, in get_first_slab()
2836 if (page) in get_first_slab()
2837 n->free_slabs--; in get_first_slab()
2841 page = get_valid_first_slab(n, page, pfmemalloc); in get_first_slab()
2843 return page; in get_first_slab()
2849 struct page *page; in cache_alloc_pfmemalloc() local
2856 spin_lock(&n->list_lock); in cache_alloc_pfmemalloc()
2857 page = get_first_slab(n, true); in cache_alloc_pfmemalloc()
2858 if (!page) { in cache_alloc_pfmemalloc()
2859 spin_unlock(&n->list_lock); in cache_alloc_pfmemalloc()
2863 obj = slab_get_obj(cachep, page); in cache_alloc_pfmemalloc()
2864 n->free_objects--; in cache_alloc_pfmemalloc()
2866 fixup_slab_list(cachep, n, page, &list); in cache_alloc_pfmemalloc()
2868 spin_unlock(&n->list_lock); in cache_alloc_pfmemalloc()
2879 struct array_cache *ac, struct page *page, int batchcount) in alloc_block() argument
2885 BUG_ON(page->active >= cachep->num); in alloc_block()
2887 while (page->active < cachep->num && batchcount--) { in alloc_block()
2892 ac->entry[ac->avail++] = slab_get_obj(cachep, page); in alloc_block()
2905 struct page *page; in cache_alloc_refill() local
2911 batchcount = ac->batchcount; in cache_alloc_refill()
2912 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { in cache_alloc_refill()
2922 BUG_ON(ac->avail > 0 || !n); in cache_alloc_refill()
2923 shared = READ_ONCE(n->shared); in cache_alloc_refill()
2924 if (!n->free_objects && (!shared || !shared->avail)) in cache_alloc_refill()
2927 spin_lock(&n->list_lock); in cache_alloc_refill()
2928 shared = READ_ONCE(n->shared); in cache_alloc_refill()
2932 shared->touched = 1; in cache_alloc_refill()
2938 page = get_first_slab(n, false); in cache_alloc_refill()
2939 if (!page) in cache_alloc_refill()
2944 batchcount = alloc_block(cachep, ac, page, batchcount); in cache_alloc_refill()
2945 fixup_slab_list(cachep, n, page, &list); in cache_alloc_refill()
2949 n->free_objects -= ac->avail; in cache_alloc_refill()
2951 spin_unlock(&n->list_lock); in cache_alloc_refill()
2955 if (unlikely(!ac->avail)) { in cache_alloc_refill()
2964 page = cache_grow_begin(cachep, gfp_exact_node(flags), node); in cache_alloc_refill()
2971 if (!ac->avail && page) in cache_alloc_refill()
2972 alloc_block(cachep, ac, page, batchcount); in cache_alloc_refill()
2973 cache_grow_end(cachep, page); in cache_alloc_refill()
2975 if (!ac->avail) in cache_alloc_refill()
2978 ac->touched = 1; in cache_alloc_refill()
2980 return ac->entry[--ac->avail]; in cache_alloc_refill()
2993 WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO)); in cache_alloc_debugcheck_after()
2996 if (cachep->flags & SLAB_POISON) { in cache_alloc_debugcheck_after()
3001 if (cachep->flags & SLAB_STORE_USER) in cache_alloc_debugcheck_after()
3004 if (cachep->flags & SLAB_RED_ZONE) { in cache_alloc_debugcheck_after()
3017 if (cachep->ctor && cachep->flags & SLAB_POISON) in cache_alloc_debugcheck_after()
3018 cachep->ctor(objp); in cache_alloc_debugcheck_after()
3019 if ((unsigned long)objp & (arch_slab_minalign() - 1)) { in cache_alloc_debugcheck_after()
3037 if (likely(ac->avail)) { in ____cache_alloc()
3038 ac->touched = 1; in ____cache_alloc()
3039 objp = ac->entry[--ac->avail]; in ____cache_alloc()
3056 * per-CPU caches is leaked, we need to make sure kmemleak doesn't in ____cache_alloc()
3060 kmemleak_erase(&ac->entry[ac->avail]); in ____cache_alloc()
3078 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) in alternate_node_alloc()
3080 else if (current->mempolicy) in alternate_node_alloc()
3091 * perform an allocation without specifying a node. This allows the page
3102 struct page *page; in fallback_alloc() local
3123 get_node(cache, nid)->free_objects) { in fallback_alloc()
3138 page = cache_grow_begin(cache, flags, numa_mem_id()); in fallback_alloc()
3139 cache_grow_end(cache, page); in fallback_alloc()
3140 if (page) { in fallback_alloc()
3141 nid = page_to_nid(page); in fallback_alloc()
3165 struct page *page; in ____cache_alloc_node() local
3175 spin_lock(&n->list_lock); in ____cache_alloc_node()
3176 page = get_first_slab(n, false); in ____cache_alloc_node()
3177 if (!page) in ____cache_alloc_node()
3186 BUG_ON(page->active == cachep->num); in ____cache_alloc_node()
3188 obj = slab_get_obj(cachep, page); in ____cache_alloc_node()
3189 n->free_objects--; in ____cache_alloc_node()
3191 fixup_slab_list(cachep, n, page, &list); in ____cache_alloc_node()
3193 spin_unlock(&n->list_lock); in ____cache_alloc_node()
3198 spin_unlock(&n->list_lock); in ____cache_alloc_node()
3199 page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid); in ____cache_alloc_node()
3200 if (page) { in ____cache_alloc_node()
3202 obj = slab_get_obj(cachep, page); in ____cache_alloc_node()
3204 cache_grow_end(cachep, page); in ____cache_alloc_node()
3268 if (current->mempolicy || cpuset_do_slab_mem_spread()) { in __do_cache_alloc()
3334 struct page *page; in free_block() local
3336 n->free_objects += nr_objects; in free_block()
3340 struct page *page; in free_block() local
3344 page = virt_to_head_page(objp); in free_block()
3345 list_del(&page->slab_list); in free_block()
3347 slab_put_obj(cachep, page, objp); in free_block()
3351 if (page->active == 0) { in free_block()
3352 list_add(&page->slab_list, &n->slabs_free); in free_block()
3353 n->free_slabs++; in free_block()
3356 * partial list on free - maximum time for the in free_block()
3359 list_add_tail(&page->slab_list, &n->slabs_partial); in free_block()
3363 while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) { in free_block()
3364 n->free_objects -= cachep->num; in free_block()
3366 page = list_last_entry(&n->slabs_free, struct page, slab_list); in free_block()
3367 list_move(&page->slab_list, list); in free_block()
3368 n->free_slabs--; in free_block()
3369 n->total_slabs--; in free_block()
3380 batchcount = ac->batchcount; in cache_flusharray()
3384 spin_lock(&n->list_lock); in cache_flusharray()
3385 if (n->shared) { in cache_flusharray()
3386 struct array_cache *shared_array = n->shared; in cache_flusharray()
3387 int max = shared_array->limit - shared_array->avail; in cache_flusharray()
3391 memcpy(&(shared_array->entry[shared_array->avail]), in cache_flusharray()
3392 ac->entry, sizeof(void *) * batchcount); in cache_flusharray()
3393 shared_array->avail += batchcount; in cache_flusharray()
3398 free_block(cachep, ac->entry, batchcount, node, &list); in cache_flusharray()
3403 struct page *page; in cache_flusharray() local
3405 list_for_each_entry(page, &n->slabs_free, slab_list) { in cache_flusharray()
3406 BUG_ON(page->active); in cache_flusharray()
3413 spin_unlock(&n->list_lock); in cache_flusharray()
3414 ac->avail -= batchcount; in cache_flusharray()
3415 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); in cache_flusharray()
3429 kmemleak_free_recursive(objp, cachep->flags); in __cache_free()
3442 memset(objp, 0, cachep->object_size); in __cache_free()
3447 /* Use KCSAN to help debug racy use-after-free. */ in __cache_free()
3448 if (!(cachep->flags & SLAB_TYPESAFE_BY_RCU)) in __cache_free()
3449 __kcsan_check_access(objp, cachep->object_size, in __cache_free()
3461 kmemleak_free_recursive(objp, cachep->flags); in ___cache_free()
3468 * is per page memory reference) to get nodeid. Instead use a global in ___cache_free()
3475 if (ac->avail < ac->limit) { in ___cache_free()
3483 struct page *page = virt_to_head_page(objp); in ___cache_free() local
3485 if (unlikely(PageSlabPfmemalloc(page))) { in ___cache_free()
3486 cache_free_pfmemalloc(cachep, page, objp); in ___cache_free()
3495 * kmem_cache_alloc - Allocate an object
3506 void *ret = slab_alloc(cachep, flags, cachep->object_size, _RET_IP_); in kmem_cache_alloc()
3509 cachep->object_size, cachep->size, flags); in kmem_cache_alloc()
3517 size_t size, void **p, unsigned long caller) in cache_alloc_debugcheck_after_bulk() argument
3521 for (i = 0; i < size; i++) in cache_alloc_debugcheck_after_bulk()
3525 int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, in kmem_cache_alloc_bulk() argument
3531 s = slab_pre_alloc_hook(s, &objcg, size, flags); in kmem_cache_alloc_bulk()
3538 for (i = 0; i < size; i++) { in kmem_cache_alloc_bulk()
3539 void *objp = kfence_alloc(s, s->object_size, flags) ?: __do_cache_alloc(s, flags); in kmem_cache_alloc_bulk()
3547 cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_); in kmem_cache_alloc_bulk()
3553 slab_post_alloc_hook(s, objcg, flags, size, p, in kmem_cache_alloc_bulk()
3556 return size; in kmem_cache_alloc_bulk()
3568 kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size) in kmem_cache_alloc_trace() argument
3572 ret = slab_alloc(cachep, flags, size, _RET_IP_); in kmem_cache_alloc_trace()
3574 ret = kasan_kmalloc(cachep, ret, size, flags); in kmem_cache_alloc_trace()
3576 size, cachep->size, flags); in kmem_cache_alloc_trace()
3584 * kmem_cache_alloc_node - Allocate an object on the specified node
3598 void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_); in kmem_cache_alloc_node()
3601 cachep->object_size, cachep->size, in kmem_cache_alloc_node()
3612 size_t size) in kmem_cache_alloc_node_trace() argument
3616 ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_); in kmem_cache_alloc_node_trace()
3618 ret = kasan_kmalloc(cachep, ret, size, flags); in kmem_cache_alloc_node_trace()
3620 size, cachep->size, in kmem_cache_alloc_node_trace()
3628 __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) in __do_kmalloc_node() argument
3633 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) in __do_kmalloc_node()
3635 cachep = kmalloc_slab(size, flags); in __do_kmalloc_node()
3638 ret = kmem_cache_alloc_node_trace(cachep, flags, node, size); in __do_kmalloc_node()
3639 ret = kasan_kmalloc(cachep, ret, size, flags); in __do_kmalloc_node()
3644 void *__kmalloc_node(size_t size, gfp_t flags, int node) in __kmalloc_node() argument
3646 return __do_kmalloc_node(size, flags, node, _RET_IP_); in __kmalloc_node()
3650 void *__kmalloc_node_track_caller(size_t size, gfp_t flags, in __kmalloc_node_track_caller() argument
3653 return __do_kmalloc_node(size, flags, node, caller); in __kmalloc_node_track_caller()
3659 * __do_kmalloc - allocate memory
3660 * @size: how many bytes of memory are required.
3666 static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, in __do_kmalloc() argument
3672 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) in __do_kmalloc()
3674 cachep = kmalloc_slab(size, flags); in __do_kmalloc()
3677 ret = slab_alloc(cachep, flags, size, caller); in __do_kmalloc()
3679 ret = kasan_kmalloc(cachep, ret, size, flags); in __do_kmalloc()
3681 size, cachep->size, flags); in __do_kmalloc()
3686 void *__kmalloc(size_t size, gfp_t flags) in __kmalloc() argument
3688 return __do_kmalloc(size, flags, _RET_IP_); in __kmalloc()
3692 void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller) in __kmalloc_track_caller() argument
3694 return __do_kmalloc(size, flags, caller); in __kmalloc_track_caller()
3699 * kmem_cache_free - Deallocate an object
3714 debug_check_no_locks_freed(objp, cachep->object_size); in kmem_cache_free()
3715 if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) in kmem_cache_free()
3716 debug_check_no_obj_freed(objp, cachep->object_size); in kmem_cache_free()
3724 void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) in kmem_cache_free_bulk() argument
3730 for (i = 0; i < size; i++) { in kmem_cache_free_bulk()
3740 debug_check_no_locks_freed(objp, s->object_size); in kmem_cache_free_bulk()
3741 if (!(s->flags & SLAB_DEBUG_OBJECTS)) in kmem_cache_free_bulk()
3742 debug_check_no_obj_freed(objp, s->object_size); in kmem_cache_free_bulk()
3753 * kfree - free previously allocated memory
3777 debug_check_no_locks_freed(objp, c->object_size); in kfree()
3779 debug_check_no_obj_freed(objp, c->object_size); in kfree()
3804 if (!cachep->list.next) { in setup_kmem_cache_nodes()
3806 node--; in setup_kmem_cache_nodes()
3810 kfree(n->shared); in setup_kmem_cache_nodes()
3811 free_alien_cache(n->alien); in setup_kmem_cache_nodes()
3813 cachep->node[node] = NULL; in setup_kmem_cache_nodes()
3815 node--; in setup_kmem_cache_nodes()
3818 return -ENOMEM; in setup_kmem_cache_nodes()
3830 return -ENOMEM; in do_tune_cpucache()
3832 prev = cachep->cpu_cache; in do_tune_cpucache()
3833 cachep->cpu_cache = cpu_cache; in do_tune_cpucache()
3842 cachep->batchcount = batchcount; in do_tune_cpucache()
3843 cachep->limit = limit; in do_tune_cpucache()
3844 cachep->shared = shared; in do_tune_cpucache()
3857 spin_lock_irq(&n->list_lock); in do_tune_cpucache()
3858 free_block(cachep, ac->entry, ac->avail, node, &list); in do_tune_cpucache()
3859 spin_unlock_irq(&n->list_lock); in do_tune_cpucache()
3876 err = cache_random_seq_create(cachep, cachep->num, gfp); in enable_cpucache()
3884 * - create a LIFO ordering, i.e. return objects that are cache-warm in enable_cpucache()
3885 * - reduce the number of spinlock operations. in enable_cpucache()
3886 * - reduce the number of linked list operations on the slab and in enable_cpucache()
3888 * The numbers are guessed, we should auto-tune as described by in enable_cpucache()
3891 if (cachep->size > 131072) in enable_cpucache()
3893 else if (cachep->size > PAGE_SIZE) in enable_cpucache()
3895 else if (cachep->size > 1024) in enable_cpucache()
3897 else if (cachep->size > 256) in enable_cpucache()
3912 if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1) in enable_cpucache()
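Read together, the size checks above form a simple ladder: the larger the object, the smaller the per-cpu limit. A sketch of that shape with illustrative limit values (only the size thresholds appear in the matched lines above, so the numbers returned here are placeholders, not necessarily the kernel's):

#define TOY_PAGE_SIZE 4096U		/* illustrative */

/* Map object size to a per-cpu array limit: big objects get a tiny
 * cache, small objects a large one (placeholder values). */
static unsigned int toy_pick_limit(unsigned int size)
{
	if (size > 131072)
		return 1;
	else if (size > TOY_PAGE_SIZE)
		return 8;
	else if (size > 1024)
		return 24;
	else if (size > 256)
		return 54;
	else
		return 120;
}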
3929 cachep->name, -err); in enable_cpucache()
3943 /* ac from n->shared can be freed if we don't hold the slab_mutex. */ in drain_array()
3946 if (!ac || !ac->avail) in drain_array()
3949 if (ac->touched) { in drain_array()
3950 ac->touched = 0; in drain_array()
3954 spin_lock_irq(&n->list_lock); in drain_array()
3956 spin_unlock_irq(&n->list_lock); in drain_array()
3962 * cache_reap - Reclaim memory from caches.
3967 * - clear the per-cpu caches for this CPU.
3968 * - return freeable pages to the main free memory pool.
3970 * If we cannot acquire the cache chain mutex then just give up - we'll try
4002 if (time_after(n->next_reap, jiffies)) in cache_reap()
4005 n->next_reap = jiffies + REAPTIMEOUT_NODE; in cache_reap()
4007 drain_array(searchp, n, n->shared, node); in cache_reap()
4009 if (n->free_touched) in cache_reap()
4010 n->free_touched = 0; in cache_reap()
4014 freed = drain_freelist(searchp, n, (n->free_limit + in cache_reap()
4015 5 * searchp->num - 1) / (5 * searchp->num)); in cache_reap()
4040 spin_lock_irq(&n->list_lock); in get_slabinfo()
4042 total_slabs += n->total_slabs; in get_slabinfo()
4043 free_slabs += n->free_slabs; in get_slabinfo()
4044 free_objs += n->free_objects; in get_slabinfo()
4046 if (n->shared) in get_slabinfo()
4047 shared_avail += n->shared->avail; in get_slabinfo()
4049 spin_unlock_irq(&n->list_lock); in get_slabinfo()
4051 num_objs = total_slabs * cachep->num; in get_slabinfo()
4052 active_slabs = total_slabs - free_slabs; in get_slabinfo()
4053 active_objs = num_objs - free_objs; in get_slabinfo()
4055 sinfo->active_objs = active_objs; in get_slabinfo()
4056 sinfo->num_objs = num_objs; in get_slabinfo()
4057 sinfo->active_slabs = active_slabs; in get_slabinfo()
4058 sinfo->num_slabs = total_slabs; in get_slabinfo()
4059 sinfo->shared_avail = shared_avail; in get_slabinfo()
4060 sinfo->limit = cachep->limit; in get_slabinfo()
4061 sinfo->batchcount = cachep->batchcount; in get_slabinfo()
4062 sinfo->shared = cachep->shared; in get_slabinfo()
4063 sinfo->objects_per_slab = cachep->num; in get_slabinfo()
4064 sinfo->cache_order = cachep->gfporder; in get_slabinfo()
4072 unsigned long high = cachep->high_mark; in slabinfo_show_stats()
4073 unsigned long allocs = cachep->num_allocations; in slabinfo_show_stats()
4074 unsigned long grown = cachep->grown; in slabinfo_show_stats()
4075 unsigned long reaped = cachep->reaped; in slabinfo_show_stats()
4076 unsigned long errors = cachep->errors; in slabinfo_show_stats()
4077 unsigned long max_freeable = cachep->max_freeable; in slabinfo_show_stats()
4078 unsigned long node_allocs = cachep->node_allocs; in slabinfo_show_stats()
4079 unsigned long node_frees = cachep->node_frees; in slabinfo_show_stats()
4080 unsigned long overflows = cachep->node_overflow; in slabinfo_show_stats()
4089 unsigned long allochit = atomic_read(&cachep->allochit); in slabinfo_show_stats()
4090 unsigned long allocmiss = atomic_read(&cachep->allocmiss); in slabinfo_show_stats()
4091 unsigned long freehit = atomic_read(&cachep->freehit); in slabinfo_show_stats()
4092 unsigned long freemiss = atomic_read(&cachep->freemiss); in slabinfo_show_stats()
4102 * slabinfo_write - Tuning for the slab allocator
4118 return -EINVAL; in slabinfo_write()
4120 return -EFAULT; in slabinfo_write()
4125 return -EINVAL; in slabinfo_write()
4129 return -EINVAL; in slabinfo_write()
4133 res = -EINVAL; in slabinfo_write()
4135 if (!strcmp(cachep->name, kbuf)) { in slabinfo_write()
4162 void __check_heap_object(const void *ptr, unsigned long n, struct page *page, in __check_heap_object() argument
4172 cachep = page->slab_cache; in __check_heap_object()
4173 objnr = obj_to_index(cachep, page, (void *)ptr); in __check_heap_object()
4174 BUG_ON(objnr >= cachep->num); in __check_heap_object()
4178 offset = ptr - kfence_object_start(ptr); in __check_heap_object()
4180 offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep); in __check_heap_object()
4183 if (offset >= cachep->useroffset && in __check_heap_object()
4184 offset - cachep->useroffset <= cachep->usersize && in __check_heap_object()
4185 n <= cachep->useroffset - offset + cachep->usersize) in __check_heap_object()
4195 offset <= cachep->object_size && in __check_heap_object()
4196 n <= cachep->object_size - offset) { in __check_heap_object()
4197 usercopy_warn("SLAB object", cachep->name, to_user, offset, n); in __check_heap_object()
4201 usercopy_abort("SLAB object", cachep->name, to_user, offset, n); in __check_heap_object()
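The window test above reduces to checking that the copy [offset, offset + n) lies entirely inside the whitelisted region [useroffset, useroffset + usersize) of the object. A standalone sketch of that check:

#include <stdbool.h>
#include <stddef.h>

/* True if copying n bytes starting at 'offset' (relative to the object)
 * stays inside the whitelisted usercopy window. */
static bool toy_usercopy_in_window(size_t offset, size_t n,
				   size_t useroffset, size_t usersize)
{
	return offset >= useroffset &&
	       offset - useroffset <= usersize &&
	       n <= useroffset + usersize - offset;
}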
4206 * __ksize -- Uninstrumented ksize.
4212 * Return: size of the actual memory used by @objp in bytes
4217 size_t size; in __ksize() local
4224 size = c ? c->object_size : 0; in __ksize()
4226 return size; in __ksize()