Lines Matching +full:page +full:- +full:size
1 // SPDX-License-Identifier: GPL-2.0
7 * kmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
9 * Major cleanup, different bufctl logic, per-cpu arrays
17 * Pub: Prentice Hall ISBN 0-13-101908-2
19 * The Slab Allocator: An Object-Caching Kernel Memory Allocator
26 * page long) and always contiguous), and each slab contains multiple
48 * Each cache has a short per-cpu head array, most allocs
54 * The c_cpuarray may not be read with enabled local interrupts -
61 * The per-cpu arrays are never accessed from the wrong cpu, no locking,
62 * and local interrupts are disabled so slab code is preempt-safe.
63 * The non-constant members are protected with a per-cache irq spinlock.
65 * Many thanks to Mark Hemment, who wrote another per-cpu slab patch
66 * in 2000 - many ideas in the current implementation are derived from
71 * 11 April '97. Started multi-threading - markhe
72 * The global cache-chain is protected by the mutex 'slab_mutex'.
73 * The mutex is only needed when accessing/extending the cache-chain, which
114 #include <linux/fault-inject.h>
126 #include <asm/page.h>
135 * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
138 * STATS - 1 to collect stats for /proc/slabinfo.
141 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
171 #define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)
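A worked check of the macro above, as a standalone sketch: assuming freelist_idx_t is a one-byte type (the FREELIST_BYTE_INDEX case), the expression evaluates to (1 << 8) - 1, i.e. at most 255 objects per slab.

#include <stdio.h>

typedef unsigned char freelist_idx_t;	/* assumption: byte-sized freelist index */
#define BITS_PER_BYTE 8
#define SLAB_OBJ_MAX_NUM \
	((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)

int main(void)
{
	/* (1 << 1 * 8) - 1 = 255 objects per slab at most. */
	printf("SLAB_OBJ_MAX_NUM = %d\n", SLAB_OBJ_MAX_NUM);
	return 0;
}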
177 * - LIFO ordering, to hand out cache-warm objects from _alloc
178 * - reduce the number of linked list operations
179 * - reduce spinlock operations
181 * The limit is stored in the per-cpu structure to reduce the data cache
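The per-cpu head array described above behaves like a small LIFO stack of free objects. A minimal user-space sketch with hypothetical names (not the kernel's struct array_cache): allocation pops the most recently freed, cache-warm object; free pushes until the limit is hit, at which point the real allocator flushes a batch back to the shared lists.

#include <stddef.h>

#define TOY_AC_LIMIT 16			/* illustrative limit */

struct toy_array_cache {		/* hypothetical, for illustration only */
	unsigned int avail;		/* objects currently cached */
	unsigned int limit;		/* flush beyond this point */
	void *entry[TOY_AC_LIMIT];	/* LIFO stack of free objects */
};

/* Pop the most recently freed (cache-warm) object, or NULL if empty. */
static void *toy_alloc(struct toy_array_cache *ac)
{
	return ac->avail ? ac->entry[--ac->avail] : NULL;
}

/* Push a freed object; returns -1 when full (a real cache would then
 * move a batch of objects back to the per-node lists). */
static int toy_free(struct toy_array_cache *ac, void *objp)
{
	if (ac->avail >= ac->limit)
		return -1;
	ac->entry[ac->avail++] = objp;
	return 0;
}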
221 struct kmem_cache_node *n, struct page *page,
229 INIT_LIST_HEAD(&parent->slabs_full); in kmem_cache_node_init()
230 INIT_LIST_HEAD(&parent->slabs_partial); in kmem_cache_node_init()
231 INIT_LIST_HEAD(&parent->slabs_free); in kmem_cache_node_init()
232 parent->total_slabs = 0; in kmem_cache_node_init()
233 parent->free_slabs = 0; in kmem_cache_node_init()
234 parent->shared = NULL; in kmem_cache_node_init()
235 parent->alien = NULL; in kmem_cache_node_init()
236 parent->colour_next = 0; in kmem_cache_node_init()
237 spin_lock_init(&parent->list_lock); in kmem_cache_node_init()
238 parent->free_objects = 0; in kmem_cache_node_init()
239 parent->free_touched = 0; in kmem_cache_node_init()
245 list_splice(&get_node(cachep, nodeid)->slab, listp); \
250 MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \
251 MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
252 MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
257 #define OBJFREELIST_SLAB(x) ((x)->flags & CFLGS_OBJFREELIST_SLAB)
258 #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
272 #define STATS_INC_ACTIVE(x) ((x)->num_active++)
273 #define STATS_DEC_ACTIVE(x) ((x)->num_active--)
274 #define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
275 #define STATS_INC_GROWN(x) ((x)->grown++)
276 #define STATS_ADD_REAPED(x,y) ((x)->reaped += (y))
279 if ((x)->num_active > (x)->high_mark) \
280 (x)->high_mark = (x)->num_active; \
282 #define STATS_INC_ERR(x) ((x)->errors++)
283 #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
284 #define STATS_INC_NODEFREES(x) ((x)->node_frees++)
285 #define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++)
288 if ((x)->max_freeable < i) \
289 (x)->max_freeable = i; \
291 #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
292 #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
293 #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
294 #define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
318 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
321 * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
323 * cachep->obj_offset: The real object.
324 * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
325 * cachep->size - 1* BYTES_PER_WORD: last caller address
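To make the layout above concrete, a small sketch (assumptions: 64-bit build with 8-byte words, SLAB_RED_ZONE and SLAB_STORE_USER both enabled, and an illustrative 64-byte object; the names are mine, mirroring the dbg_* helpers that follow):

#include <stdio.h>

#define BYTES_PER_WORD	sizeof(void *)			/* 8 on 64-bit */
#define REDZONE_WORD	sizeof(unsigned long long)	/* 8 */

int main(void)
{
	size_t obj_offset = REDZONE_WORD;	/* padding holds red zone 1 */
	size_t object_size = 64;		/* illustrative */
	/* red zone 1 + object + red zone 2 + last-caller word */
	size_t size = obj_offset + object_size + REDZONE_WORD + BYTES_PER_WORD;

	printf("red zone 1 at %zu\n", obj_offset - REDZONE_WORD);	/* 0  */
	printf("object     at %zu\n", obj_offset);			/* 8  */
	printf("red zone 2 at %zu\n", size - 2 * BYTES_PER_WORD);	/* 72 */
	printf("user word  at %zu\n", size - 1 * BYTES_PER_WORD);	/* 80 */
	return 0;
}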
330 return cachep->obj_offset; in obj_offset()
335 BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); in dbg_redzone1()
336 return (unsigned long long*) (objp + obj_offset(cachep) - in dbg_redzone1()
342 BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); in dbg_redzone2()
343 if (cachep->flags & SLAB_STORE_USER) in dbg_redzone2()
344 return (unsigned long long *)(objp + cachep->size - in dbg_redzone2()
345 sizeof(unsigned long long) - in dbg_redzone2()
347 return (unsigned long long *) (objp + cachep->size - in dbg_redzone2()
353 BUG_ON(!(cachep->flags & SLAB_STORE_USER)); in dbg_userword()
354 return (void **)(objp + cachep->size - BYTES_PER_WORD); in dbg_userword()
375 static inline void *index_to_obj(struct kmem_cache *cache, struct page *page, in index_to_obj() argument
378 return page->s_mem + cache->size * idx; in index_to_obj()
387 .size = sizeof(struct kmem_cache),
395 return this_cpu_ptr(cachep->cpu_cache); in cpu_cache_get()
399 * Calculate the number of objects and left-over bytes for a given buffer size.
412 * - @buffer_size bytes for each object in cache_estimate()
413 * - One freelist_idx_t for each object in cache_estimate()
416 * freelist will be at the end of slab page. The objects will be in cache_estimate()
420 * alignment will already be calculated into the size. Because in cache_estimate()
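A hedged sketch of the estimate being described: with an on-slab freelist each object also pays for one freelist_idx_t, while off-slab/objfreelist slabs divide the whole slab by the object size. Names and the page size are illustrative.

#include <stddef.h>

typedef unsigned char freelist_idx_t;	/* assumption: byte-sized index */
#define TOY_PAGE_SIZE 4096UL		/* illustrative page size */

/* Estimate objects per slab and the left-over bytes for a given order. */
static unsigned int toy_cache_estimate(unsigned long gfporder, size_t buffer_size,
				       int freelist_off_slab, size_t *left_over)
{
	size_t slab_size = TOY_PAGE_SIZE << gfporder;
	unsigned int num;

	if (freelist_off_slab) {
		num = slab_size / buffer_size;
		*left_over = slab_size % buffer_size;
	} else {
		/* each object also needs one on-slab freelist index */
		num = slab_size / (buffer_size + sizeof(freelist_idx_t));
		*left_over = slab_size % (buffer_size + sizeof(freelist_idx_t));
	}
	return num;
}

For example, 128-byte objects at order 0 with an on-slab freelist give 4096 / 129 = 31 objects and 97 bytes left over for colouring.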
443 function, cachep->name, msg); in __slab_error()
469 min(slab_max_order, MAX_ORDER - 1); in slab_max_order_setup()
515 if (reap_work->work.func == NULL) { in start_cpu_timer()
526 ac->avail = 0; in init_arraycache()
527 ac->limit = limit; in init_arraycache()
528 ac->batchcount = batch; in init_arraycache()
529 ac->touched = 0; in init_arraycache()
553 struct page *page, void *objp) in cache_free_pfmemalloc() argument
559 page_node = page_to_nid(page); in cache_free_pfmemalloc()
562 spin_lock(&n->list_lock); in cache_free_pfmemalloc()
564 spin_unlock(&n->list_lock); in cache_free_pfmemalloc()
579 int nr = min3(from->avail, max, to->limit - to->avail); in transfer_objects()
584 memcpy(to->entry + to->avail, from->entry + from->avail - nr, in transfer_objects()
587 from->avail -= nr; in transfer_objects()
588 to->avail += nr; in transfer_objects()
592 /* &alien->lock must be held by alien callers. */
595 /* Avoid trivial double-free. */ in __free_one()
597 WARN_ON_ONCE(ac->avail > 0 && ac->entry[ac->avail - 1] == objp)) in __free_one()
599 ac->entry[ac->avail++] = objp; in __free_one()
653 init_arraycache(&alc->ac, entries, batch); in __alloc_alien_cache()
654 spin_lock_init(&alc->lock); in __alloc_alien_cache()
675 for (i--; i >= 0; i--) in alloc_alien_cache()
701 if (ac->avail) { in __drain_alien_cache()
702 spin_lock(&n->list_lock); in __drain_alien_cache()
708 if (n->shared) in __drain_alien_cache()
709 transfer_objects(n->shared, ac, ac->limit); in __drain_alien_cache()
711 free_block(cachep, ac->entry, ac->avail, node, list); in __drain_alien_cache()
712 ac->avail = 0; in __drain_alien_cache()
713 spin_unlock(&n->list_lock); in __drain_alien_cache()
724 if (n->alien) { in reap_alien()
725 struct alien_cache *alc = n->alien[node]; in reap_alien()
729 ac = &alc->ac; in reap_alien()
730 if (ac->avail && spin_trylock_irq(&alc->lock)) { in reap_alien()
734 spin_unlock_irq(&alc->lock); in reap_alien()
754 ac = &alc->ac; in drain_alien_cache()
755 spin_lock_irqsave(&alc->lock, flags); in drain_alien_cache()
757 spin_unlock_irqrestore(&alc->lock, flags); in drain_alien_cache()
773 if (n->alien && n->alien[page_node]) { in __cache_free_alien()
774 alien = n->alien[page_node]; in __cache_free_alien()
775 ac = &alien->ac; in __cache_free_alien()
776 spin_lock(&alien->lock); in __cache_free_alien()
777 if (unlikely(ac->avail == ac->limit)) { in __cache_free_alien()
782 spin_unlock(&alien->lock); in __cache_free_alien()
786 spin_lock(&n->list_lock); in __cache_free_alien()
788 spin_unlock(&n->list_lock); in __cache_free_alien()
829 spin_lock_irq(&n->list_lock); in init_cache_node()
830 n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount + in init_cache_node()
831 cachep->num; in init_cache_node()
832 spin_unlock_irq(&n->list_lock); in init_cache_node()
839 return -ENOMEM; in init_cache_node()
842 n->next_reap = jiffies + REAPTIMEOUT_NODE + in init_cache_node()
845 n->free_limit = in init_cache_node()
846 (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num; in init_cache_node()
853 cachep->node[node] = n; in init_cache_node()
861 * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
862 * will be allocated off-node since memory is not yet online for the new node.
886 int ret = -ENOMEM; in setup_kmem_cache_node()
894 new_alien = alloc_alien_cache(node, cachep->limit, gfp); in setup_kmem_cache_node()
899 if (cachep->shared) { in setup_kmem_cache_node()
901 cachep->shared * cachep->batchcount, 0xbaadf00d, gfp); in setup_kmem_cache_node()
911 spin_lock_irq(&n->list_lock); in setup_kmem_cache_node()
912 if (n->shared && force_change) { in setup_kmem_cache_node()
913 free_block(cachep, n->shared->entry, in setup_kmem_cache_node()
914 n->shared->avail, node, &list); in setup_kmem_cache_node()
915 n->shared->avail = 0; in setup_kmem_cache_node()
918 if (!n->shared || force_change) { in setup_kmem_cache_node()
919 old_shared = n->shared; in setup_kmem_cache_node()
920 n->shared = new_shared; in setup_kmem_cache_node()
924 if (!n->alien) { in setup_kmem_cache_node()
925 n->alien = new_alien; in setup_kmem_cache_node()
929 spin_unlock_irq(&n->list_lock); in setup_kmem_cache_node()
933 * To protect lockless access to n->shared during irq disabled context. in setup_kmem_cache_node()
934 * If n->shared isn't NULL in irq disabled context, accessing to it is in setup_kmem_cache_node()
935 * guaranteed to be valid until irq is re-enabled, because it will be in setup_kmem_cache_node()
968 spin_lock_irq(&n->list_lock); in cpuup_canceled()
971 n->free_limit -= cachep->batchcount; in cpuup_canceled()
974 nc = per_cpu_ptr(cachep->cpu_cache, cpu); in cpuup_canceled()
975 free_block(cachep, nc->entry, nc->avail, node, &list); in cpuup_canceled()
976 nc->avail = 0; in cpuup_canceled()
979 spin_unlock_irq(&n->list_lock); in cpuup_canceled()
983 shared = n->shared; in cpuup_canceled()
985 free_block(cachep, shared->entry, in cpuup_canceled()
986 shared->avail, node, &list); in cpuup_canceled()
987 n->shared = NULL; in cpuup_canceled()
990 alien = n->alien; in cpuup_canceled()
991 n->alien = NULL; in cpuup_canceled()
993 spin_unlock_irq(&n->list_lock); in cpuup_canceled()
1046 return -ENOMEM; in cpuup_prepare()
1100 * Drains freelist for a node on each slab cache, used for memory hot-remove.
1101 * Returns -EBUSY if all objects cannot be drained so that the node is not
1120 if (!list_empty(&n->slabs_full) || in drain_cache_node_node()
1121 !list_empty(&n->slabs_partial)) { in drain_cache_node_node()
1122 ret = -EBUSY; in drain_cache_node_node()
1136 nid = mnb->status_change_nid; in slab_memory_callback()
1177 spin_lock_init(&ptr->list_lock); in init_list()
1180 cachep->node[nodeid] = ptr; in init_list()
1185 * size of kmem_cache_node.
1192 cachep->node[node] = &init_kmem_cache_node[index + node]; in set_up_node()
1193 cachep->node[node]->next_reap = jiffies + in set_up_node()
1200 * Initialisation. Called after the page allocator has been initialised and
1216 * Fragmentation resistance on low memory - only use bigger in kmem_cache_init()
1217 * page orders on machines with more than 32MB of memory if in kmem_cache_init()
1246 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids in kmem_cache_init()
1252 list_add(&kmem_cache->list, &slab_caches); in kmem_cache_init()
1261 kmalloc_info[INDEX_NODE].size, in kmem_cache_init()
1263 kmalloc_info[INDEX_NODE].size); in kmem_cache_init()
1317 * Register the timers that return unneeded pages to the page allocator in cpucache_init()
1342 pr_warn(" cache: %s, object size: %d, order: %d\n", in slab_out_of_memory()
1343 cachep->name, cachep->size, cachep->gfporder); in slab_out_of_memory()
1348 spin_lock_irqsave(&n->list_lock, flags); in slab_out_of_memory()
1349 total_slabs = n->total_slabs; in slab_out_of_memory()
1350 free_slabs = n->free_slabs; in slab_out_of_memory()
1351 free_objs = n->free_objects; in slab_out_of_memory()
1352 spin_unlock_irqrestore(&n->list_lock, flags); in slab_out_of_memory()
1355 node, total_slabs - free_slabs, total_slabs, in slab_out_of_memory()
1356 (total_slabs * cachep->num) - free_objs, in slab_out_of_memory()
1357 total_slabs * cachep->num); in slab_out_of_memory()
1363 * Interface to system's page allocator. No need to hold the
1364 * kmem_cache_node ->list_lock.
1370 static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, in kmem_getpages()
1373 struct page *page; in kmem_getpages() local
1375 flags |= cachep->allocflags; in kmem_getpages()
1377 page = __alloc_pages_node(nodeid, flags, cachep->gfporder); in kmem_getpages()
1378 if (!page) { in kmem_getpages()
1383 account_slab_page(page, cachep->gfporder, cachep); in kmem_getpages()
1384 __SetPageSlab(page); in kmem_getpages()
1386 if (sk_memalloc_socks() && page_is_pfmemalloc(page)) in kmem_getpages()
1387 SetPageSlabPfmemalloc(page); in kmem_getpages()
1389 return page; in kmem_getpages()
1393 * Interface to system's page release.
1395 static void kmem_freepages(struct kmem_cache *cachep, struct page *page) in kmem_freepages() argument
1397 int order = cachep->gfporder; in kmem_freepages()
1399 BUG_ON(!PageSlab(page)); in kmem_freepages()
1400 __ClearPageSlabPfmemalloc(page); in kmem_freepages()
1401 __ClearPageSlab(page); in kmem_freepages()
1402 page_mapcount_reset(page); in kmem_freepages()
1403 page->mapping = NULL; in kmem_freepages()
1405 if (current->reclaim_state) in kmem_freepages()
1406 current->reclaim_state->reclaimed_slab += 1 << order; in kmem_freepages()
1407 unaccount_slab_page(page, order, cachep); in kmem_freepages()
1408 __free_pages(page, order); in kmem_freepages()
1414 struct page *page; in kmem_rcu_free() local
1416 page = container_of(head, struct page, rcu_head); in kmem_rcu_free()
1417 cachep = page->slab_cache; in kmem_rcu_free()
1419 kmem_freepages(cachep, page); in kmem_rcu_free()
1426 (cachep->size % PAGE_SIZE) == 0) in is_debug_pagealloc_cache()
1438 __kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map); in slab_kernel_map()
1449 int size = cachep->object_size; in poison_obj() local
1452 memset(addr, val, size); in poison_obj()
1453 *(unsigned char *)(addr + size - 1) = POISON_END; in poison_obj()
1474 if (!(error & (error - 1))) { in dump_line()
1490 int i, size; in print_objinfo() local
1493 if (cachep->flags & SLAB_RED_ZONE) { in print_objinfo()
1499 if (cachep->flags & SLAB_STORE_USER) in print_objinfo()
1502 size = cachep->object_size; in print_objinfo()
1503 for (i = 0; i < size && lines; i += 16, lines--) { in print_objinfo()
1506 if (i + limit > size) in print_objinfo()
1507 limit = size - i; in print_objinfo()
1515 int size, i; in check_poison_obj() local
1522 size = cachep->object_size; in check_poison_obj()
1524 for (i = 0; i < size; i++) { in check_poison_obj()
1526 if (i == size - 1) in check_poison_obj()
1534 print_tainted(), cachep->name, in check_poison_obj()
1535 realobj, size); in check_poison_obj()
1541 if (i + limit > size) in check_poison_obj()
1542 limit = size - i; in check_poison_obj()
1555 struct page *page = virt_to_head_page(objp); in check_poison_obj() local
1558 objnr = obj_to_index(cachep, page, objp); in check_poison_obj()
1560 objp = index_to_obj(cachep, page, objnr - 1); in check_poison_obj()
1562 pr_err("Prev obj: start=%px, len=%d\n", realobj, size); in check_poison_obj()
1565 if (objnr + 1 < cachep->num) { in check_poison_obj()
1566 objp = index_to_obj(cachep, page, objnr + 1); in check_poison_obj()
1568 pr_err("Next obj: start=%px, len=%d\n", realobj, size); in check_poison_obj()
1577 struct page *page) in slab_destroy_debugcheck() argument
1581 if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) { in slab_destroy_debugcheck()
1582 poison_obj(cachep, page->freelist - obj_offset(cachep), in slab_destroy_debugcheck()
1586 for (i = 0; i < cachep->num; i++) { in slab_destroy_debugcheck()
1587 void *objp = index_to_obj(cachep, page, i); in slab_destroy_debugcheck()
1589 if (cachep->flags & SLAB_POISON) { in slab_destroy_debugcheck()
1593 if (cachep->flags & SLAB_RED_ZONE) { in slab_destroy_debugcheck()
1603 struct page *page) in slab_destroy_debugcheck() argument
1609 * slab_destroy - destroy and release all objects in a slab
1611 * @page: page pointer being destroyed
1613 * Destroy all the objs in a slab page, and release the mem back to the system.
1614 * Before calling, the slab page must have been unlinked from the cache. The
1615 * kmem_cache_node ->list_lock is not held/needed.
1617 static void slab_destroy(struct kmem_cache *cachep, struct page *page) in slab_destroy() argument
1621 freelist = page->freelist; in slab_destroy()
1622 slab_destroy_debugcheck(cachep, page); in slab_destroy()
1623 if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU)) in slab_destroy()
1624 call_rcu(&page->rcu_head, kmem_rcu_free); in slab_destroy()
1626 kmem_freepages(cachep, page); in slab_destroy()
1630 * although actual page can be freed in rcu context in slab_destroy()
1633 kmem_cache_free(cachep->freelist_cache, freelist); in slab_destroy()
1637 * Update the size of the caches before calling slabs_destroy as it may
1642 struct page *page, *n; in slabs_destroy() local
1644 list_for_each_entry_safe(page, n, list, slab_list) { in slabs_destroy()
1645 list_del(&page->slab_list); in slabs_destroy()
1646 slab_destroy(cachep, page); in slabs_destroy()
1651 * calculate_slab_order - calculate size (page order) of slabs
1653 * @size: size of objects to be created in this cache.
1660 * towards high-order requests, this should be changed.
1662 * Return: number of left-over bytes in a slab
1665 size_t size, slab_flags_t flags) in calculate_slab_order() argument
1674 num = cache_estimate(gfporder, size, flags, &remainder); in calculate_slab_order()
1699 if (freelist_cache->size > cachep->size / 2) in calculate_slab_order()
1703 /* Found something acceptable - save it away */ in calculate_slab_order()
1704 cachep->num = num; in calculate_slab_order()
1705 cachep->gfporder = gfporder; in calculate_slab_order()
1709 * A VFS-reclaimable slab tends to have most allocations in calculate_slab_order()
1711 * higher-order pages when we are unable to shrink dcache. in calculate_slab_order()
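The search that calculate_slab_order() performs can be sketched as follows (simplified: the real code also honours slab_max_order, the off-slab freelist cost and the reclaimable-slab case noted above; the "at most 1/8 of the slab wasted" bound used here is an assumption for illustration).

#include <stddef.h>

#define TOY_PAGE_SIZE	4096UL		/* illustrative */
#define TOY_MAX_ORDER	10		/* illustrative upper bound */

/* Pick the smallest page order where at least one object fits and the
 * wasted tail is no more than 1/8 of the slab. Returns -1 on failure. */
static int toy_slab_order(size_t size, unsigned int *objs_out)
{
	int order;

	for (order = 0; order <= TOY_MAX_ORDER; order++) {
		size_t slab_size = TOY_PAGE_SIZE << order;
		unsigned int num = slab_size / size;
		size_t left_over = slab_size - (size_t)num * size;

		if (!num)
			continue;		/* object does not fit yet */
		if (left_over * 8 <= slab_size) {
			*objs_out = num;
			return order;		/* acceptable fragmentation */
		}
	}
	return -1;
}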
1736 size_t size; in alloc_kmem_cache_cpus() local
1739 size = sizeof(void *) * entries + sizeof(struct array_cache); in alloc_kmem_cache_cpus()
1740 cpu_cache = __alloc_percpu(size, sizeof(void *)); in alloc_kmem_cache_cpus()
1758 cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1); in setup_cpu_cache()
1759 if (!cachep->cpu_cache) in setup_cpu_cache()
1772 cachep->node[node] = kmalloc_node( in setup_cpu_cache()
1774 BUG_ON(!cachep->node[node]); in setup_cpu_cache()
1775 kmem_cache_node_init(cachep->node[node]); in setup_cpu_cache()
1779 cachep->node[numa_mem_id()]->next_reap = in setup_cpu_cache()
1783 cpu_cache_get(cachep)->avail = 0; in setup_cpu_cache()
1784 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; in setup_cpu_cache()
1785 cpu_cache_get(cachep)->batchcount = 1; in setup_cpu_cache()
1786 cpu_cache_get(cachep)->touched = 0; in setup_cpu_cache()
1787 cachep->batchcount = 1; in setup_cpu_cache()
1788 cachep->limit = BOOT_CPUCACHE_ENTRIES; in setup_cpu_cache()
1799 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align, in __kmem_cache_alias() argument
1804 cachep = find_mergeable(size, align, flags, name, ctor); in __kmem_cache_alias()
1806 cachep->refcount++; in __kmem_cache_alias()
1812 cachep->object_size = max_t(int, cachep->object_size, size); in __kmem_cache_alias()
1818 size_t size, slab_flags_t flags) in set_objfreelist_slab_cache() argument
1822 cachep->num = 0; in set_objfreelist_slab_cache()
1825 * If slab auto-initialization on free is enabled, store the freelist in set_objfreelist_slab_cache()
1826 * off-slab, so that its contents don't end up in one of the allocated in set_objfreelist_slab_cache()
1832 if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU) in set_objfreelist_slab_cache()
1835 left = calculate_slab_order(cachep, size, in set_objfreelist_slab_cache()
1837 if (!cachep->num) in set_objfreelist_slab_cache()
1840 if (cachep->num * sizeof(freelist_idx_t) > cachep->object_size) in set_objfreelist_slab_cache()
1843 cachep->colour = left / cachep->colour_off; in set_objfreelist_slab_cache()
1849 size_t size, slab_flags_t flags) in set_off_slab_cache() argument
1853 cachep->num = 0; in set_off_slab_cache()
1856 * Always use on-slab management when SLAB_NOLEAKTRACE in set_off_slab_cache()
1863 * Size is large, assume best to place the slab management obj in set_off_slab_cache()
1864 * off-slab (should allow better packing of objs). in set_off_slab_cache()
1866 left = calculate_slab_order(cachep, size, flags | CFLGS_OFF_SLAB); in set_off_slab_cache()
1867 if (!cachep->num) in set_off_slab_cache()
1871 * If the slab has been placed off-slab, and we have enough space then in set_off_slab_cache()
1872 * move it on-slab. This is at the expense of any extra colouring. in set_off_slab_cache()
1874 if (left >= cachep->num * sizeof(freelist_idx_t)) in set_off_slab_cache()
1877 cachep->colour = left / cachep->colour_off; in set_off_slab_cache()
1883 size_t size, slab_flags_t flags) in set_on_slab_cache() argument
1887 cachep->num = 0; in set_on_slab_cache()
1889 left = calculate_slab_order(cachep, size, flags); in set_on_slab_cache()
1890 if (!cachep->num) in set_on_slab_cache()
1893 cachep->colour = left / cachep->colour_off; in set_on_slab_cache()
1899 * __kmem_cache_create - Create a cache.
1909 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
1912 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
1915 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
1926 unsigned int size = cachep->size; in __kmem_cache_create() local
1932 * large objects, if the increased size would increase the object size in __kmem_cache_create()
1936 if (size < 4096 || fls(size - 1) == fls(size - 1 + REDZONE_ALIGN + in __kmem_cache_create()
1945 * Check that size is in terms of words. This is needed to avoid in __kmem_cache_create()
1947 * sure any on-slab bufctl's are also correctly aligned. in __kmem_cache_create()
1949 size = ALIGN(size, BYTES_PER_WORD); in __kmem_cache_create()
1954 * aligned, by adjusting the object size accordingly. */ in __kmem_cache_create()
1955 size = ALIGN(size, REDZONE_ALIGN); in __kmem_cache_create()
1959 if (ralign < cachep->align) { in __kmem_cache_create()
1960 ralign = cachep->align; in __kmem_cache_create()
1968 cachep->align = ralign; in __kmem_cache_create()
1969 cachep->colour_off = cache_line_size(); in __kmem_cache_create()
1971 if (cachep->colour_off < cachep->align) in __kmem_cache_create()
1972 cachep->colour_off = cachep->align; in __kmem_cache_create()
1982 * Both debugging options require word-alignment which is calculated in __kmem_cache_create()
1987 cachep->obj_offset += sizeof(unsigned long long); in __kmem_cache_create()
1988 size += 2 * sizeof(unsigned long long); in __kmem_cache_create()
1996 size += REDZONE_ALIGN; in __kmem_cache_create()
1998 size += BYTES_PER_WORD; in __kmem_cache_create()
2002 kasan_cache_create(cachep, &size, &flags); in __kmem_cache_create()
2004 size = ALIGN(size, cachep->align); in __kmem_cache_create()
2009 if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE) in __kmem_cache_create()
2010 size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align); in __kmem_cache_create()
2014 * To activate debug pagealloc, off-slab management is necessary in __kmem_cache_create()
2017 * to check size >= 256. It guarantees that all necessary small in __kmem_cache_create()
2021 size >= 256 && cachep->object_size > cache_line_size()) { in __kmem_cache_create()
2022 if (size < PAGE_SIZE || size % PAGE_SIZE == 0) { in __kmem_cache_create()
2023 size_t tmp_size = ALIGN(size, PAGE_SIZE); in __kmem_cache_create()
2027 cachep->obj_offset += tmp_size - size; in __kmem_cache_create()
2028 size = tmp_size; in __kmem_cache_create()
2035 if (set_objfreelist_slab_cache(cachep, size, flags)) { in __kmem_cache_create()
2040 if (set_off_slab_cache(cachep, size, flags)) { in __kmem_cache_create()
2045 if (set_on_slab_cache(cachep, size, flags)) in __kmem_cache_create()
2048 return -E2BIG; in __kmem_cache_create()
2051 cachep->freelist_size = cachep->num * sizeof(freelist_idx_t); in __kmem_cache_create()
2052 cachep->flags = flags; in __kmem_cache_create()
2053 cachep->allocflags = __GFP_COMP; in __kmem_cache_create()
2055 cachep->allocflags |= GFP_DMA; in __kmem_cache_create()
2057 cachep->allocflags |= GFP_DMA32; in __kmem_cache_create()
2059 cachep->allocflags |= __GFP_RECLAIMABLE; in __kmem_cache_create()
2060 cachep->size = size; in __kmem_cache_create()
2061 cachep->reciprocal_buffer_size = reciprocal_value(size); in __kmem_cache_create()
2070 (cachep->flags & SLAB_POISON) && in __kmem_cache_create()
2072 cachep->flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); in __kmem_cache_create()
2076 cachep->freelist_cache = in __kmem_cache_create()
2077 kmalloc_slab(cachep->freelist_size, 0u); in __kmem_cache_create()
2109 assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock); in check_spinlock_acquired()
2117 assert_spin_locked(&get_node(cachep, node)->list_lock); in check_spinlock_acquired_node()
2134 if (!ac || !ac->avail) in drain_array_locked()
2137 tofree = free_all ? ac->avail : (ac->limit + 4) / 5; in drain_array_locked()
2138 if (tofree > ac->avail) in drain_array_locked()
2139 tofree = (ac->avail + 1) / 2; in drain_array_locked()
2141 free_block(cachep, ac->entry, tofree, node, list); in drain_array_locked()
2142 ac->avail -= tofree; in drain_array_locked()
2143 memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail); in drain_array_locked()
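As a worked example of the drain above: with limit = 120 and avail = 30, a periodic (non-full) drain frees (120 + 4) / 5 = 24 objects and memmoves the remaining 6 to the front; if only 10 objects were available, tofree would be clamped to (10 + 1) / 2 = 5.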
2157 spin_lock(&n->list_lock); in do_drain()
2158 free_block(cachep, ac->entry, ac->avail, node, &list); in do_drain()
2159 spin_unlock(&n->list_lock); in do_drain()
2160 ac->avail = 0; in do_drain()
2173 if (n->alien) in drain_cpu_caches()
2174 drain_alien_cache(cachep, n->alien); in drain_cpu_caches()
2177 spin_lock_irq(&n->list_lock); in drain_cpu_caches()
2178 drain_array_locked(cachep, n->shared, node, true, &list); in drain_cpu_caches()
2179 spin_unlock_irq(&n->list_lock); in drain_cpu_caches()
2196 struct page *page; in drain_freelist() local
2199 while (nr_freed < tofree && !list_empty(&n->slabs_free)) { in drain_freelist()
2201 spin_lock_irq(&n->list_lock); in drain_freelist()
2202 p = n->slabs_free.prev; in drain_freelist()
2203 if (p == &n->slabs_free) { in drain_freelist()
2204 spin_unlock_irq(&n->list_lock); in drain_freelist()
2208 page = list_entry(p, struct page, slab_list); in drain_freelist()
2209 list_del(&page->slab_list); in drain_freelist()
2210 n->free_slabs--; in drain_freelist()
2211 n->total_slabs--; in drain_freelist()
2216 n->free_objects -= cache->num; in drain_freelist()
2217 spin_unlock_irq(&n->list_lock); in drain_freelist()
2218 slab_destroy(cache, page); in drain_freelist()
2231 if (!list_empty(&n->slabs_full) || in __kmem_cache_empty()
2232 !list_empty(&n->slabs_partial)) in __kmem_cache_empty()
2249 ret += !list_empty(&n->slabs_full) || in __kmem_cache_shrink()
2250 !list_empty(&n->slabs_partial); in __kmem_cache_shrink()
2267 free_percpu(cachep->cpu_cache); in __kmem_cache_release()
2271 kfree(n->shared); in __kmem_cache_release()
2272 free_alien_cache(n->alien); in __kmem_cache_release()
2274 cachep->node[i] = NULL; in __kmem_cache_release()
2281 * For a slab cache when the slab descriptor is off-slab, the
2284 * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point.
2286 * in turn looks up in the kmalloc_{dma,}_caches for the desired-size one.
2287 * This is a "chicken-and-egg" problem.
2289 * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
2293 struct page *page, int colour_off, in alloc_slabmgmt() argument
2297 void *addr = page_address(page); in alloc_slabmgmt()
2299 page->s_mem = addr + colour_off; in alloc_slabmgmt()
2300 page->active = 0; in alloc_slabmgmt()
2305 /* Slab management obj is off-slab. */ in alloc_slabmgmt()
2306 freelist = kmem_cache_alloc_node(cachep->freelist_cache, in alloc_slabmgmt()
2310 freelist = addr + (PAGE_SIZE << cachep->gfporder) - in alloc_slabmgmt()
2311 cachep->freelist_size; in alloc_slabmgmt()
2317 static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx) in get_free_obj() argument
2319 return ((freelist_idx_t *)page->freelist)[idx]; in get_free_obj()
2322 static inline void set_free_obj(struct page *page, in set_free_obj() argument
2325 ((freelist_idx_t *)(page->freelist))[idx] = val; in set_free_obj()
2328 static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page) in cache_init_objs_debug() argument
2333 for (i = 0; i < cachep->num; i++) { in cache_init_objs_debug()
2334 void *objp = index_to_obj(cachep, page, i); in cache_init_objs_debug()
2336 if (cachep->flags & SLAB_STORE_USER) in cache_init_objs_debug()
2339 if (cachep->flags & SLAB_RED_ZONE) { in cache_init_objs_debug()
2348 if (cachep->ctor && !(cachep->flags & SLAB_POISON)) { in cache_init_objs_debug()
2351 cachep->ctor(objp + obj_offset(cachep)); in cache_init_objs_debug()
2356 if (cachep->flags & SLAB_RED_ZONE) { in cache_init_objs_debug()
2363 if (cachep->flags & SLAB_POISON) { in cache_init_objs_debug()
2384 * return true if the pre-computed list is available, false otherwise.
2396 /* Use a random state if the pre-computed list is not available */ in freelist_state_initialize()
2397 if (!cachep->random_seq) { in freelist_state_initialize()
2398 prandom_seed_state(&state->rnd_state, rand); in freelist_state_initialize()
2401 state->list = cachep->random_seq; in freelist_state_initialize()
2402 state->count = count; in freelist_state_initialize()
2403 state->pos = rand % count; in freelist_state_initialize()
2412 if (state->pos >= state->count) in next_random_slot()
2413 state->pos = 0; in next_random_slot()
2414 return state->list[state->pos++]; in next_random_slot()
2418 static void swap_free_obj(struct page *page, unsigned int a, unsigned int b) in swap_free_obj() argument
2420 swap(((freelist_idx_t *)page->freelist)[a], in swap_free_obj()
2421 ((freelist_idx_t *)page->freelist)[b]); in swap_free_obj()
2425 * Shuffle the freelist initialization state based on pre-computed lists.
2428 static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page) in shuffle_freelist() argument
2430 unsigned int objfreelist = 0, i, rand, count = cachep->num; in shuffle_freelist()
2442 objfreelist = count - 1; in shuffle_freelist()
2445 page->freelist = index_to_obj(cachep, page, objfreelist) + in shuffle_freelist()
2447 count--; in shuffle_freelist()
2452 * Later use a pre-computed list for speed. in shuffle_freelist()
2456 set_free_obj(page, i, i); in shuffle_freelist()
2458 /* Fisher-Yates shuffle */ in shuffle_freelist()
2459 for (i = count - 1; i > 0; i--) { in shuffle_freelist()
2462 swap_free_obj(page, i, rand); in shuffle_freelist()
2466 set_free_obj(page, i, next_random_slot(&state)); in shuffle_freelist()
2470 set_free_obj(page, cachep->num - 1, objfreelist); in shuffle_freelist()
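The Fisher-Yates pass above, shown on a plain index array as a standalone sketch (the kernel variant draws randomness from a pre-computed list or a seeded state rather than rand(), and may reserve the last slot for the objfreelist case):

#include <stdlib.h>

/* Fill 'freelist' with 0..count-1 and shuffle it in place. */
static void toy_shuffle_freelist(unsigned char *freelist, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		freelist[i] = (unsigned char)i;		/* identity order */

	if (count < 2)
		return;

	/* Fisher-Yates: swap slot i with a random slot in [0, i]. */
	for (i = count - 1; i > 0; i--) {
		unsigned int j = rand() % (i + 1);
		unsigned char tmp = freelist[i];

		freelist[i] = freelist[j];
		freelist[j] = tmp;
	}
}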
2476 struct page *page) in shuffle_freelist() argument
2483 struct page *page) in cache_init_objs() argument
2489 cache_init_objs_debug(cachep, page); in cache_init_objs()
2492 shuffled = shuffle_freelist(cachep, page); in cache_init_objs()
2495 page->freelist = index_to_obj(cachep, page, cachep->num - 1) + in cache_init_objs()
2499 for (i = 0; i < cachep->num; i++) { in cache_init_objs()
2500 objp = index_to_obj(cachep, page, i); in cache_init_objs()
2504 if (DEBUG == 0 && cachep->ctor) { in cache_init_objs()
2506 cachep->ctor(objp); in cache_init_objs()
2511 set_free_obj(page, i, i); in cache_init_objs()
2515 static void *slab_get_obj(struct kmem_cache *cachep, struct page *page) in slab_get_obj() argument
2519 objp = index_to_obj(cachep, page, get_free_obj(page, page->active)); in slab_get_obj()
2520 page->active++; in slab_get_obj()
2526 struct page *page, void *objp) in slab_put_obj() argument
2528 unsigned int objnr = obj_to_index(cachep, page, objp); in slab_put_obj()
2533 for (i = page->active; i < cachep->num; i++) { in slab_put_obj()
2534 if (get_free_obj(page, i) == objnr) { in slab_put_obj()
2536 cachep->name, objp); in slab_put_obj()
2541 page->active--; in slab_put_obj()
2542 if (!page->freelist) in slab_put_obj()
2543 page->freelist = objp + obj_offset(cachep); in slab_put_obj()
2545 set_free_obj(page, page->active, objnr); in slab_put_obj()
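The two helpers above amount to stack operations on an index-based freelist, with index_to_obj() translating an index into s_mem + size * idx. A toy version with hypothetical names:

#include <stddef.h>

struct toy_slab {			/* hypothetical, for illustration only */
	char *s_mem;			/* address of the first object */
	size_t size;			/* object stride within the slab */
	unsigned int active;		/* objects currently handed out */
	unsigned char freelist[256];	/* indices of free objects */
};

/* Hand out the next free object: the index stored at slot 'active'. */
static void *toy_get_obj(struct toy_slab *slab)
{
	return slab->s_mem + slab->size * slab->freelist[slab->active++];
}

/* Return object number 'objnr': push it back as the new top of the freelist. */
static void toy_put_obj(struct toy_slab *slab, unsigned int objnr)
{
	slab->freelist[--slab->active] = (unsigned char)objnr;
}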
2553 static void slab_map_pages(struct kmem_cache *cache, struct page *page, in slab_map_pages() argument
2556 page->slab_cache = cache; in slab_map_pages()
2557 page->freelist = freelist; in slab_map_pages()
2564 static struct page *cache_grow_begin(struct kmem_cache *cachep, in cache_grow_begin()
2572 struct page *page; in cache_grow_begin() local
2581 WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO)); in cache_grow_begin()
2589 * Get mem for the objs. Attempt to allocate a physical page from in cache_grow_begin()
2592 page = kmem_getpages(cachep, local_flags, nodeid); in cache_grow_begin()
2593 if (!page) in cache_grow_begin()
2596 page_node = page_to_nid(page); in cache_grow_begin()
2600 n->colour_next++; in cache_grow_begin()
2601 if (n->colour_next >= cachep->colour) in cache_grow_begin()
2602 n->colour_next = 0; in cache_grow_begin()
2604 offset = n->colour_next; in cache_grow_begin()
2605 if (offset >= cachep->colour) in cache_grow_begin()
2608 offset *= cachep->colour_off; in cache_grow_begin()
2612 * page_address() in the latter returns a non-tagged pointer, in cache_grow_begin()
2615 kasan_poison_slab(page); in cache_grow_begin()
2618 freelist = alloc_slabmgmt(cachep, page, offset, in cache_grow_begin()
2623 slab_map_pages(cachep, page, freelist); in cache_grow_begin()
2625 cache_init_objs(cachep, page); in cache_grow_begin()
2630 return page; in cache_grow_begin()
2633 kmem_freepages(cachep, page); in cache_grow_begin()
2640 static void cache_grow_end(struct kmem_cache *cachep, struct page *page) in cache_grow_end() argument
2647 if (!page) in cache_grow_end()
2650 INIT_LIST_HEAD(&page->slab_list); in cache_grow_end()
2651 n = get_node(cachep, page_to_nid(page)); in cache_grow_end()
2653 spin_lock(&n->list_lock); in cache_grow_end()
2654 n->total_slabs++; in cache_grow_end()
2655 if (!page->active) { in cache_grow_end()
2656 list_add_tail(&page->slab_list, &n->slabs_free); in cache_grow_end()
2657 n->free_slabs++; in cache_grow_end()
2659 fixup_slab_list(cachep, n, page, &list); in cache_grow_end()
2662 n->free_objects += cachep->num - page->active; in cache_grow_end()
2663 spin_unlock(&n->list_lock); in cache_grow_end()
2672 * - detect bad pointers.
2673 * - POISON/RED_ZONE checking
2710 struct page *page; in cache_free_debugcheck() local
2714 objp -= obj_offset(cachep); in cache_free_debugcheck()
2716 page = virt_to_head_page(objp); in cache_free_debugcheck()
2718 if (cachep->flags & SLAB_RED_ZONE) { in cache_free_debugcheck()
2723 if (cachep->flags & SLAB_STORE_USER) in cache_free_debugcheck()
2726 objnr = obj_to_index(cachep, page, objp); in cache_free_debugcheck()
2728 BUG_ON(objnr >= cachep->num); in cache_free_debugcheck()
2729 BUG_ON(objp != index_to_obj(cachep, page, objnr)); in cache_free_debugcheck()
2731 if (cachep->flags & SLAB_POISON) { in cache_free_debugcheck()
2751 objp = next - obj_offset(cachep); in fixup_objfreelist_debug()
2759 struct kmem_cache_node *n, struct page *page, in fixup_slab_list() argument
2763 list_del(&page->slab_list); in fixup_slab_list()
2764 if (page->active == cachep->num) { in fixup_slab_list()
2765 list_add(&page->slab_list, &n->slabs_full); in fixup_slab_list()
2769 if (cachep->flags & SLAB_POISON) { in fixup_slab_list()
2770 void **objp = page->freelist; in fixup_slab_list()
2776 page->freelist = NULL; in fixup_slab_list()
2779 list_add(&page->slab_list, &n->slabs_partial); in fixup_slab_list()
2782 /* Try to find a non-pfmemalloc slab if needed */
2783 static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n, in get_valid_first_slab()
2784 struct page *page, bool pfmemalloc) in get_valid_first_slab() argument
2786 if (!page) in get_valid_first_slab()
2790 return page; in get_valid_first_slab()
2792 if (!PageSlabPfmemalloc(page)) in get_valid_first_slab()
2793 return page; in get_valid_first_slab()
2796 if (n->free_objects > n->free_limit) { in get_valid_first_slab()
2797 ClearPageSlabPfmemalloc(page); in get_valid_first_slab()
2798 return page; in get_valid_first_slab()
2802 list_del(&page->slab_list); in get_valid_first_slab()
2803 if (!page->active) { in get_valid_first_slab()
2804 list_add_tail(&page->slab_list, &n->slabs_free); in get_valid_first_slab()
2805 n->free_slabs++; in get_valid_first_slab()
2807 list_add_tail(&page->slab_list, &n->slabs_partial); in get_valid_first_slab()
2809 list_for_each_entry(page, &n->slabs_partial, slab_list) { in get_valid_first_slab()
2810 if (!PageSlabPfmemalloc(page)) in get_valid_first_slab()
2811 return page; in get_valid_first_slab()
2814 n->free_touched = 1; in get_valid_first_slab()
2815 list_for_each_entry(page, &n->slabs_free, slab_list) { in get_valid_first_slab()
2816 if (!PageSlabPfmemalloc(page)) { in get_valid_first_slab()
2817 n->free_slabs--; in get_valid_first_slab()
2818 return page; in get_valid_first_slab()
2825 static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc) in get_first_slab()
2827 struct page *page; in get_first_slab() local
2829 assert_spin_locked(&n->list_lock); in get_first_slab()
2830 page = list_first_entry_or_null(&n->slabs_partial, struct page, in get_first_slab()
2832 if (!page) { in get_first_slab()
2833 n->free_touched = 1; in get_first_slab()
2834 page = list_first_entry_or_null(&n->slabs_free, struct page, in get_first_slab()
2836 if (page) in get_first_slab()
2837 n->free_slabs--; in get_first_slab()
2841 page = get_valid_first_slab(n, page, pfmemalloc); in get_first_slab()
2843 return page; in get_first_slab()
2849 struct page *page; in cache_alloc_pfmemalloc() local
2856 spin_lock(&n->list_lock); in cache_alloc_pfmemalloc()
2857 page = get_first_slab(n, true); in cache_alloc_pfmemalloc()
2858 if (!page) { in cache_alloc_pfmemalloc()
2859 spin_unlock(&n->list_lock); in cache_alloc_pfmemalloc()
2863 obj = slab_get_obj(cachep, page); in cache_alloc_pfmemalloc()
2864 n->free_objects--; in cache_alloc_pfmemalloc()
2866 fixup_slab_list(cachep, n, page, &list); in cache_alloc_pfmemalloc()
2868 spin_unlock(&n->list_lock); in cache_alloc_pfmemalloc()
2879 struct array_cache *ac, struct page *page, int batchcount) in alloc_block() argument
2885 BUG_ON(page->active >= cachep->num); in alloc_block()
2887 while (page->active < cachep->num && batchcount--) { in alloc_block()
2892 ac->entry[ac->avail++] = slab_get_obj(cachep, page); in alloc_block()
2905 struct page *page; in cache_alloc_refill() local
2911 batchcount = ac->batchcount; in cache_alloc_refill()
2912 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { in cache_alloc_refill()
2922 BUG_ON(ac->avail > 0 || !n); in cache_alloc_refill()
2923 shared = READ_ONCE(n->shared); in cache_alloc_refill()
2924 if (!n->free_objects && (!shared || !shared->avail)) in cache_alloc_refill()
2927 spin_lock(&n->list_lock); in cache_alloc_refill()
2928 shared = READ_ONCE(n->shared); in cache_alloc_refill()
2932 shared->touched = 1; in cache_alloc_refill()
2938 page = get_first_slab(n, false); in cache_alloc_refill()
2939 if (!page) in cache_alloc_refill()
2944 batchcount = alloc_block(cachep, ac, page, batchcount); in cache_alloc_refill()
2945 fixup_slab_list(cachep, n, page, &list); in cache_alloc_refill()
2949 n->free_objects -= ac->avail; in cache_alloc_refill()
2951 spin_unlock(&n->list_lock); in cache_alloc_refill()
2955 if (unlikely(!ac->avail)) { in cache_alloc_refill()
2964 page = cache_grow_begin(cachep, gfp_exact_node(flags), node); in cache_alloc_refill()
2971 if (!ac->avail && page) in cache_alloc_refill()
2972 alloc_block(cachep, ac, page, batchcount); in cache_alloc_refill()
2973 cache_grow_end(cachep, page); in cache_alloc_refill()
2975 if (!ac->avail) in cache_alloc_refill()
2978 ac->touched = 1; in cache_alloc_refill()
2980 return ac->entry[--ac->avail]; in cache_alloc_refill()
2993 WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO)); in cache_alloc_debugcheck_after()
2996 if (cachep->flags & SLAB_POISON) { in cache_alloc_debugcheck_after()
3001 if (cachep->flags & SLAB_STORE_USER) in cache_alloc_debugcheck_after()
3004 if (cachep->flags & SLAB_RED_ZONE) { in cache_alloc_debugcheck_after()
3017 if (cachep->ctor && cachep->flags & SLAB_POISON) in cache_alloc_debugcheck_after()
3018 cachep->ctor(objp); in cache_alloc_debugcheck_after()
3019 if ((unsigned long)objp & (arch_slab_minalign() - 1)) { in cache_alloc_debugcheck_after()
3037 if (likely(ac->avail)) { in ____cache_alloc()
3038 ac->touched = 1; in ____cache_alloc()
3039 objp = ac->entry[--ac->avail]; in ____cache_alloc()
3056 * per-CPU caches is leaked, we need to make sure kmemleak doesn't in ____cache_alloc()
3060 kmemleak_erase(&ac->entry[ac->avail]); in ____cache_alloc()
3078 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) in alternate_node_alloc()
3080 else if (current->mempolicy) in alternate_node_alloc()
3091 * perform an allocation without specifying a node. This allows the page
3102 struct page *page; in fallback_alloc() local
3123 get_node(cache, nid)->free_objects) { in fallback_alloc()
3138 page = cache_grow_begin(cache, flags, numa_mem_id()); in fallback_alloc()
3139 cache_grow_end(cache, page); in fallback_alloc()
3140 if (page) { in fallback_alloc()
3141 nid = page_to_nid(page); in fallback_alloc()
3165 struct page *page; in ____cache_alloc_node() local
3175 spin_lock(&n->list_lock); in ____cache_alloc_node()
3176 page = get_first_slab(n, false); in ____cache_alloc_node()
3177 if (!page) in ____cache_alloc_node()
3186 BUG_ON(page->active == cachep->num); in ____cache_alloc_node()
3188 obj = slab_get_obj(cachep, page); in ____cache_alloc_node()
3189 n->free_objects--; in ____cache_alloc_node()
3191 fixup_slab_list(cachep, n, page, &list); in ____cache_alloc_node()
3193 spin_unlock(&n->list_lock); in ____cache_alloc_node()
3198 spin_unlock(&n->list_lock); in ____cache_alloc_node()
3199 page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid); in ____cache_alloc_node()
3200 if (page) { in ____cache_alloc_node()
3202 obj = slab_get_obj(cachep, page); in ____cache_alloc_node()
3204 cache_grow_end(cachep, page); in ____cache_alloc_node()
3268 if (current->mempolicy || cpuset_do_slab_mem_spread()) { in __do_cache_alloc()
3334 struct page *page; in free_block() local
3336 n->free_objects += nr_objects; in free_block()
3340 struct page *page; in free_block() local
3344 page = virt_to_head_page(objp); in free_block()
3345 list_del(&page->slab_list); in free_block()
3347 slab_put_obj(cachep, page, objp); in free_block()
3351 if (page->active == 0) { in free_block()
3352 list_add(&page->slab_list, &n->slabs_free); in free_block()
3353 n->free_slabs++; in free_block()
3356 * partial list on free - maximum time for the in free_block()
3359 list_add_tail(&page->slab_list, &n->slabs_partial); in free_block()
3363 while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) { in free_block()
3364 n->free_objects -= cachep->num; in free_block()
3366 page = list_last_entry(&n->slabs_free, struct page, slab_list); in free_block()
3367 list_move(&page->slab_list, list); in free_block()
3368 n->free_slabs--; in free_block()
3369 n->total_slabs--; in free_block()
3380 batchcount = ac->batchcount; in cache_flusharray()
3384 spin_lock(&n->list_lock); in cache_flusharray()
3385 if (n->shared) { in cache_flusharray()
3386 struct array_cache *shared_array = n->shared; in cache_flusharray()
3387 int max = shared_array->limit - shared_array->avail; in cache_flusharray()
3391 memcpy(&(shared_array->entry[shared_array->avail]), in cache_flusharray()
3392 ac->entry, sizeof(void *) * batchcount); in cache_flusharray()
3393 shared_array->avail += batchcount; in cache_flusharray()
3398 free_block(cachep, ac->entry, batchcount, node, &list); in cache_flusharray()
3403 struct page *page; in cache_flusharray() local
3405 list_for_each_entry(page, &n->slabs_free, slab_list) { in cache_flusharray()
3406 BUG_ON(page->active); in cache_flusharray()
3413 spin_unlock(&n->list_lock); in cache_flusharray()
3414 ac->avail -= batchcount; in cache_flusharray()
3415 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); in cache_flusharray()
3429 kmemleak_free_recursive(objp, cachep->flags); in __cache_free()
3442 memset(objp, 0, cachep->object_size); in __cache_free()
3447 /* Use KCSAN to help debug racy use-after-free. */ in __cache_free()
3448 if (!(cachep->flags & SLAB_TYPESAFE_BY_RCU)) in __cache_free()
3449 __kcsan_check_access(objp, cachep->object_size, in __cache_free()
3461 kmemleak_free_recursive(objp, cachep->flags); in ___cache_free()
3468 * is per page memory reference) to get nodeid. Instead use a global in ___cache_free()
3475 if (ac->avail < ac->limit) { in ___cache_free()
3483 struct page *page = virt_to_head_page(objp); in ___cache_free() local
3485 if (unlikely(PageSlabPfmemalloc(page))) { in ___cache_free()
3486 cache_free_pfmemalloc(cachep, page, objp); in ___cache_free()
3495 * kmem_cache_alloc - Allocate an object
3506 void *ret = slab_alloc(cachep, flags, cachep->object_size, _RET_IP_); in kmem_cache_alloc()
3509 cachep->object_size, cachep->size, flags); in kmem_cache_alloc()
3517 size_t size, void **p, unsigned long caller) in cache_alloc_debugcheck_after_bulk() argument
3521 for (i = 0; i < size; i++) in cache_alloc_debugcheck_after_bulk()
3525 int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, in kmem_cache_alloc_bulk() argument
3531 s = slab_pre_alloc_hook(s, &objcg, size, flags); in kmem_cache_alloc_bulk()
3538 for (i = 0; i < size; i++) { in kmem_cache_alloc_bulk()
3539 void *objp = kfence_alloc(s, s->object_size, flags) ?: __do_cache_alloc(s, flags); in kmem_cache_alloc_bulk()
3547 cache_alloc_debugcheck_after_bulk(s, flags, size, p, _RET_IP_); in kmem_cache_alloc_bulk()
3553 slab_post_alloc_hook(s, objcg, flags, size, p, in kmem_cache_alloc_bulk()
3556 return size; in kmem_cache_alloc_bulk()
3568 kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size) in kmem_cache_alloc_trace() argument
3572 ret = slab_alloc(cachep, flags, size, _RET_IP_); in kmem_cache_alloc_trace()
3574 ret = kasan_kmalloc(cachep, ret, size, flags); in kmem_cache_alloc_trace()
3576 size, cachep->size, flags); in kmem_cache_alloc_trace()
3584 * kmem_cache_alloc_node - Allocate an object on the specified node
3598 void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_); in kmem_cache_alloc_node()
3601 cachep->object_size, cachep->size, in kmem_cache_alloc_node()
3612 size_t size) in kmem_cache_alloc_node_trace() argument
3616 ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_); in kmem_cache_alloc_node_trace()
3618 ret = kasan_kmalloc(cachep, ret, size, flags); in kmem_cache_alloc_node_trace()
3620 size, cachep->size, in kmem_cache_alloc_node_trace()
3628 __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller) in __do_kmalloc_node() argument
3633 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) in __do_kmalloc_node()
3635 cachep = kmalloc_slab(size, flags); in __do_kmalloc_node()
3638 ret = kmem_cache_alloc_node_trace(cachep, flags, node, size); in __do_kmalloc_node()
3639 ret = kasan_kmalloc(cachep, ret, size, flags); in __do_kmalloc_node()
3644 void *__kmalloc_node(size_t size, gfp_t flags, int node) in __kmalloc_node() argument
3646 return __do_kmalloc_node(size, flags, node, _RET_IP_); in __kmalloc_node()
3650 void *__kmalloc_node_track_caller(size_t size, gfp_t flags, in __kmalloc_node_track_caller() argument
3653 return __do_kmalloc_node(size, flags, node, caller); in __kmalloc_node_track_caller()
3659 * __do_kmalloc - allocate memory
3660 * @size: how many bytes of memory are required.
3666 static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, in __do_kmalloc() argument
3672 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) in __do_kmalloc()
3674 cachep = kmalloc_slab(size, flags); in __do_kmalloc()
3677 ret = slab_alloc(cachep, flags, size, caller); in __do_kmalloc()
3679 ret = kasan_kmalloc(cachep, ret, size, flags); in __do_kmalloc()
3681 size, cachep->size, flags); in __do_kmalloc()
3686 void *__kmalloc(size_t size, gfp_t flags) in __kmalloc() argument
3688 return __do_kmalloc(size, flags, _RET_IP_); in __kmalloc()
3692 void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller) in __kmalloc_track_caller() argument
3694 return __do_kmalloc(size, flags, caller); in __kmalloc_track_caller()
3699 * kmem_cache_free - Deallocate an object
3714 debug_check_no_locks_freed(objp, cachep->object_size); in kmem_cache_free()
3715 if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) in kmem_cache_free()
3716 debug_check_no_obj_freed(objp, cachep->object_size); in kmem_cache_free()
3724 void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) in kmem_cache_free_bulk() argument
3730 for (i = 0; i < size; i++) { in kmem_cache_free_bulk()
3740 debug_check_no_locks_freed(objp, s->object_size); in kmem_cache_free_bulk()
3741 if (!(s->flags & SLAB_DEBUG_OBJECTS)) in kmem_cache_free_bulk()
3742 debug_check_no_obj_freed(objp, s->object_size); in kmem_cache_free_bulk()
3753 * kfree - free previously allocated memory
3777 debug_check_no_locks_freed(objp, c->object_size); in kfree()
3779 debug_check_no_obj_freed(objp, c->object_size); in kfree()
3804 if (!cachep->list.next) { in setup_kmem_cache_nodes()
3806 node--; in setup_kmem_cache_nodes()
3810 kfree(n->shared); in setup_kmem_cache_nodes()
3811 free_alien_cache(n->alien); in setup_kmem_cache_nodes()
3813 cachep->node[node] = NULL; in setup_kmem_cache_nodes()
3815 node--; in setup_kmem_cache_nodes()
3818 return -ENOMEM; in setup_kmem_cache_nodes()
3830 return -ENOMEM; in do_tune_cpucache()
3832 prev = cachep->cpu_cache; in do_tune_cpucache()
3833 cachep->cpu_cache = cpu_cache; in do_tune_cpucache()
3842 cachep->batchcount = batchcount; in do_tune_cpucache()
3843 cachep->limit = limit; in do_tune_cpucache()
3844 cachep->shared = shared; in do_tune_cpucache()
3857 spin_lock_irq(&n->list_lock); in do_tune_cpucache()
3858 free_block(cachep, ac->entry, ac->avail, node, &list); in do_tune_cpucache()
3859 spin_unlock_irq(&n->list_lock); in do_tune_cpucache()
3876 err = cache_random_seq_create(cachep, cachep->num, gfp); in enable_cpucache()
3884 * - create a LIFO ordering, i.e. return objects that are cache-warm in enable_cpucache()
3885 * - reduce the number of spinlock operations. in enable_cpucache()
3886 * - reduce the number of linked list operations on the slab and in enable_cpucache()
3888 * The numbers are guessed, we should auto-tune as described by in enable_cpucache()
3891 if (cachep->size > 131072) in enable_cpucache()
3893 else if (cachep->size > PAGE_SIZE) in enable_cpucache()
3895 else if (cachep->size > 1024) in enable_cpucache()
3897 else if (cachep->size > 256) in enable_cpucache()
3912 if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1) in enable_cpucache()
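Read together, the size checks above form a simple ladder: the larger the object, the smaller the per-cpu limit. A sketch of that shape with illustrative limit values (only the size thresholds appear in the matched lines above, so the numbers returned here are placeholders, not necessarily the kernel's):

#define TOY_PAGE_SIZE 4096U		/* illustrative */

/* Map object size to a per-cpu array limit: big objects get a tiny
 * cache, small objects a large one (placeholder values). */
static unsigned int toy_pick_limit(unsigned int size)
{
	if (size > 131072)
		return 1;
	else if (size > TOY_PAGE_SIZE)
		return 8;
	else if (size > 1024)
		return 24;
	else if (size > 256)
		return 54;
	else
		return 120;
}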
3929 cachep->name, -err); in enable_cpucache()
3943 /* ac from n->shared can be freed if we don't hold the slab_mutex. */ in drain_array()
3946 if (!ac || !ac->avail) in drain_array()
3949 if (ac->touched) { in drain_array()
3950 ac->touched = 0; in drain_array()
3954 spin_lock_irq(&n->list_lock); in drain_array()
3956 spin_unlock_irq(&n->list_lock); in drain_array()
3962 * cache_reap - Reclaim memory from caches.
3967 * - clear the per-cpu caches for this CPU.
3968 * - return freeable pages to the main free memory pool.
3970 * If we cannot acquire the cache chain mutex then just give up - we'll try
4002 if (time_after(n->next_reap, jiffies)) in cache_reap()
4005 n->next_reap = jiffies + REAPTIMEOUT_NODE; in cache_reap()
4007 drain_array(searchp, n, n->shared, node); in cache_reap()
4009 if (n->free_touched) in cache_reap()
4010 n->free_touched = 0; in cache_reap()
4014 freed = drain_freelist(searchp, n, (n->free_limit + in cache_reap()
4015 5 * searchp->num - 1) / (5 * searchp->num)); in cache_reap()
4040 spin_lock_irq(&n->list_lock); in get_slabinfo()
4042 total_slabs += n->total_slabs; in get_slabinfo()
4043 free_slabs += n->free_slabs; in get_slabinfo()
4044 free_objs += n->free_objects; in get_slabinfo()
4046 if (n->shared) in get_slabinfo()
4047 shared_avail += n->shared->avail; in get_slabinfo()
4049 spin_unlock_irq(&n->list_lock); in get_slabinfo()
4051 num_objs = total_slabs * cachep->num; in get_slabinfo()
4052 active_slabs = total_slabs - free_slabs; in get_slabinfo()
4053 active_objs = num_objs - free_objs; in get_slabinfo()
4055 sinfo->active_objs = active_objs; in get_slabinfo()
4056 sinfo->num_objs = num_objs; in get_slabinfo()
4057 sinfo->active_slabs = active_slabs; in get_slabinfo()
4058 sinfo->num_slabs = total_slabs; in get_slabinfo()
4059 sinfo->shared_avail = shared_avail; in get_slabinfo()
4060 sinfo->limit = cachep->limit; in get_slabinfo()
4061 sinfo->batchcount = cachep->batchcount; in get_slabinfo()
4062 sinfo->shared = cachep->shared; in get_slabinfo()
4063 sinfo->objects_per_slab = cachep->num; in get_slabinfo()
4064 sinfo->cache_order = cachep->gfporder; in get_slabinfo()
4072 unsigned long high = cachep->high_mark; in slabinfo_show_stats()
4073 unsigned long allocs = cachep->num_allocations; in slabinfo_show_stats()
4074 unsigned long grown = cachep->grown; in slabinfo_show_stats()
4075 unsigned long reaped = cachep->reaped; in slabinfo_show_stats()
4076 unsigned long errors = cachep->errors; in slabinfo_show_stats()
4077 unsigned long max_freeable = cachep->max_freeable; in slabinfo_show_stats()
4078 unsigned long node_allocs = cachep->node_allocs; in slabinfo_show_stats()
4079 unsigned long node_frees = cachep->node_frees; in slabinfo_show_stats()
4080 unsigned long overflows = cachep->node_overflow; in slabinfo_show_stats()
4089 unsigned long allochit = atomic_read(&cachep->allochit); in slabinfo_show_stats()
4090 unsigned long allocmiss = atomic_read(&cachep->allocmiss); in slabinfo_show_stats()
4091 unsigned long freehit = atomic_read(&cachep->freehit); in slabinfo_show_stats()
4092 unsigned long freemiss = atomic_read(&cachep->freemiss); in slabinfo_show_stats()
4102 * slabinfo_write - Tuning for the slab allocator
4118 return -EINVAL; in slabinfo_write()
4120 return -EFAULT; in slabinfo_write()
4125 return -EINVAL; in slabinfo_write()
4129 return -EINVAL; in slabinfo_write()
4133 res = -EINVAL; in slabinfo_write()
4135 if (!strcmp(cachep->name, kbuf)) { in slabinfo_write()
4162 void __check_heap_object(const void *ptr, unsigned long n, struct page *page, in __check_heap_object() argument
4172 cachep = page->slab_cache; in __check_heap_object()
4173 objnr = obj_to_index(cachep, page, (void *)ptr); in __check_heap_object()
4174 BUG_ON(objnr >= cachep->num); in __check_heap_object()
4178 offset = ptr - kfence_object_start(ptr); in __check_heap_object()
4180 offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep); in __check_heap_object()
4183 if (offset >= cachep->useroffset && in __check_heap_object()
4184 offset - cachep->useroffset <= cachep->usersize && in __check_heap_object()
4185 n <= cachep->useroffset - offset + cachep->usersize) in __check_heap_object()
4195 offset <= cachep->object_size && in __check_heap_object()
4196 n <= cachep->object_size - offset) { in __check_heap_object()
4197 usercopy_warn("SLAB object", cachep->name, to_user, offset, n); in __check_heap_object()
4201 usercopy_abort("SLAB object", cachep->name, to_user, offset, n); in __check_heap_object()
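The window test above reduces to checking that the copy [offset, offset + n) lies entirely inside the whitelisted region [useroffset, useroffset + usersize) of the object. A standalone sketch of that check:

#include <stdbool.h>
#include <stddef.h>

/* True if copying n bytes starting at 'offset' (relative to the object)
 * stays inside the whitelisted usercopy window. */
static bool toy_usercopy_in_window(size_t offset, size_t n,
				   size_t useroffset, size_t usersize)
{
	return offset >= useroffset &&
	       offset - useroffset <= usersize &&
	       n <= useroffset + usersize - offset;
}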
4206 * __ksize -- Uninstrumented ksize.
4212 * Return: size of the actual memory used by @objp in bytes
4217 size_t size; in __ksize() local
4224 size = c ? c->object_size : 0; in __ksize()
4226 return size; in __ksize()