1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * SLOB Allocator: Simple List Of Blocks
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Matt Mackall <mpm@selenic.com> 12/30/03
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun * NUMA support by Paul Mundt, 2007.
8*4882a593Smuzhiyun *
9*4882a593Smuzhiyun * How SLOB works:
10*4882a593Smuzhiyun *
11*4882a593Smuzhiyun * The core of SLOB is a traditional K&R style heap allocator, with
12*4882a593Smuzhiyun * support for returning aligned objects. The granularity of this
13*4882a593Smuzhiyun * allocator is as little as 2 bytes, however typically most architectures
14*4882a593Smuzhiyun * will require 4 bytes on 32-bit and 8 bytes on 64-bit.
15*4882a593Smuzhiyun *
16*4882a593Smuzhiyun * The slob heap is a set of linked list of pages from alloc_pages(),
17*4882a593Smuzhiyun * and within each page, there is a singly-linked list of free blocks
18*4882a593Smuzhiyun * (slob_t). The heap is grown on demand. To reduce fragmentation,
19*4882a593Smuzhiyun * heap pages are segregated into three lists, with objects less than
20*4882a593Smuzhiyun * 256 bytes, objects less than 1024 bytes, and all other objects.
21*4882a593Smuzhiyun *
22*4882a593Smuzhiyun * Allocation from heap involves first searching for a page with
23*4882a593Smuzhiyun * sufficient free blocks (using a next-fit-like approach) followed by
24*4882a593Smuzhiyun * a first-fit scan of the page. Deallocation inserts objects back
25*4882a593Smuzhiyun * into the free list in address order, so this is effectively an
26*4882a593Smuzhiyun * address-ordered first fit.
27*4882a593Smuzhiyun *
28*4882a593Smuzhiyun * Above this is an implementation of kmalloc/kfree. Blocks returned
29*4882a593Smuzhiyun * from kmalloc are prepended with a 4-byte header with the kmalloc size.
30*4882a593Smuzhiyun * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
31*4882a593Smuzhiyun * alloc_pages() directly, allocating compound pages so the page order
32*4882a593Smuzhiyun * does not have to be separately tracked.
33*4882a593Smuzhiyun * These objects are detected in kfree() because PageSlab()
34*4882a593Smuzhiyun * is false for them.
35*4882a593Smuzhiyun *
36*4882a593Smuzhiyun * SLAB is emulated on top of SLOB by simply calling constructors and
37*4882a593Smuzhiyun * destructors for every SLAB allocation. Objects are returned with the
38*4882a593Smuzhiyun * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
39*4882a593Smuzhiyun * case the low-level allocator will fragment blocks to create the proper
40*4882a593Smuzhiyun * alignment. Again, objects of page-size or greater are allocated by
41*4882a593Smuzhiyun * calling alloc_pages(). As SLAB objects know their size, no separate
42*4882a593Smuzhiyun * size bookkeeping is necessary and there is essentially no allocation
43*4882a593Smuzhiyun * space overhead, and compound pages aren't needed for multi-page
44*4882a593Smuzhiyun * allocations.
45*4882a593Smuzhiyun *
46*4882a593Smuzhiyun * NUMA support in SLOB is fairly simplistic, pushing most of the real
47*4882a593Smuzhiyun * logic down to the page allocator, and simply doing the node accounting
48*4882a593Smuzhiyun * on the upper levels. In the event that a node id is explicitly
49*4882a593Smuzhiyun * provided, __alloc_pages_node() with the specified node id is used
50*4882a593Smuzhiyun * instead. The common case (or when the node id isn't explicitly provided)
51*4882a593Smuzhiyun * will default to the current node, as per numa_node_id().
52*4882a593Smuzhiyun *
53*4882a593Smuzhiyun * Node aware pages are still inserted in to the global freelist, and
54*4882a593Smuzhiyun * these are scanned for by matching against the node id encoded in the
55*4882a593Smuzhiyun * page flags. As a result, block allocations that can be satisfied from
56*4882a593Smuzhiyun * the freelist will only be done so on pages residing on the same node,
57*4882a593Smuzhiyun * in order to prevent random node placement.
58*4882a593Smuzhiyun */
59*4882a593Smuzhiyun
60*4882a593Smuzhiyun #include <linux/kernel.h>
61*4882a593Smuzhiyun #include <linux/slab.h>
62*4882a593Smuzhiyun
63*4882a593Smuzhiyun #include <linux/mm.h>
64*4882a593Smuzhiyun #include <linux/swap.h> /* struct reclaim_state */
65*4882a593Smuzhiyun #include <linux/cache.h>
66*4882a593Smuzhiyun #include <linux/init.h>
67*4882a593Smuzhiyun #include <linux/export.h>
68*4882a593Smuzhiyun #include <linux/rcupdate.h>
69*4882a593Smuzhiyun #include <linux/list.h>
70*4882a593Smuzhiyun #include <linux/kmemleak.h>
71*4882a593Smuzhiyun
72*4882a593Smuzhiyun #include <trace/events/kmem.h>
73*4882a593Smuzhiyun
74*4882a593Smuzhiyun #include <linux/atomic.h>
75*4882a593Smuzhiyun
76*4882a593Smuzhiyun #include "slab.h"
77*4882a593Smuzhiyun /*
78*4882a593Smuzhiyun * slob_block has a field 'units', which indicates size of block if +ve,
79*4882a593Smuzhiyun * or offset of next block if -ve (in SLOB_UNITs).
80*4882a593Smuzhiyun *
81*4882a593Smuzhiyun * Free blocks of size 1 unit simply contain the offset of the next block.
82*4882a593Smuzhiyun * Those with larger size contain their size in the first SLOB_UNIT of
83*4882a593Smuzhiyun * memory, and the offset of the next free block in the second SLOB_UNIT.
84*4882a593Smuzhiyun */
85*4882a593Smuzhiyun #if PAGE_SIZE <= (32767 * 2)
86*4882a593Smuzhiyun typedef s16 slobidx_t;
87*4882a593Smuzhiyun #else
88*4882a593Smuzhiyun typedef s32 slobidx_t;
89*4882a593Smuzhiyun #endif
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun struct slob_block {
92*4882a593Smuzhiyun slobidx_t units;
93*4882a593Smuzhiyun };
94*4882a593Smuzhiyun typedef struct slob_block slob_t;
95*4882a593Smuzhiyun
96*4882a593Smuzhiyun /*
97*4882a593Smuzhiyun * All partially free slob pages go on these lists.
98*4882a593Smuzhiyun */
99*4882a593Smuzhiyun #define SLOB_BREAK1 256
100*4882a593Smuzhiyun #define SLOB_BREAK2 1024
101*4882a593Smuzhiyun static LIST_HEAD(free_slob_small);
102*4882a593Smuzhiyun static LIST_HEAD(free_slob_medium);
103*4882a593Smuzhiyun static LIST_HEAD(free_slob_large);
104*4882a593Smuzhiyun
105*4882a593Smuzhiyun /*
106*4882a593Smuzhiyun * slob_page_free: true for pages on free_slob_pages list.
107*4882a593Smuzhiyun */
slob_page_free(struct page * sp)108*4882a593Smuzhiyun static inline int slob_page_free(struct page *sp)
109*4882a593Smuzhiyun {
110*4882a593Smuzhiyun return PageSlobFree(sp);
111*4882a593Smuzhiyun }
112*4882a593Smuzhiyun
set_slob_page_free(struct page * sp,struct list_head * list)113*4882a593Smuzhiyun static void set_slob_page_free(struct page *sp, struct list_head *list)
114*4882a593Smuzhiyun {
115*4882a593Smuzhiyun list_add(&sp->slab_list, list);
116*4882a593Smuzhiyun __SetPageSlobFree(sp);
117*4882a593Smuzhiyun }
118*4882a593Smuzhiyun
clear_slob_page_free(struct page * sp)119*4882a593Smuzhiyun static inline void clear_slob_page_free(struct page *sp)
120*4882a593Smuzhiyun {
121*4882a593Smuzhiyun list_del(&sp->slab_list);
122*4882a593Smuzhiyun __ClearPageSlobFree(sp);
123*4882a593Smuzhiyun }
124*4882a593Smuzhiyun
125*4882a593Smuzhiyun #define SLOB_UNIT sizeof(slob_t)
126*4882a593Smuzhiyun #define SLOB_UNITS(size) DIV_ROUND_UP(size, SLOB_UNIT)
127*4882a593Smuzhiyun
128*4882a593Smuzhiyun /*
129*4882a593Smuzhiyun * struct slob_rcu is inserted at the tail of allocated slob blocks, which
130*4882a593Smuzhiyun * were created with a SLAB_TYPESAFE_BY_RCU slab. slob_rcu is used to free
131*4882a593Smuzhiyun * the block using call_rcu.
132*4882a593Smuzhiyun */
133*4882a593Smuzhiyun struct slob_rcu {
134*4882a593Smuzhiyun struct rcu_head head;
135*4882a593Smuzhiyun int size;
136*4882a593Smuzhiyun };
137*4882a593Smuzhiyun
138*4882a593Smuzhiyun /*
139*4882a593Smuzhiyun * slob_lock protects all slob allocator structures.
140*4882a593Smuzhiyun */
141*4882a593Smuzhiyun static DEFINE_SPINLOCK(slob_lock);
142*4882a593Smuzhiyun
143*4882a593Smuzhiyun /*
144*4882a593Smuzhiyun * Encode the given size and next info into a free slob block s.
145*4882a593Smuzhiyun */
set_slob(slob_t * s,slobidx_t size,slob_t * next)146*4882a593Smuzhiyun static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
147*4882a593Smuzhiyun {
148*4882a593Smuzhiyun slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
149*4882a593Smuzhiyun slobidx_t offset = next - base;
150*4882a593Smuzhiyun
151*4882a593Smuzhiyun if (size > 1) {
152*4882a593Smuzhiyun s[0].units = size;
153*4882a593Smuzhiyun s[1].units = offset;
154*4882a593Smuzhiyun } else
155*4882a593Smuzhiyun s[0].units = -offset;
156*4882a593Smuzhiyun }
157*4882a593Smuzhiyun
158*4882a593Smuzhiyun /*
159*4882a593Smuzhiyun * Return the size of a slob block.
160*4882a593Smuzhiyun */
slob_units(slob_t * s)161*4882a593Smuzhiyun static slobidx_t slob_units(slob_t *s)
162*4882a593Smuzhiyun {
163*4882a593Smuzhiyun if (s->units > 0)
164*4882a593Smuzhiyun return s->units;
165*4882a593Smuzhiyun return 1;
166*4882a593Smuzhiyun }
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun /*
169*4882a593Smuzhiyun * Return the next free slob block pointer after this one.
170*4882a593Smuzhiyun */
slob_next(slob_t * s)171*4882a593Smuzhiyun static slob_t *slob_next(slob_t *s)
172*4882a593Smuzhiyun {
173*4882a593Smuzhiyun slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
174*4882a593Smuzhiyun slobidx_t next;
175*4882a593Smuzhiyun
176*4882a593Smuzhiyun if (s[0].units < 0)
177*4882a593Smuzhiyun next = -s[0].units;
178*4882a593Smuzhiyun else
179*4882a593Smuzhiyun next = s[1].units;
180*4882a593Smuzhiyun return base+next;
181*4882a593Smuzhiyun }
182*4882a593Smuzhiyun
183*4882a593Smuzhiyun /*
184*4882a593Smuzhiyun * Returns true if s is the last free block in its page.
185*4882a593Smuzhiyun */
slob_last(slob_t * s)186*4882a593Smuzhiyun static int slob_last(slob_t *s)
187*4882a593Smuzhiyun {
188*4882a593Smuzhiyun return !((unsigned long)slob_next(s) & ~PAGE_MASK);
189*4882a593Smuzhiyun }
190*4882a593Smuzhiyun
slob_new_pages(gfp_t gfp,int order,int node)191*4882a593Smuzhiyun static void *slob_new_pages(gfp_t gfp, int order, int node)
192*4882a593Smuzhiyun {
193*4882a593Smuzhiyun struct page *page;
194*4882a593Smuzhiyun
195*4882a593Smuzhiyun #ifdef CONFIG_NUMA
196*4882a593Smuzhiyun if (node != NUMA_NO_NODE)
197*4882a593Smuzhiyun page = __alloc_pages_node(node, gfp, order);
198*4882a593Smuzhiyun else
199*4882a593Smuzhiyun #endif
200*4882a593Smuzhiyun page = alloc_pages(gfp, order);
201*4882a593Smuzhiyun
202*4882a593Smuzhiyun if (!page)
203*4882a593Smuzhiyun return NULL;
204*4882a593Smuzhiyun
205*4882a593Smuzhiyun mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
206*4882a593Smuzhiyun PAGE_SIZE << order);
207*4882a593Smuzhiyun return page_address(page);
208*4882a593Smuzhiyun }
209*4882a593Smuzhiyun
slob_free_pages(void * b,int order)210*4882a593Smuzhiyun static void slob_free_pages(void *b, int order)
211*4882a593Smuzhiyun {
212*4882a593Smuzhiyun struct page *sp = virt_to_page(b);
213*4882a593Smuzhiyun
214*4882a593Smuzhiyun if (current->reclaim_state)
215*4882a593Smuzhiyun current->reclaim_state->reclaimed_slab += 1 << order;
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
218*4882a593Smuzhiyun -(PAGE_SIZE << order));
219*4882a593Smuzhiyun __free_pages(sp, order);
220*4882a593Smuzhiyun }
221*4882a593Smuzhiyun
222*4882a593Smuzhiyun /*
223*4882a593Smuzhiyun * slob_page_alloc() - Allocate a slob block within a given slob_page sp.
224*4882a593Smuzhiyun * @sp: Page to look in.
225*4882a593Smuzhiyun * @size: Size of the allocation.
226*4882a593Smuzhiyun * @align: Allocation alignment.
227*4882a593Smuzhiyun * @align_offset: Offset in the allocated block that will be aligned.
228*4882a593Smuzhiyun * @page_removed_from_list: Return parameter.
229*4882a593Smuzhiyun *
230*4882a593Smuzhiyun * Tries to find a chunk of memory at least @size bytes big within @page.
231*4882a593Smuzhiyun *
232*4882a593Smuzhiyun * Return: Pointer to memory if allocated, %NULL otherwise. If the
233*4882a593Smuzhiyun * allocation fills up @page then the page is removed from the
234*4882a593Smuzhiyun * freelist, in this case @page_removed_from_list will be set to
235*4882a593Smuzhiyun * true (set to false otherwise).
236*4882a593Smuzhiyun */
slob_page_alloc(struct page * sp,size_t size,int align,int align_offset,bool * page_removed_from_list)237*4882a593Smuzhiyun static void *slob_page_alloc(struct page *sp, size_t size, int align,
238*4882a593Smuzhiyun int align_offset, bool *page_removed_from_list)
239*4882a593Smuzhiyun {
240*4882a593Smuzhiyun slob_t *prev, *cur, *aligned = NULL;
241*4882a593Smuzhiyun int delta = 0, units = SLOB_UNITS(size);
242*4882a593Smuzhiyun
243*4882a593Smuzhiyun *page_removed_from_list = false;
244*4882a593Smuzhiyun for (prev = NULL, cur = sp->freelist; ; prev = cur, cur = slob_next(cur)) {
245*4882a593Smuzhiyun slobidx_t avail = slob_units(cur);
246*4882a593Smuzhiyun
247*4882a593Smuzhiyun /*
248*4882a593Smuzhiyun * 'aligned' will hold the address of the slob block so that the
249*4882a593Smuzhiyun * address 'aligned'+'align_offset' is aligned according to the
250*4882a593Smuzhiyun * 'align' parameter. This is for kmalloc() which prepends the
251*4882a593Smuzhiyun * allocated block with its size, so that the block itself is
252*4882a593Smuzhiyun * aligned when needed.
253*4882a593Smuzhiyun */
254*4882a593Smuzhiyun if (align) {
255*4882a593Smuzhiyun aligned = (slob_t *)
256*4882a593Smuzhiyun (ALIGN((unsigned long)cur + align_offset, align)
257*4882a593Smuzhiyun - align_offset);
258*4882a593Smuzhiyun delta = aligned - cur;
259*4882a593Smuzhiyun }
260*4882a593Smuzhiyun if (avail >= units + delta) { /* room enough? */
261*4882a593Smuzhiyun slob_t *next;
262*4882a593Smuzhiyun
263*4882a593Smuzhiyun if (delta) { /* need to fragment head to align? */
264*4882a593Smuzhiyun next = slob_next(cur);
265*4882a593Smuzhiyun set_slob(aligned, avail - delta, next);
266*4882a593Smuzhiyun set_slob(cur, delta, aligned);
267*4882a593Smuzhiyun prev = cur;
268*4882a593Smuzhiyun cur = aligned;
269*4882a593Smuzhiyun avail = slob_units(cur);
270*4882a593Smuzhiyun }
271*4882a593Smuzhiyun
272*4882a593Smuzhiyun next = slob_next(cur);
273*4882a593Smuzhiyun if (avail == units) { /* exact fit? unlink. */
274*4882a593Smuzhiyun if (prev)
275*4882a593Smuzhiyun set_slob(prev, slob_units(prev), next);
276*4882a593Smuzhiyun else
277*4882a593Smuzhiyun sp->freelist = next;
278*4882a593Smuzhiyun } else { /* fragment */
279*4882a593Smuzhiyun if (prev)
280*4882a593Smuzhiyun set_slob(prev, slob_units(prev), cur + units);
281*4882a593Smuzhiyun else
282*4882a593Smuzhiyun sp->freelist = cur + units;
283*4882a593Smuzhiyun set_slob(cur + units, avail - units, next);
284*4882a593Smuzhiyun }
285*4882a593Smuzhiyun
286*4882a593Smuzhiyun sp->units -= units;
287*4882a593Smuzhiyun if (!sp->units) {
288*4882a593Smuzhiyun clear_slob_page_free(sp);
289*4882a593Smuzhiyun *page_removed_from_list = true;
290*4882a593Smuzhiyun }
291*4882a593Smuzhiyun return cur;
292*4882a593Smuzhiyun }
293*4882a593Smuzhiyun if (slob_last(cur))
294*4882a593Smuzhiyun return NULL;
295*4882a593Smuzhiyun }
296*4882a593Smuzhiyun }
297*4882a593Smuzhiyun
298*4882a593Smuzhiyun /*
299*4882a593Smuzhiyun * slob_alloc: entry point into the slob allocator.
300*4882a593Smuzhiyun */
slob_alloc(size_t size,gfp_t gfp,int align,int node,int align_offset)301*4882a593Smuzhiyun static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
302*4882a593Smuzhiyun int align_offset)
303*4882a593Smuzhiyun {
304*4882a593Smuzhiyun struct page *sp;
305*4882a593Smuzhiyun struct list_head *slob_list;
306*4882a593Smuzhiyun slob_t *b = NULL;
307*4882a593Smuzhiyun unsigned long flags;
308*4882a593Smuzhiyun bool _unused;
309*4882a593Smuzhiyun
310*4882a593Smuzhiyun if (size < SLOB_BREAK1)
311*4882a593Smuzhiyun slob_list = &free_slob_small;
312*4882a593Smuzhiyun else if (size < SLOB_BREAK2)
313*4882a593Smuzhiyun slob_list = &free_slob_medium;
314*4882a593Smuzhiyun else
315*4882a593Smuzhiyun slob_list = &free_slob_large;
316*4882a593Smuzhiyun
317*4882a593Smuzhiyun spin_lock_irqsave(&slob_lock, flags);
318*4882a593Smuzhiyun /* Iterate through each partially free page, try to find room */
319*4882a593Smuzhiyun list_for_each_entry(sp, slob_list, slab_list) {
320*4882a593Smuzhiyun bool page_removed_from_list = false;
321*4882a593Smuzhiyun #ifdef CONFIG_NUMA
322*4882a593Smuzhiyun /*
323*4882a593Smuzhiyun * If there's a node specification, search for a partial
324*4882a593Smuzhiyun * page with a matching node id in the freelist.
325*4882a593Smuzhiyun */
326*4882a593Smuzhiyun if (node != NUMA_NO_NODE && page_to_nid(sp) != node)
327*4882a593Smuzhiyun continue;
328*4882a593Smuzhiyun #endif
329*4882a593Smuzhiyun /* Enough room on this page? */
330*4882a593Smuzhiyun if (sp->units < SLOB_UNITS(size))
331*4882a593Smuzhiyun continue;
332*4882a593Smuzhiyun
333*4882a593Smuzhiyun b = slob_page_alloc(sp, size, align, align_offset, &page_removed_from_list);
334*4882a593Smuzhiyun if (!b)
335*4882a593Smuzhiyun continue;
336*4882a593Smuzhiyun
337*4882a593Smuzhiyun /*
338*4882a593Smuzhiyun * If slob_page_alloc() removed sp from the list then we
339*4882a593Smuzhiyun * cannot call list functions on sp. If so allocation
340*4882a593Smuzhiyun * did not fragment the page anyway so optimisation is
341*4882a593Smuzhiyun * unnecessary.
342*4882a593Smuzhiyun */
343*4882a593Smuzhiyun if (!page_removed_from_list) {
344*4882a593Smuzhiyun /*
345*4882a593Smuzhiyun * Improve fragment distribution and reduce our average
346*4882a593Smuzhiyun * search time by starting our next search here. (see
347*4882a593Smuzhiyun * Knuth vol 1, sec 2.5, pg 449)
348*4882a593Smuzhiyun */
349*4882a593Smuzhiyun if (!list_is_first(&sp->slab_list, slob_list))
350*4882a593Smuzhiyun list_rotate_to_front(&sp->slab_list, slob_list);
351*4882a593Smuzhiyun }
352*4882a593Smuzhiyun break;
353*4882a593Smuzhiyun }
354*4882a593Smuzhiyun spin_unlock_irqrestore(&slob_lock, flags);
355*4882a593Smuzhiyun
356*4882a593Smuzhiyun /* Not enough space: must allocate a new page */
357*4882a593Smuzhiyun if (!b) {
358*4882a593Smuzhiyun b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
359*4882a593Smuzhiyun if (!b)
360*4882a593Smuzhiyun return NULL;
361*4882a593Smuzhiyun sp = virt_to_page(b);
362*4882a593Smuzhiyun __SetPageSlab(sp);
363*4882a593Smuzhiyun
364*4882a593Smuzhiyun spin_lock_irqsave(&slob_lock, flags);
365*4882a593Smuzhiyun sp->units = SLOB_UNITS(PAGE_SIZE);
366*4882a593Smuzhiyun sp->freelist = b;
367*4882a593Smuzhiyun INIT_LIST_HEAD(&sp->slab_list);
368*4882a593Smuzhiyun set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
369*4882a593Smuzhiyun set_slob_page_free(sp, slob_list);
370*4882a593Smuzhiyun b = slob_page_alloc(sp, size, align, align_offset, &_unused);
371*4882a593Smuzhiyun BUG_ON(!b);
372*4882a593Smuzhiyun spin_unlock_irqrestore(&slob_lock, flags);
373*4882a593Smuzhiyun }
374*4882a593Smuzhiyun if (unlikely(gfp & __GFP_ZERO))
375*4882a593Smuzhiyun memset(b, 0, size);
376*4882a593Smuzhiyun return b;
377*4882a593Smuzhiyun }
378*4882a593Smuzhiyun
379*4882a593Smuzhiyun /*
380*4882a593Smuzhiyun * slob_free: entry point into the slob allocator.
381*4882a593Smuzhiyun */
slob_free(void * block,int size)382*4882a593Smuzhiyun static void slob_free(void *block, int size)
383*4882a593Smuzhiyun {
384*4882a593Smuzhiyun struct page *sp;
385*4882a593Smuzhiyun slob_t *prev, *next, *b = (slob_t *)block;
386*4882a593Smuzhiyun slobidx_t units;
387*4882a593Smuzhiyun unsigned long flags;
388*4882a593Smuzhiyun struct list_head *slob_list;
389*4882a593Smuzhiyun
390*4882a593Smuzhiyun if (unlikely(ZERO_OR_NULL_PTR(block)))
391*4882a593Smuzhiyun return;
392*4882a593Smuzhiyun BUG_ON(!size);
393*4882a593Smuzhiyun
394*4882a593Smuzhiyun sp = virt_to_page(block);
395*4882a593Smuzhiyun units = SLOB_UNITS(size);
396*4882a593Smuzhiyun
397*4882a593Smuzhiyun spin_lock_irqsave(&slob_lock, flags);
398*4882a593Smuzhiyun
399*4882a593Smuzhiyun if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
400*4882a593Smuzhiyun /* Go directly to page allocator. Do not pass slob allocator */
401*4882a593Smuzhiyun if (slob_page_free(sp))
402*4882a593Smuzhiyun clear_slob_page_free(sp);
403*4882a593Smuzhiyun spin_unlock_irqrestore(&slob_lock, flags);
404*4882a593Smuzhiyun __ClearPageSlab(sp);
405*4882a593Smuzhiyun page_mapcount_reset(sp);
406*4882a593Smuzhiyun slob_free_pages(b, 0);
407*4882a593Smuzhiyun return;
408*4882a593Smuzhiyun }
409*4882a593Smuzhiyun
410*4882a593Smuzhiyun if (!slob_page_free(sp)) {
411*4882a593Smuzhiyun /* This slob page is about to become partially free. Easy! */
412*4882a593Smuzhiyun sp->units = units;
413*4882a593Smuzhiyun sp->freelist = b;
414*4882a593Smuzhiyun set_slob(b, units,
415*4882a593Smuzhiyun (void *)((unsigned long)(b +
416*4882a593Smuzhiyun SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
417*4882a593Smuzhiyun if (size < SLOB_BREAK1)
418*4882a593Smuzhiyun slob_list = &free_slob_small;
419*4882a593Smuzhiyun else if (size < SLOB_BREAK2)
420*4882a593Smuzhiyun slob_list = &free_slob_medium;
421*4882a593Smuzhiyun else
422*4882a593Smuzhiyun slob_list = &free_slob_large;
423*4882a593Smuzhiyun set_slob_page_free(sp, slob_list);
424*4882a593Smuzhiyun goto out;
425*4882a593Smuzhiyun }
426*4882a593Smuzhiyun
427*4882a593Smuzhiyun /*
428*4882a593Smuzhiyun * Otherwise the page is already partially free, so find reinsertion
429*4882a593Smuzhiyun * point.
430*4882a593Smuzhiyun */
431*4882a593Smuzhiyun sp->units += units;
432*4882a593Smuzhiyun
433*4882a593Smuzhiyun if (b < (slob_t *)sp->freelist) {
434*4882a593Smuzhiyun if (b + units == sp->freelist) {
435*4882a593Smuzhiyun units += slob_units(sp->freelist);
436*4882a593Smuzhiyun sp->freelist = slob_next(sp->freelist);
437*4882a593Smuzhiyun }
438*4882a593Smuzhiyun set_slob(b, units, sp->freelist);
439*4882a593Smuzhiyun sp->freelist = b;
440*4882a593Smuzhiyun } else {
441*4882a593Smuzhiyun prev = sp->freelist;
442*4882a593Smuzhiyun next = slob_next(prev);
443*4882a593Smuzhiyun while (b > next) {
444*4882a593Smuzhiyun prev = next;
445*4882a593Smuzhiyun next = slob_next(prev);
446*4882a593Smuzhiyun }
447*4882a593Smuzhiyun
448*4882a593Smuzhiyun if (!slob_last(prev) && b + units == next) {
449*4882a593Smuzhiyun units += slob_units(next);
450*4882a593Smuzhiyun set_slob(b, units, slob_next(next));
451*4882a593Smuzhiyun } else
452*4882a593Smuzhiyun set_slob(b, units, next);
453*4882a593Smuzhiyun
454*4882a593Smuzhiyun if (prev + slob_units(prev) == b) {
455*4882a593Smuzhiyun units = slob_units(b) + slob_units(prev);
456*4882a593Smuzhiyun set_slob(prev, units, slob_next(b));
457*4882a593Smuzhiyun } else
458*4882a593Smuzhiyun set_slob(prev, slob_units(prev), b);
459*4882a593Smuzhiyun }
460*4882a593Smuzhiyun out:
461*4882a593Smuzhiyun spin_unlock_irqrestore(&slob_lock, flags);
462*4882a593Smuzhiyun }
463*4882a593Smuzhiyun
464*4882a593Smuzhiyun /*
465*4882a593Smuzhiyun * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
466*4882a593Smuzhiyun */
467*4882a593Smuzhiyun
468*4882a593Smuzhiyun static __always_inline void *
__do_kmalloc_node(size_t size,gfp_t gfp,int node,unsigned long caller)469*4882a593Smuzhiyun __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
470*4882a593Smuzhiyun {
471*4882a593Smuzhiyun unsigned int *m;
472*4882a593Smuzhiyun unsigned int minalign;
473*4882a593Smuzhiyun void *ret;
474*4882a593Smuzhiyun
475*4882a593Smuzhiyun minalign = max_t(unsigned int, ARCH_KMALLOC_MINALIGN,
476*4882a593Smuzhiyun arch_slab_minalign());
477*4882a593Smuzhiyun gfp &= gfp_allowed_mask;
478*4882a593Smuzhiyun
479*4882a593Smuzhiyun fs_reclaim_acquire(gfp);
480*4882a593Smuzhiyun fs_reclaim_release(gfp);
481*4882a593Smuzhiyun
482*4882a593Smuzhiyun if (size < PAGE_SIZE - minalign) {
483*4882a593Smuzhiyun int align = minalign;
484*4882a593Smuzhiyun
485*4882a593Smuzhiyun /*
486*4882a593Smuzhiyun * For power of two sizes, guarantee natural alignment for
487*4882a593Smuzhiyun * kmalloc()'d objects.
488*4882a593Smuzhiyun */
489*4882a593Smuzhiyun if (is_power_of_2(size))
490*4882a593Smuzhiyun align = max_t(unsigned int, minalign, size);
491*4882a593Smuzhiyun
492*4882a593Smuzhiyun if (!size)
493*4882a593Smuzhiyun return ZERO_SIZE_PTR;
494*4882a593Smuzhiyun
495*4882a593Smuzhiyun m = slob_alloc(size + minalign, gfp, align, node, minalign);
496*4882a593Smuzhiyun
497*4882a593Smuzhiyun if (!m)
498*4882a593Smuzhiyun return NULL;
499*4882a593Smuzhiyun *m = size;
500*4882a593Smuzhiyun ret = (void *)m + minalign;
501*4882a593Smuzhiyun
502*4882a593Smuzhiyun trace_kmalloc_node(caller, ret,
503*4882a593Smuzhiyun size, size + minalign, gfp, node);
504*4882a593Smuzhiyun } else {
505*4882a593Smuzhiyun unsigned int order = get_order(size);
506*4882a593Smuzhiyun
507*4882a593Smuzhiyun if (likely(order))
508*4882a593Smuzhiyun gfp |= __GFP_COMP;
509*4882a593Smuzhiyun ret = slob_new_pages(gfp, order, node);
510*4882a593Smuzhiyun
511*4882a593Smuzhiyun trace_kmalloc_node(caller, ret,
512*4882a593Smuzhiyun size, PAGE_SIZE << order, gfp, node);
513*4882a593Smuzhiyun }
514*4882a593Smuzhiyun
515*4882a593Smuzhiyun kmemleak_alloc(ret, size, 1, gfp);
516*4882a593Smuzhiyun return ret;
517*4882a593Smuzhiyun }
518*4882a593Smuzhiyun
__kmalloc(size_t size,gfp_t gfp)519*4882a593Smuzhiyun void *__kmalloc(size_t size, gfp_t gfp)
520*4882a593Smuzhiyun {
521*4882a593Smuzhiyun return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, _RET_IP_);
522*4882a593Smuzhiyun }
523*4882a593Smuzhiyun EXPORT_SYMBOL(__kmalloc);
524*4882a593Smuzhiyun
__kmalloc_track_caller(size_t size,gfp_t gfp,unsigned long caller)525*4882a593Smuzhiyun void *__kmalloc_track_caller(size_t size, gfp_t gfp, unsigned long caller)
526*4882a593Smuzhiyun {
527*4882a593Smuzhiyun return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, caller);
528*4882a593Smuzhiyun }
529*4882a593Smuzhiyun EXPORT_SYMBOL(__kmalloc_track_caller);
530*4882a593Smuzhiyun
531*4882a593Smuzhiyun #ifdef CONFIG_NUMA
__kmalloc_node_track_caller(size_t size,gfp_t gfp,int node,unsigned long caller)532*4882a593Smuzhiyun void *__kmalloc_node_track_caller(size_t size, gfp_t gfp,
533*4882a593Smuzhiyun int node, unsigned long caller)
534*4882a593Smuzhiyun {
535*4882a593Smuzhiyun return __do_kmalloc_node(size, gfp, node, caller);
536*4882a593Smuzhiyun }
537*4882a593Smuzhiyun EXPORT_SYMBOL(__kmalloc_node_track_caller);
538*4882a593Smuzhiyun #endif
539*4882a593Smuzhiyun
kfree(const void * block)540*4882a593Smuzhiyun void kfree(const void *block)
541*4882a593Smuzhiyun {
542*4882a593Smuzhiyun struct page *sp;
543*4882a593Smuzhiyun
544*4882a593Smuzhiyun trace_kfree(_RET_IP_, block);
545*4882a593Smuzhiyun
546*4882a593Smuzhiyun if (unlikely(ZERO_OR_NULL_PTR(block)))
547*4882a593Smuzhiyun return;
548*4882a593Smuzhiyun kmemleak_free(block);
549*4882a593Smuzhiyun
550*4882a593Smuzhiyun sp = virt_to_page(block);
551*4882a593Smuzhiyun if (PageSlab(sp)) {
552*4882a593Smuzhiyun unsigned int align = max_t(unsigned int,
553*4882a593Smuzhiyun ARCH_KMALLOC_MINALIGN,
554*4882a593Smuzhiyun arch_slab_minalign());
555*4882a593Smuzhiyun unsigned int *m = (unsigned int *)(block - align);
556*4882a593Smuzhiyun
557*4882a593Smuzhiyun slob_free(m, *m + align);
558*4882a593Smuzhiyun } else {
559*4882a593Smuzhiyun unsigned int order = compound_order(sp);
560*4882a593Smuzhiyun mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
561*4882a593Smuzhiyun -(PAGE_SIZE << order));
562*4882a593Smuzhiyun __free_pages(sp, order);
563*4882a593Smuzhiyun
564*4882a593Smuzhiyun }
565*4882a593Smuzhiyun }
566*4882a593Smuzhiyun EXPORT_SYMBOL(kfree);
567*4882a593Smuzhiyun
568*4882a593Smuzhiyun /* can't use ksize for kmem_cache_alloc memory, only kmalloc */
__ksize(const void * block)569*4882a593Smuzhiyun size_t __ksize(const void *block)
570*4882a593Smuzhiyun {
571*4882a593Smuzhiyun struct page *sp;
572*4882a593Smuzhiyun unsigned int align;
573*4882a593Smuzhiyun unsigned int *m;
574*4882a593Smuzhiyun
575*4882a593Smuzhiyun BUG_ON(!block);
576*4882a593Smuzhiyun if (unlikely(block == ZERO_SIZE_PTR))
577*4882a593Smuzhiyun return 0;
578*4882a593Smuzhiyun
579*4882a593Smuzhiyun sp = virt_to_page(block);
580*4882a593Smuzhiyun if (unlikely(!PageSlab(sp)))
581*4882a593Smuzhiyun return page_size(sp);
582*4882a593Smuzhiyun
583*4882a593Smuzhiyun align = max_t(unsigned int, ARCH_KMALLOC_MINALIGN,
584*4882a593Smuzhiyun arch_slab_minalign());
585*4882a593Smuzhiyun m = (unsigned int *)(block - align);
586*4882a593Smuzhiyun return SLOB_UNITS(*m) * SLOB_UNIT;
587*4882a593Smuzhiyun }
588*4882a593Smuzhiyun EXPORT_SYMBOL(__ksize);
589*4882a593Smuzhiyun
__kmem_cache_create(struct kmem_cache * c,slab_flags_t flags)590*4882a593Smuzhiyun int __kmem_cache_create(struct kmem_cache *c, slab_flags_t flags)
591*4882a593Smuzhiyun {
592*4882a593Smuzhiyun if (flags & SLAB_TYPESAFE_BY_RCU) {
593*4882a593Smuzhiyun /* leave room for rcu footer at the end of object */
594*4882a593Smuzhiyun c->size += sizeof(struct slob_rcu);
595*4882a593Smuzhiyun }
596*4882a593Smuzhiyun c->flags = flags;
597*4882a593Smuzhiyun return 0;
598*4882a593Smuzhiyun }
599*4882a593Smuzhiyun
slob_alloc_node(struct kmem_cache * c,gfp_t flags,int node)600*4882a593Smuzhiyun static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
601*4882a593Smuzhiyun {
602*4882a593Smuzhiyun void *b;
603*4882a593Smuzhiyun
604*4882a593Smuzhiyun flags &= gfp_allowed_mask;
605*4882a593Smuzhiyun
606*4882a593Smuzhiyun fs_reclaim_acquire(flags);
607*4882a593Smuzhiyun fs_reclaim_release(flags);
608*4882a593Smuzhiyun
609*4882a593Smuzhiyun if (c->size < PAGE_SIZE) {
610*4882a593Smuzhiyun b = slob_alloc(c->size, flags, c->align, node, 0);
611*4882a593Smuzhiyun trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
612*4882a593Smuzhiyun SLOB_UNITS(c->size) * SLOB_UNIT,
613*4882a593Smuzhiyun flags, node);
614*4882a593Smuzhiyun } else {
615*4882a593Smuzhiyun b = slob_new_pages(flags, get_order(c->size), node);
616*4882a593Smuzhiyun trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
617*4882a593Smuzhiyun PAGE_SIZE << get_order(c->size),
618*4882a593Smuzhiyun flags, node);
619*4882a593Smuzhiyun }
620*4882a593Smuzhiyun
621*4882a593Smuzhiyun if (b && c->ctor) {
622*4882a593Smuzhiyun WARN_ON_ONCE(flags & __GFP_ZERO);
623*4882a593Smuzhiyun c->ctor(b);
624*4882a593Smuzhiyun }
625*4882a593Smuzhiyun
626*4882a593Smuzhiyun kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags);
627*4882a593Smuzhiyun return b;
628*4882a593Smuzhiyun }
629*4882a593Smuzhiyun
kmem_cache_alloc(struct kmem_cache * cachep,gfp_t flags)630*4882a593Smuzhiyun void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
631*4882a593Smuzhiyun {
632*4882a593Smuzhiyun return slob_alloc_node(cachep, flags, NUMA_NO_NODE);
633*4882a593Smuzhiyun }
634*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc);
635*4882a593Smuzhiyun
636*4882a593Smuzhiyun #ifdef CONFIG_NUMA
__kmalloc_node(size_t size,gfp_t gfp,int node)637*4882a593Smuzhiyun void *__kmalloc_node(size_t size, gfp_t gfp, int node)
638*4882a593Smuzhiyun {
639*4882a593Smuzhiyun return __do_kmalloc_node(size, gfp, node, _RET_IP_);
640*4882a593Smuzhiyun }
641*4882a593Smuzhiyun EXPORT_SYMBOL(__kmalloc_node);
642*4882a593Smuzhiyun
kmem_cache_alloc_node(struct kmem_cache * cachep,gfp_t gfp,int node)643*4882a593Smuzhiyun void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t gfp, int node)
644*4882a593Smuzhiyun {
645*4882a593Smuzhiyun return slob_alloc_node(cachep, gfp, node);
646*4882a593Smuzhiyun }
647*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc_node);
648*4882a593Smuzhiyun #endif
649*4882a593Smuzhiyun
__kmem_cache_free(void * b,int size)650*4882a593Smuzhiyun static void __kmem_cache_free(void *b, int size)
651*4882a593Smuzhiyun {
652*4882a593Smuzhiyun if (size < PAGE_SIZE)
653*4882a593Smuzhiyun slob_free(b, size);
654*4882a593Smuzhiyun else
655*4882a593Smuzhiyun slob_free_pages(b, get_order(size));
656*4882a593Smuzhiyun }
657*4882a593Smuzhiyun
kmem_rcu_free(struct rcu_head * head)658*4882a593Smuzhiyun static void kmem_rcu_free(struct rcu_head *head)
659*4882a593Smuzhiyun {
660*4882a593Smuzhiyun struct slob_rcu *slob_rcu = (struct slob_rcu *)head;
661*4882a593Smuzhiyun void *b = (void *)slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));
662*4882a593Smuzhiyun
663*4882a593Smuzhiyun __kmem_cache_free(b, slob_rcu->size);
664*4882a593Smuzhiyun }
665*4882a593Smuzhiyun
kmem_cache_free(struct kmem_cache * c,void * b)666*4882a593Smuzhiyun void kmem_cache_free(struct kmem_cache *c, void *b)
667*4882a593Smuzhiyun {
668*4882a593Smuzhiyun kmemleak_free_recursive(b, c->flags);
669*4882a593Smuzhiyun if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) {
670*4882a593Smuzhiyun struct slob_rcu *slob_rcu;
671*4882a593Smuzhiyun slob_rcu = b + (c->size - sizeof(struct slob_rcu));
672*4882a593Smuzhiyun slob_rcu->size = c->size;
673*4882a593Smuzhiyun call_rcu(&slob_rcu->head, kmem_rcu_free);
674*4882a593Smuzhiyun } else {
675*4882a593Smuzhiyun __kmem_cache_free(b, c->size);
676*4882a593Smuzhiyun }
677*4882a593Smuzhiyun
678*4882a593Smuzhiyun trace_kmem_cache_free(_RET_IP_, b);
679*4882a593Smuzhiyun }
680*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_free);
681*4882a593Smuzhiyun
kmem_cache_free_bulk(struct kmem_cache * s,size_t size,void ** p)682*4882a593Smuzhiyun void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
683*4882a593Smuzhiyun {
684*4882a593Smuzhiyun __kmem_cache_free_bulk(s, size, p);
685*4882a593Smuzhiyun }
686*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_free_bulk);
687*4882a593Smuzhiyun
kmem_cache_alloc_bulk(struct kmem_cache * s,gfp_t flags,size_t size,void ** p)688*4882a593Smuzhiyun int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
689*4882a593Smuzhiyun void **p)
690*4882a593Smuzhiyun {
691*4882a593Smuzhiyun return __kmem_cache_alloc_bulk(s, flags, size, p);
692*4882a593Smuzhiyun }
693*4882a593Smuzhiyun EXPORT_SYMBOL(kmem_cache_alloc_bulk);
694*4882a593Smuzhiyun
__kmem_cache_shutdown(struct kmem_cache * c)695*4882a593Smuzhiyun int __kmem_cache_shutdown(struct kmem_cache *c)
696*4882a593Smuzhiyun {
697*4882a593Smuzhiyun /* No way to check for remaining objects */
698*4882a593Smuzhiyun return 0;
699*4882a593Smuzhiyun }
700*4882a593Smuzhiyun
__kmem_cache_release(struct kmem_cache * c)701*4882a593Smuzhiyun void __kmem_cache_release(struct kmem_cache *c)
702*4882a593Smuzhiyun {
703*4882a593Smuzhiyun }
704*4882a593Smuzhiyun
__kmem_cache_shrink(struct kmem_cache * d)705*4882a593Smuzhiyun int __kmem_cache_shrink(struct kmem_cache *d)
706*4882a593Smuzhiyun {
707*4882a593Smuzhiyun return 0;
708*4882a593Smuzhiyun }
709*4882a593Smuzhiyun
710*4882a593Smuzhiyun struct kmem_cache kmem_cache_boot = {
711*4882a593Smuzhiyun .name = "kmem_cache",
712*4882a593Smuzhiyun .size = sizeof(struct kmem_cache),
713*4882a593Smuzhiyun .flags = SLAB_PANIC,
714*4882a593Smuzhiyun .align = ARCH_KMALLOC_MINALIGN,
715*4882a593Smuzhiyun };
716*4882a593Smuzhiyun
kmem_cache_init(void)717*4882a593Smuzhiyun void __init kmem_cache_init(void)
718*4882a593Smuzhiyun {
719*4882a593Smuzhiyun kmem_cache = &kmem_cache_boot;
720*4882a593Smuzhiyun slab_state = UP;
721*4882a593Smuzhiyun }
722*4882a593Smuzhiyun
kmem_cache_init_late(void)723*4882a593Smuzhiyun void __init kmem_cache_init_late(void)
724*4882a593Smuzhiyun {
725*4882a593Smuzhiyun slab_state = FULL;
726*4882a593Smuzhiyun }
727