1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun #ifndef _LINUX_MMZONE_H
3*4882a593Smuzhiyun #define _LINUX_MMZONE_H
4*4882a593Smuzhiyun
5*4882a593Smuzhiyun #ifndef __ASSEMBLY__
6*4882a593Smuzhiyun #ifndef __GENERATING_BOUNDS_H
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun #include <linux/spinlock.h>
9*4882a593Smuzhiyun #include <linux/list.h>
10*4882a593Smuzhiyun #include <linux/wait.h>
11*4882a593Smuzhiyun #include <linux/bitops.h>
12*4882a593Smuzhiyun #include <linux/cache.h>
13*4882a593Smuzhiyun #include <linux/threads.h>
14*4882a593Smuzhiyun #include <linux/numa.h>
15*4882a593Smuzhiyun #include <linux/init.h>
16*4882a593Smuzhiyun #include <linux/seqlock.h>
17*4882a593Smuzhiyun #include <linux/nodemask.h>
18*4882a593Smuzhiyun #include <linux/pageblock-flags.h>
19*4882a593Smuzhiyun #include <linux/page-flags-layout.h>
20*4882a593Smuzhiyun #include <linux/atomic.h>
21*4882a593Smuzhiyun #include <linux/mm_types.h>
22*4882a593Smuzhiyun #include <linux/page-flags.h>
23*4882a593Smuzhiyun #include <linux/android_kabi.h>
24*4882a593Smuzhiyun #include <asm/page.h>
25*4882a593Smuzhiyun
26*4882a593Smuzhiyun /* Free memory management - zoned buddy allocator. */
27*4882a593Smuzhiyun #ifndef CONFIG_FORCE_MAX_ZONEORDER
28*4882a593Smuzhiyun #define MAX_ORDER 11
29*4882a593Smuzhiyun #else
30*4882a593Smuzhiyun #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
31*4882a593Smuzhiyun #endif
32*4882a593Smuzhiyun #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun /*
35*4882a593Smuzhiyun * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
36*4882a593Smuzhiyun * costly to service. That is between allocation orders which should
37*4882a593Smuzhiyun * coalesce naturally under reasonable reclaim pressure and those which
38*4882a593Smuzhiyun * will not.
39*4882a593Smuzhiyun */
40*4882a593Smuzhiyun #define PAGE_ALLOC_COSTLY_ORDER 3
41*4882a593Smuzhiyun
42*4882a593Smuzhiyun #define MAX_KSWAPD_THREADS 16
43*4882a593Smuzhiyun
44*4882a593Smuzhiyun enum migratetype {
45*4882a593Smuzhiyun MIGRATE_UNMOVABLE,
46*4882a593Smuzhiyun MIGRATE_MOVABLE,
47*4882a593Smuzhiyun MIGRATE_RECLAIMABLE,
48*4882a593Smuzhiyun #ifdef CONFIG_CMA
49*4882a593Smuzhiyun /*
50*4882a593Smuzhiyun * MIGRATE_CMA migration type is designed to mimic the way
51*4882a593Smuzhiyun * ZONE_MOVABLE works. Only movable pages can be allocated
52*4882a593Smuzhiyun * from MIGRATE_CMA pageblocks and page allocator never
53*4882a593Smuzhiyun * implicitly change migration type of MIGRATE_CMA pageblock.
54*4882a593Smuzhiyun *
55*4882a593Smuzhiyun * The way to use it is to change migratetype of a range of
56*4882a593Smuzhiyun * pageblocks to MIGRATE_CMA which can be done by
57*4882a593Smuzhiyun * __free_pageblock_cma() function. What is important though
58*4882a593Smuzhiyun * is that a range of pageblocks must be aligned to
59*4882a593Smuzhiyun * MAX_ORDER_NR_PAGES should biggest page be bigger then
60*4882a593Smuzhiyun * a single pageblock.
61*4882a593Smuzhiyun */
62*4882a593Smuzhiyun MIGRATE_CMA,
63*4882a593Smuzhiyun #endif
64*4882a593Smuzhiyun MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
65*4882a593Smuzhiyun MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
66*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_ISOLATION
67*4882a593Smuzhiyun MIGRATE_ISOLATE, /* can't allocate from here */
68*4882a593Smuzhiyun #endif
69*4882a593Smuzhiyun MIGRATE_TYPES
70*4882a593Smuzhiyun };
71*4882a593Smuzhiyun
72*4882a593Smuzhiyun /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */
73*4882a593Smuzhiyun extern const char * const migratetype_names[MIGRATE_TYPES];
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun #ifdef CONFIG_CMA
76*4882a593Smuzhiyun # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
77*4882a593Smuzhiyun # define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
78*4882a593Smuzhiyun # define get_cma_migrate_type() MIGRATE_CMA
79*4882a593Smuzhiyun #else
80*4882a593Smuzhiyun # define is_migrate_cma(migratetype) false
81*4882a593Smuzhiyun # define is_migrate_cma_page(_page) false
82*4882a593Smuzhiyun # define get_cma_migrate_type() MIGRATE_MOVABLE
83*4882a593Smuzhiyun #endif
84*4882a593Smuzhiyun
is_migrate_movable(int mt)85*4882a593Smuzhiyun static inline bool is_migrate_movable(int mt)
86*4882a593Smuzhiyun {
87*4882a593Smuzhiyun return is_migrate_cma(mt) || mt == MIGRATE_MOVABLE;
88*4882a593Smuzhiyun }
89*4882a593Smuzhiyun
90*4882a593Smuzhiyun #define for_each_migratetype_order(order, type) \
91*4882a593Smuzhiyun for (order = 0; order < MAX_ORDER; order++) \
92*4882a593Smuzhiyun for (type = 0; type < MIGRATE_TYPES; type++)
93*4882a593Smuzhiyun
94*4882a593Smuzhiyun extern int page_group_by_mobility_disabled;
95*4882a593Smuzhiyun
96*4882a593Smuzhiyun #define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
97*4882a593Smuzhiyun
98*4882a593Smuzhiyun #define get_pageblock_migratetype(page) \
99*4882a593Smuzhiyun get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
100*4882a593Smuzhiyun
101*4882a593Smuzhiyun struct free_area {
102*4882a593Smuzhiyun struct list_head free_list[MIGRATE_TYPES];
103*4882a593Smuzhiyun unsigned long nr_free;
104*4882a593Smuzhiyun };
105*4882a593Smuzhiyun
get_page_from_free_area(struct free_area * area,int migratetype)106*4882a593Smuzhiyun static inline struct page *get_page_from_free_area(struct free_area *area,
107*4882a593Smuzhiyun int migratetype)
108*4882a593Smuzhiyun {
109*4882a593Smuzhiyun return list_first_entry_or_null(&area->free_list[migratetype],
110*4882a593Smuzhiyun struct page, lru);
111*4882a593Smuzhiyun }
112*4882a593Smuzhiyun
free_area_empty(struct free_area * area,int migratetype)113*4882a593Smuzhiyun static inline bool free_area_empty(struct free_area *area, int migratetype)
114*4882a593Smuzhiyun {
115*4882a593Smuzhiyun return list_empty(&area->free_list[migratetype]);
116*4882a593Smuzhiyun }
117*4882a593Smuzhiyun
118*4882a593Smuzhiyun struct pglist_data;
119*4882a593Smuzhiyun
120*4882a593Smuzhiyun /*
121*4882a593Smuzhiyun * zone->lock and the zone lru_lock are two of the hottest locks in the kernel.
122*4882a593Smuzhiyun * So add a wild amount of padding here to ensure that they fall into separate
123*4882a593Smuzhiyun * cachelines. There are very few zone structures in the machine, so space
124*4882a593Smuzhiyun * consumption is not a concern here.
125*4882a593Smuzhiyun */
126*4882a593Smuzhiyun #if defined(CONFIG_SMP)
127*4882a593Smuzhiyun struct zone_padding {
128*4882a593Smuzhiyun char x[0];
129*4882a593Smuzhiyun } ____cacheline_internodealigned_in_smp;
130*4882a593Smuzhiyun #define ZONE_PADDING(name) struct zone_padding name;
131*4882a593Smuzhiyun #else
132*4882a593Smuzhiyun #define ZONE_PADDING(name)
133*4882a593Smuzhiyun #endif
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun #ifdef CONFIG_NUMA
136*4882a593Smuzhiyun enum numa_stat_item {
137*4882a593Smuzhiyun NUMA_HIT, /* allocated in intended node */
138*4882a593Smuzhiyun NUMA_MISS, /* allocated in non intended node */
139*4882a593Smuzhiyun NUMA_FOREIGN, /* was intended here, hit elsewhere */
140*4882a593Smuzhiyun NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */
141*4882a593Smuzhiyun NUMA_LOCAL, /* allocation from local node */
142*4882a593Smuzhiyun NUMA_OTHER, /* allocation from other node */
143*4882a593Smuzhiyun NR_VM_NUMA_STAT_ITEMS
144*4882a593Smuzhiyun };
145*4882a593Smuzhiyun #else
146*4882a593Smuzhiyun #define NR_VM_NUMA_STAT_ITEMS 0
147*4882a593Smuzhiyun #endif
148*4882a593Smuzhiyun
149*4882a593Smuzhiyun enum zone_stat_item {
150*4882a593Smuzhiyun /* First 128 byte cacheline (assuming 64 bit words) */
151*4882a593Smuzhiyun NR_FREE_PAGES,
152*4882a593Smuzhiyun NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
153*4882a593Smuzhiyun NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
154*4882a593Smuzhiyun NR_ZONE_ACTIVE_ANON,
155*4882a593Smuzhiyun NR_ZONE_INACTIVE_FILE,
156*4882a593Smuzhiyun NR_ZONE_ACTIVE_FILE,
157*4882a593Smuzhiyun NR_ZONE_UNEVICTABLE,
158*4882a593Smuzhiyun NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
159*4882a593Smuzhiyun NR_MLOCK, /* mlock()ed pages found and moved off LRU */
160*4882a593Smuzhiyun NR_PAGETABLE, /* used for pagetables */
161*4882a593Smuzhiyun /* Second 128 byte cacheline */
162*4882a593Smuzhiyun NR_BOUNCE,
163*4882a593Smuzhiyun NR_ZSPAGES, /* allocated in zsmalloc */
164*4882a593Smuzhiyun NR_FREE_CMA_PAGES,
165*4882a593Smuzhiyun NR_VM_ZONE_STAT_ITEMS };
166*4882a593Smuzhiyun
167*4882a593Smuzhiyun enum node_stat_item {
168*4882a593Smuzhiyun NR_LRU_BASE,
169*4882a593Smuzhiyun NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
170*4882a593Smuzhiyun NR_ACTIVE_ANON, /* " " " " " */
171*4882a593Smuzhiyun NR_INACTIVE_FILE, /* " " " " " */
172*4882a593Smuzhiyun NR_ACTIVE_FILE, /* " " " " " */
173*4882a593Smuzhiyun NR_UNEVICTABLE, /* " " " " " */
174*4882a593Smuzhiyun NR_SLAB_RECLAIMABLE_B,
175*4882a593Smuzhiyun NR_SLAB_UNRECLAIMABLE_B,
176*4882a593Smuzhiyun NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
177*4882a593Smuzhiyun NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
178*4882a593Smuzhiyun WORKINGSET_NODES,
179*4882a593Smuzhiyun WORKINGSET_REFAULT_BASE,
180*4882a593Smuzhiyun WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE,
181*4882a593Smuzhiyun WORKINGSET_REFAULT_FILE,
182*4882a593Smuzhiyun WORKINGSET_ACTIVATE_BASE,
183*4882a593Smuzhiyun WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE,
184*4882a593Smuzhiyun WORKINGSET_ACTIVATE_FILE,
185*4882a593Smuzhiyun WORKINGSET_RESTORE_BASE,
186*4882a593Smuzhiyun WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE,
187*4882a593Smuzhiyun WORKINGSET_RESTORE_FILE,
188*4882a593Smuzhiyun WORKINGSET_NODERECLAIM,
189*4882a593Smuzhiyun NR_ANON_MAPPED, /* Mapped anonymous pages */
190*4882a593Smuzhiyun NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
191*4882a593Smuzhiyun only modified from process context */
192*4882a593Smuzhiyun NR_FILE_PAGES,
193*4882a593Smuzhiyun NR_FILE_DIRTY,
194*4882a593Smuzhiyun NR_WRITEBACK,
195*4882a593Smuzhiyun NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
196*4882a593Smuzhiyun NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
197*4882a593Smuzhiyun NR_SHMEM_THPS,
198*4882a593Smuzhiyun NR_SHMEM_PMDMAPPED,
199*4882a593Smuzhiyun NR_FILE_THPS,
200*4882a593Smuzhiyun NR_FILE_PMDMAPPED,
201*4882a593Smuzhiyun NR_ANON_THPS,
202*4882a593Smuzhiyun NR_VMSCAN_WRITE,
203*4882a593Smuzhiyun NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
204*4882a593Smuzhiyun NR_DIRTIED, /* page dirtyings since bootup */
205*4882a593Smuzhiyun NR_WRITTEN, /* page writings since bootup */
206*4882a593Smuzhiyun NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */
207*4882a593Smuzhiyun NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */
208*4882a593Smuzhiyun NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */
209*4882a593Smuzhiyun NR_KERNEL_STACK_KB, /* measured in KiB */
210*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
211*4882a593Smuzhiyun NR_KERNEL_SCS_KB, /* measured in KiB */
212*4882a593Smuzhiyun #endif
213*4882a593Smuzhiyun NR_VM_NODE_STAT_ITEMS
214*4882a593Smuzhiyun };
215*4882a593Smuzhiyun
216*4882a593Smuzhiyun /*
217*4882a593Smuzhiyun * Returns true if the value is measured in bytes (most vmstat values are
218*4882a593Smuzhiyun * measured in pages). This defines the API part, the internal representation
219*4882a593Smuzhiyun * might be different.
220*4882a593Smuzhiyun */
vmstat_item_in_bytes(int idx)221*4882a593Smuzhiyun static __always_inline bool vmstat_item_in_bytes(int idx)
222*4882a593Smuzhiyun {
223*4882a593Smuzhiyun /*
224*4882a593Smuzhiyun * Global and per-node slab counters track slab pages.
225*4882a593Smuzhiyun * It's expected that changes are multiples of PAGE_SIZE.
226*4882a593Smuzhiyun * Internally values are stored in pages.
227*4882a593Smuzhiyun *
228*4882a593Smuzhiyun * Per-memcg and per-lruvec counters track memory, consumed
229*4882a593Smuzhiyun * by individual slab objects. These counters are actually
230*4882a593Smuzhiyun * byte-precise.
231*4882a593Smuzhiyun */
232*4882a593Smuzhiyun return (idx == NR_SLAB_RECLAIMABLE_B ||
233*4882a593Smuzhiyun idx == NR_SLAB_UNRECLAIMABLE_B);
234*4882a593Smuzhiyun }
235*4882a593Smuzhiyun
236*4882a593Smuzhiyun /*
237*4882a593Smuzhiyun * We do arithmetic on the LRU lists in various places in the code,
238*4882a593Smuzhiyun * so it is important to keep the active lists LRU_ACTIVE higher in
239*4882a593Smuzhiyun * the array than the corresponding inactive lists, and to keep
240*4882a593Smuzhiyun * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
241*4882a593Smuzhiyun *
242*4882a593Smuzhiyun * This has to be kept in sync with the statistics in zone_stat_item
243*4882a593Smuzhiyun * above and the descriptions in vmstat_text in mm/vmstat.c
244*4882a593Smuzhiyun */
245*4882a593Smuzhiyun #define LRU_BASE 0
246*4882a593Smuzhiyun #define LRU_ACTIVE 1
247*4882a593Smuzhiyun #define LRU_FILE 2
248*4882a593Smuzhiyun
249*4882a593Smuzhiyun enum lru_list {
250*4882a593Smuzhiyun LRU_INACTIVE_ANON = LRU_BASE,
251*4882a593Smuzhiyun LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
252*4882a593Smuzhiyun LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
253*4882a593Smuzhiyun LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
254*4882a593Smuzhiyun LRU_UNEVICTABLE,
255*4882a593Smuzhiyun NR_LRU_LISTS
256*4882a593Smuzhiyun };
257*4882a593Smuzhiyun
258*4882a593Smuzhiyun #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++)
259*4882a593Smuzhiyun
260*4882a593Smuzhiyun #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)
261*4882a593Smuzhiyun
is_file_lru(enum lru_list lru)262*4882a593Smuzhiyun static inline bool is_file_lru(enum lru_list lru)
263*4882a593Smuzhiyun {
264*4882a593Smuzhiyun return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE);
265*4882a593Smuzhiyun }
266*4882a593Smuzhiyun
is_active_lru(enum lru_list lru)267*4882a593Smuzhiyun static inline bool is_active_lru(enum lru_list lru)
268*4882a593Smuzhiyun {
269*4882a593Smuzhiyun return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
270*4882a593Smuzhiyun }
271*4882a593Smuzhiyun
272*4882a593Smuzhiyun #define ANON_AND_FILE 2
273*4882a593Smuzhiyun
274*4882a593Smuzhiyun enum lruvec_flags {
275*4882a593Smuzhiyun LRUVEC_CONGESTED, /* lruvec has many dirty pages
276*4882a593Smuzhiyun * backed by a congested BDI
277*4882a593Smuzhiyun */
278*4882a593Smuzhiyun };
279*4882a593Smuzhiyun
280*4882a593Smuzhiyun struct lruvec {
281*4882a593Smuzhiyun struct list_head lists[NR_LRU_LISTS];
282*4882a593Smuzhiyun /*
283*4882a593Smuzhiyun * These track the cost of reclaiming one LRU - file or anon -
284*4882a593Smuzhiyun * over the other. As the observed cost of reclaiming one LRU
285*4882a593Smuzhiyun * increases, the reclaim scan balance tips toward the other.
286*4882a593Smuzhiyun */
287*4882a593Smuzhiyun unsigned long anon_cost;
288*4882a593Smuzhiyun unsigned long file_cost;
289*4882a593Smuzhiyun /* Non-resident age, driven by LRU movement */
290*4882a593Smuzhiyun atomic_long_t nonresident_age;
291*4882a593Smuzhiyun /* Refaults at the time of last reclaim cycle */
292*4882a593Smuzhiyun unsigned long refaults[ANON_AND_FILE];
293*4882a593Smuzhiyun /* Various lruvec state flags (enum lruvec_flags) */
294*4882a593Smuzhiyun unsigned long flags;
295*4882a593Smuzhiyun #ifdef CONFIG_MEMCG
296*4882a593Smuzhiyun struct pglist_data *pgdat;
297*4882a593Smuzhiyun #endif
298*4882a593Smuzhiyun };
299*4882a593Smuzhiyun
300*4882a593Smuzhiyun /* Isolate unmapped pages */
301*4882a593Smuzhiyun #define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2)
302*4882a593Smuzhiyun /* Isolate for asynchronous migration */
303*4882a593Smuzhiyun #define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4)
304*4882a593Smuzhiyun /* Isolate unevictable pages */
305*4882a593Smuzhiyun #define ISOLATE_UNEVICTABLE ((__force isolate_mode_t)0x8)
306*4882a593Smuzhiyun
307*4882a593Smuzhiyun /* LRU Isolation modes. */
308*4882a593Smuzhiyun typedef unsigned __bitwise isolate_mode_t;
309*4882a593Smuzhiyun
310*4882a593Smuzhiyun enum zone_watermarks {
311*4882a593Smuzhiyun WMARK_MIN,
312*4882a593Smuzhiyun WMARK_LOW,
313*4882a593Smuzhiyun WMARK_HIGH,
314*4882a593Smuzhiyun NR_WMARK
315*4882a593Smuzhiyun };
316*4882a593Smuzhiyun
317*4882a593Smuzhiyun #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
318*4882a593Smuzhiyun #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
319*4882a593Smuzhiyun #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
320*4882a593Smuzhiyun #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
321*4882a593Smuzhiyun
322*4882a593Smuzhiyun struct per_cpu_pages {
323*4882a593Smuzhiyun int count; /* number of pages in the list */
324*4882a593Smuzhiyun int high; /* high watermark, emptying needed */
325*4882a593Smuzhiyun int batch; /* chunk size for buddy add/remove */
326*4882a593Smuzhiyun
327*4882a593Smuzhiyun /* Lists of pages, one per migrate type stored on the pcp-lists */
328*4882a593Smuzhiyun struct list_head lists[MIGRATE_PCPTYPES];
329*4882a593Smuzhiyun };
330*4882a593Smuzhiyun
331*4882a593Smuzhiyun struct per_cpu_pageset {
332*4882a593Smuzhiyun struct per_cpu_pages pcp;
333*4882a593Smuzhiyun #ifdef CONFIG_NUMA
334*4882a593Smuzhiyun s8 expire;
335*4882a593Smuzhiyun u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS];
336*4882a593Smuzhiyun #endif
337*4882a593Smuzhiyun #ifdef CONFIG_SMP
338*4882a593Smuzhiyun s8 stat_threshold;
339*4882a593Smuzhiyun s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
340*4882a593Smuzhiyun #endif
341*4882a593Smuzhiyun };
342*4882a593Smuzhiyun
343*4882a593Smuzhiyun struct per_cpu_nodestat {
344*4882a593Smuzhiyun s8 stat_threshold;
345*4882a593Smuzhiyun s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS];
346*4882a593Smuzhiyun };
347*4882a593Smuzhiyun
348*4882a593Smuzhiyun #endif /* !__GENERATING_BOUNDS.H */
349*4882a593Smuzhiyun
350*4882a593Smuzhiyun enum zone_type {
351*4882a593Smuzhiyun /*
352*4882a593Smuzhiyun * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able
353*4882a593Smuzhiyun * to DMA to all of the addressable memory (ZONE_NORMAL).
354*4882a593Smuzhiyun * On architectures where this area covers the whole 32 bit address
355*4882a593Smuzhiyun * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller
356*4882a593Smuzhiyun * DMA addressing constraints. This distinction is important as a 32bit
357*4882a593Smuzhiyun * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
358*4882a593Smuzhiyun * platforms may need both zones as they support peripherals with
359*4882a593Smuzhiyun * different DMA addressing limitations.
360*4882a593Smuzhiyun */
361*4882a593Smuzhiyun #ifdef CONFIG_ZONE_DMA
362*4882a593Smuzhiyun ZONE_DMA,
363*4882a593Smuzhiyun #endif
364*4882a593Smuzhiyun #ifdef CONFIG_ZONE_DMA32
365*4882a593Smuzhiyun ZONE_DMA32,
366*4882a593Smuzhiyun #endif
367*4882a593Smuzhiyun /*
368*4882a593Smuzhiyun * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
369*4882a593Smuzhiyun * performed on pages in ZONE_NORMAL if the DMA devices support
370*4882a593Smuzhiyun * transfers to all addressable memory.
371*4882a593Smuzhiyun */
372*4882a593Smuzhiyun ZONE_NORMAL,
373*4882a593Smuzhiyun #ifdef CONFIG_HIGHMEM
374*4882a593Smuzhiyun /*
375*4882a593Smuzhiyun * A memory area that is only addressable by the kernel through
376*4882a593Smuzhiyun * mapping portions into its own address space. This is for example
377*4882a593Smuzhiyun * used by i386 to allow the kernel to address the memory beyond
378*4882a593Smuzhiyun * 900MB. The kernel will set up special mappings (page
379*4882a593Smuzhiyun * table entries on i386) for each page that the kernel needs to
380*4882a593Smuzhiyun * access.
381*4882a593Smuzhiyun */
382*4882a593Smuzhiyun ZONE_HIGHMEM,
383*4882a593Smuzhiyun #endif
384*4882a593Smuzhiyun /*
385*4882a593Smuzhiyun * ZONE_MOVABLE is similar to ZONE_NORMAL, except that it contains
386*4882a593Smuzhiyun * movable pages with few exceptional cases described below. Main use
387*4882a593Smuzhiyun * cases for ZONE_MOVABLE are to make memory offlining/unplug more
388*4882a593Smuzhiyun * likely to succeed, and to locally limit unmovable allocations - e.g.,
389*4882a593Smuzhiyun * to increase the number of THP/huge pages. Notable special cases are:
390*4882a593Smuzhiyun *
391*4882a593Smuzhiyun * 1. Pinned pages: (long-term) pinning of movable pages might
392*4882a593Smuzhiyun * essentially turn such pages unmovable. Memory offlining might
393*4882a593Smuzhiyun * retry a long time.
394*4882a593Smuzhiyun * 2. memblock allocations: kernelcore/movablecore setups might create
395*4882a593Smuzhiyun * situations where ZONE_MOVABLE contains unmovable allocations
396*4882a593Smuzhiyun * after boot. Memory offlining and allocations fail early.
397*4882a593Smuzhiyun * 3. Memory holes: kernelcore/movablecore setups might create very rare
398*4882a593Smuzhiyun * situations where ZONE_MOVABLE contains memory holes after boot,
399*4882a593Smuzhiyun * for example, if we have sections that are only partially
400*4882a593Smuzhiyun * populated. Memory offlining and allocations fail early.
401*4882a593Smuzhiyun * 4. PG_hwpoison pages: while poisoned pages can be skipped during
402*4882a593Smuzhiyun * memory offlining, such pages cannot be allocated.
403*4882a593Smuzhiyun * 5. Unmovable PG_offline pages: in paravirtualized environments,
404*4882a593Smuzhiyun * hotplugged memory blocks might only partially be managed by the
405*4882a593Smuzhiyun * buddy (e.g., via XEN-balloon, Hyper-V balloon, virtio-mem). The
406*4882a593Smuzhiyun * parts not manged by the buddy are unmovable PG_offline pages. In
407*4882a593Smuzhiyun * some cases (virtio-mem), such pages can be skipped during
408*4882a593Smuzhiyun * memory offlining, however, cannot be moved/allocated. These
409*4882a593Smuzhiyun * techniques might use alloc_contig_range() to hide previously
410*4882a593Smuzhiyun * exposed pages from the buddy again (e.g., to implement some sort
411*4882a593Smuzhiyun * of memory unplug in virtio-mem).
412*4882a593Smuzhiyun *
413*4882a593Smuzhiyun * In general, no unmovable allocations that degrade memory offlining
414*4882a593Smuzhiyun * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range())
415*4882a593Smuzhiyun * have to expect that migrating pages in ZONE_MOVABLE can fail (even
416*4882a593Smuzhiyun * if has_unmovable_pages() states that there are no unmovable pages,
417*4882a593Smuzhiyun * there can be false negatives).
418*4882a593Smuzhiyun */
419*4882a593Smuzhiyun ZONE_MOVABLE,
420*4882a593Smuzhiyun #ifdef CONFIG_ZONE_DEVICE
421*4882a593Smuzhiyun ZONE_DEVICE,
422*4882a593Smuzhiyun #endif
423*4882a593Smuzhiyun __MAX_NR_ZONES
424*4882a593Smuzhiyun
425*4882a593Smuzhiyun };
426*4882a593Smuzhiyun
427*4882a593Smuzhiyun #ifndef __GENERATING_BOUNDS_H
428*4882a593Smuzhiyun
429*4882a593Smuzhiyun #define ASYNC_AND_SYNC 2
430*4882a593Smuzhiyun
431*4882a593Smuzhiyun struct zone {
432*4882a593Smuzhiyun /* Read-mostly fields */
433*4882a593Smuzhiyun
434*4882a593Smuzhiyun /* zone watermarks, access with *_wmark_pages(zone) macros */
435*4882a593Smuzhiyun unsigned long _watermark[NR_WMARK];
436*4882a593Smuzhiyun unsigned long watermark_boost;
437*4882a593Smuzhiyun
438*4882a593Smuzhiyun unsigned long nr_reserved_highatomic;
439*4882a593Smuzhiyun
440*4882a593Smuzhiyun /*
441*4882a593Smuzhiyun * We don't know if the memory that we're going to allocate will be
442*4882a593Smuzhiyun * freeable or/and it will be released eventually, so to avoid totally
443*4882a593Smuzhiyun * wasting several GB of ram we must reserve some of the lower zone
444*4882a593Smuzhiyun * memory (otherwise we risk to run OOM on the lower zones despite
445*4882a593Smuzhiyun * there being tons of freeable ram on the higher zones). This array is
446*4882a593Smuzhiyun * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl
447*4882a593Smuzhiyun * changes.
448*4882a593Smuzhiyun */
449*4882a593Smuzhiyun long lowmem_reserve[MAX_NR_ZONES];
450*4882a593Smuzhiyun
451*4882a593Smuzhiyun #ifdef CONFIG_NEED_MULTIPLE_NODES
452*4882a593Smuzhiyun int node;
453*4882a593Smuzhiyun #endif
454*4882a593Smuzhiyun struct pglist_data *zone_pgdat;
455*4882a593Smuzhiyun struct per_cpu_pageset __percpu *pageset;
456*4882a593Smuzhiyun
457*4882a593Smuzhiyun #ifndef CONFIG_SPARSEMEM
458*4882a593Smuzhiyun /*
459*4882a593Smuzhiyun * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
460*4882a593Smuzhiyun * In SPARSEMEM, this map is stored in struct mem_section
461*4882a593Smuzhiyun */
462*4882a593Smuzhiyun unsigned long *pageblock_flags;
463*4882a593Smuzhiyun #endif /* CONFIG_SPARSEMEM */
464*4882a593Smuzhiyun
465*4882a593Smuzhiyun /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
466*4882a593Smuzhiyun unsigned long zone_start_pfn;
467*4882a593Smuzhiyun
468*4882a593Smuzhiyun /*
469*4882a593Smuzhiyun * spanned_pages is the total pages spanned by the zone, including
470*4882a593Smuzhiyun * holes, which is calculated as:
471*4882a593Smuzhiyun * spanned_pages = zone_end_pfn - zone_start_pfn;
472*4882a593Smuzhiyun *
473*4882a593Smuzhiyun * present_pages is physical pages existing within the zone, which
474*4882a593Smuzhiyun * is calculated as:
475*4882a593Smuzhiyun * present_pages = spanned_pages - absent_pages(pages in holes);
476*4882a593Smuzhiyun *
477*4882a593Smuzhiyun * managed_pages is present pages managed by the buddy system, which
478*4882a593Smuzhiyun * is calculated as (reserved_pages includes pages allocated by the
479*4882a593Smuzhiyun * bootmem allocator):
480*4882a593Smuzhiyun * managed_pages = present_pages - reserved_pages;
481*4882a593Smuzhiyun *
482*4882a593Smuzhiyun * cma pages is present pages that are assigned for CMA use
483*4882a593Smuzhiyun * (MIGRATE_CMA).
484*4882a593Smuzhiyun *
485*4882a593Smuzhiyun * So present_pages may be used by memory hotplug or memory power
486*4882a593Smuzhiyun * management logic to figure out unmanaged pages by checking
487*4882a593Smuzhiyun * (present_pages - managed_pages). And managed_pages should be used
488*4882a593Smuzhiyun * by page allocator and vm scanner to calculate all kinds of watermarks
489*4882a593Smuzhiyun * and thresholds.
490*4882a593Smuzhiyun *
491*4882a593Smuzhiyun * Locking rules:
492*4882a593Smuzhiyun *
493*4882a593Smuzhiyun * zone_start_pfn and spanned_pages are protected by span_seqlock.
494*4882a593Smuzhiyun * It is a seqlock because it has to be read outside of zone->lock,
495*4882a593Smuzhiyun * and it is done in the main allocator path. But, it is written
496*4882a593Smuzhiyun * quite infrequently.
497*4882a593Smuzhiyun *
498*4882a593Smuzhiyun * The span_seq lock is declared along with zone->lock because it is
499*4882a593Smuzhiyun * frequently read in proximity to zone->lock. It's good to
500*4882a593Smuzhiyun * give them a chance of being in the same cacheline.
501*4882a593Smuzhiyun *
502*4882a593Smuzhiyun * Write access to present_pages at runtime should be protected by
503*4882a593Smuzhiyun * mem_hotplug_begin/end(). Any reader who can't tolerant drift of
504*4882a593Smuzhiyun * present_pages should get_online_mems() to get a stable value.
505*4882a593Smuzhiyun */
506*4882a593Smuzhiyun atomic_long_t managed_pages;
507*4882a593Smuzhiyun unsigned long spanned_pages;
508*4882a593Smuzhiyun unsigned long present_pages;
509*4882a593Smuzhiyun #ifdef CONFIG_CMA
510*4882a593Smuzhiyun unsigned long cma_pages;
511*4882a593Smuzhiyun #endif
512*4882a593Smuzhiyun
513*4882a593Smuzhiyun const char *name;
514*4882a593Smuzhiyun
515*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_ISOLATION
516*4882a593Smuzhiyun /*
517*4882a593Smuzhiyun * Number of isolated pageblock. It is used to solve incorrect
518*4882a593Smuzhiyun * freepage counting problem due to racy retrieving migratetype
519*4882a593Smuzhiyun * of pageblock. Protected by zone->lock.
520*4882a593Smuzhiyun */
521*4882a593Smuzhiyun unsigned long nr_isolate_pageblock;
522*4882a593Smuzhiyun #endif
523*4882a593Smuzhiyun
524*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_HOTPLUG
525*4882a593Smuzhiyun /* see spanned/present_pages for more description */
526*4882a593Smuzhiyun seqlock_t span_seqlock;
527*4882a593Smuzhiyun #endif
528*4882a593Smuzhiyun
529*4882a593Smuzhiyun int initialized;
530*4882a593Smuzhiyun
531*4882a593Smuzhiyun /* Write-intensive fields used from the page allocator */
532*4882a593Smuzhiyun ZONE_PADDING(_pad1_)
533*4882a593Smuzhiyun
534*4882a593Smuzhiyun /* free areas of different sizes */
535*4882a593Smuzhiyun struct free_area free_area[MAX_ORDER];
536*4882a593Smuzhiyun
537*4882a593Smuzhiyun /* zone flags, see below */
538*4882a593Smuzhiyun unsigned long flags;
539*4882a593Smuzhiyun
540*4882a593Smuzhiyun /* Primarily protects free_area */
541*4882a593Smuzhiyun spinlock_t lock;
542*4882a593Smuzhiyun
543*4882a593Smuzhiyun /* Write-intensive fields used by compaction and vmstats. */
544*4882a593Smuzhiyun ZONE_PADDING(_pad2_)
545*4882a593Smuzhiyun
546*4882a593Smuzhiyun /*
547*4882a593Smuzhiyun * When free pages are below this point, additional steps are taken
548*4882a593Smuzhiyun * when reading the number of free pages to avoid per-cpu counter
549*4882a593Smuzhiyun * drift allowing watermarks to be breached
550*4882a593Smuzhiyun */
551*4882a593Smuzhiyun unsigned long percpu_drift_mark;
552*4882a593Smuzhiyun
553*4882a593Smuzhiyun #if defined CONFIG_COMPACTION || defined CONFIG_CMA
554*4882a593Smuzhiyun /* pfn where compaction free scanner should start */
555*4882a593Smuzhiyun unsigned long compact_cached_free_pfn;
556*4882a593Smuzhiyun /* pfn where compaction migration scanner should start */
557*4882a593Smuzhiyun unsigned long compact_cached_migrate_pfn[ASYNC_AND_SYNC];
558*4882a593Smuzhiyun unsigned long compact_init_migrate_pfn;
559*4882a593Smuzhiyun unsigned long compact_init_free_pfn;
560*4882a593Smuzhiyun #endif
561*4882a593Smuzhiyun
562*4882a593Smuzhiyun #ifdef CONFIG_COMPACTION
563*4882a593Smuzhiyun /*
564*4882a593Smuzhiyun * On compaction failure, 1<<compact_defer_shift compactions
565*4882a593Smuzhiyun * are skipped before trying again. The number attempted since
566*4882a593Smuzhiyun * last failure is tracked with compact_considered.
567*4882a593Smuzhiyun * compact_order_failed is the minimum compaction failed order.
568*4882a593Smuzhiyun */
569*4882a593Smuzhiyun unsigned int compact_considered;
570*4882a593Smuzhiyun unsigned int compact_defer_shift;
571*4882a593Smuzhiyun int compact_order_failed;
572*4882a593Smuzhiyun #endif
573*4882a593Smuzhiyun
574*4882a593Smuzhiyun #if defined CONFIG_COMPACTION || defined CONFIG_CMA
575*4882a593Smuzhiyun /* Set to true when the PG_migrate_skip bits should be cleared */
576*4882a593Smuzhiyun bool compact_blockskip_flush;
577*4882a593Smuzhiyun #endif
578*4882a593Smuzhiyun
579*4882a593Smuzhiyun bool contiguous;
580*4882a593Smuzhiyun
581*4882a593Smuzhiyun ZONE_PADDING(_pad3_)
582*4882a593Smuzhiyun /* Zone statistics */
583*4882a593Smuzhiyun atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
584*4882a593Smuzhiyun atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
585*4882a593Smuzhiyun
586*4882a593Smuzhiyun ANDROID_KABI_RESERVE(1);
587*4882a593Smuzhiyun ANDROID_KABI_RESERVE(2);
588*4882a593Smuzhiyun ANDROID_KABI_RESERVE(3);
589*4882a593Smuzhiyun ANDROID_KABI_RESERVE(4);
590*4882a593Smuzhiyun } ____cacheline_internodealigned_in_smp;
591*4882a593Smuzhiyun
592*4882a593Smuzhiyun enum pgdat_flags {
593*4882a593Smuzhiyun PGDAT_DIRTY, /* reclaim scanning has recently found
594*4882a593Smuzhiyun * many dirty file pages at the tail
595*4882a593Smuzhiyun * of the LRU.
596*4882a593Smuzhiyun */
597*4882a593Smuzhiyun PGDAT_WRITEBACK, /* reclaim scanning has recently found
598*4882a593Smuzhiyun * many pages under writeback
599*4882a593Smuzhiyun */
600*4882a593Smuzhiyun PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */
601*4882a593Smuzhiyun };
602*4882a593Smuzhiyun
603*4882a593Smuzhiyun enum zone_flags {
604*4882a593Smuzhiyun ZONE_BOOSTED_WATERMARK, /* zone recently boosted watermarks.
605*4882a593Smuzhiyun * Cleared when kswapd is woken.
606*4882a593Smuzhiyun */
607*4882a593Smuzhiyun };
608*4882a593Smuzhiyun
zone_managed_pages(struct zone * zone)609*4882a593Smuzhiyun static inline unsigned long zone_managed_pages(struct zone *zone)
610*4882a593Smuzhiyun {
611*4882a593Smuzhiyun return (unsigned long)atomic_long_read(&zone->managed_pages);
612*4882a593Smuzhiyun }
613*4882a593Smuzhiyun
zone_cma_pages(struct zone * zone)614*4882a593Smuzhiyun static inline unsigned long zone_cma_pages(struct zone *zone)
615*4882a593Smuzhiyun {
616*4882a593Smuzhiyun #ifdef CONFIG_CMA
617*4882a593Smuzhiyun return zone->cma_pages;
618*4882a593Smuzhiyun #else
619*4882a593Smuzhiyun return 0;
620*4882a593Smuzhiyun #endif
621*4882a593Smuzhiyun }
622*4882a593Smuzhiyun
zone_end_pfn(const struct zone * zone)623*4882a593Smuzhiyun static inline unsigned long zone_end_pfn(const struct zone *zone)
624*4882a593Smuzhiyun {
625*4882a593Smuzhiyun return zone->zone_start_pfn + zone->spanned_pages;
626*4882a593Smuzhiyun }
627*4882a593Smuzhiyun
zone_spans_pfn(const struct zone * zone,unsigned long pfn)628*4882a593Smuzhiyun static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
629*4882a593Smuzhiyun {
630*4882a593Smuzhiyun return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
631*4882a593Smuzhiyun }
632*4882a593Smuzhiyun
zone_is_initialized(struct zone * zone)633*4882a593Smuzhiyun static inline bool zone_is_initialized(struct zone *zone)
634*4882a593Smuzhiyun {
635*4882a593Smuzhiyun return zone->initialized;
636*4882a593Smuzhiyun }
637*4882a593Smuzhiyun
zone_is_empty(struct zone * zone)638*4882a593Smuzhiyun static inline bool zone_is_empty(struct zone *zone)
639*4882a593Smuzhiyun {
640*4882a593Smuzhiyun return zone->spanned_pages == 0;
641*4882a593Smuzhiyun }
642*4882a593Smuzhiyun
643*4882a593Smuzhiyun /*
644*4882a593Smuzhiyun * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty
645*4882a593Smuzhiyun * intersection with the given zone
646*4882a593Smuzhiyun */
zone_intersects(struct zone * zone,unsigned long start_pfn,unsigned long nr_pages)647*4882a593Smuzhiyun static inline bool zone_intersects(struct zone *zone,
648*4882a593Smuzhiyun unsigned long start_pfn, unsigned long nr_pages)
649*4882a593Smuzhiyun {
650*4882a593Smuzhiyun if (zone_is_empty(zone))
651*4882a593Smuzhiyun return false;
652*4882a593Smuzhiyun if (start_pfn >= zone_end_pfn(zone) ||
653*4882a593Smuzhiyun start_pfn + nr_pages <= zone->zone_start_pfn)
654*4882a593Smuzhiyun return false;
655*4882a593Smuzhiyun
656*4882a593Smuzhiyun return true;
657*4882a593Smuzhiyun }
658*4882a593Smuzhiyun
659*4882a593Smuzhiyun /*
660*4882a593Smuzhiyun * The "priority" of VM scanning is how much of the queues we will scan in one
661*4882a593Smuzhiyun * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
662*4882a593Smuzhiyun * queues ("queue_length >> 12") during an aging round.
663*4882a593Smuzhiyun */
664*4882a593Smuzhiyun #define DEF_PRIORITY 12
665*4882a593Smuzhiyun
666*4882a593Smuzhiyun /* Maximum number of zones on a zonelist */
667*4882a593Smuzhiyun #define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES)
668*4882a593Smuzhiyun
669*4882a593Smuzhiyun enum {
670*4882a593Smuzhiyun ZONELIST_FALLBACK, /* zonelist with fallback */
671*4882a593Smuzhiyun #ifdef CONFIG_NUMA
672*4882a593Smuzhiyun /*
673*4882a593Smuzhiyun * The NUMA zonelists are doubled because we need zonelists that
674*4882a593Smuzhiyun * restrict the allocations to a single node for __GFP_THISNODE.
675*4882a593Smuzhiyun */
676*4882a593Smuzhiyun ZONELIST_NOFALLBACK, /* zonelist without fallback (__GFP_THISNODE) */
677*4882a593Smuzhiyun #endif
678*4882a593Smuzhiyun MAX_ZONELISTS
679*4882a593Smuzhiyun };
680*4882a593Smuzhiyun
681*4882a593Smuzhiyun /*
682*4882a593Smuzhiyun * This struct contains information about a zone in a zonelist. It is stored
683*4882a593Smuzhiyun * here to avoid dereferences into large structures and lookups of tables
684*4882a593Smuzhiyun */
685*4882a593Smuzhiyun struct zoneref {
686*4882a593Smuzhiyun struct zone *zone; /* Pointer to actual zone */
687*4882a593Smuzhiyun int zone_idx; /* zone_idx(zoneref->zone) */
688*4882a593Smuzhiyun };
689*4882a593Smuzhiyun
690*4882a593Smuzhiyun /*
691*4882a593Smuzhiyun * One allocation request operates on a zonelist. A zonelist
692*4882a593Smuzhiyun * is a list of zones, the first one is the 'goal' of the
693*4882a593Smuzhiyun * allocation, the other zones are fallback zones, in decreasing
694*4882a593Smuzhiyun * priority.
695*4882a593Smuzhiyun *
696*4882a593Smuzhiyun * To speed the reading of the zonelist, the zonerefs contain the zone index
697*4882a593Smuzhiyun * of the entry being read. Helper functions to access information given
698*4882a593Smuzhiyun * a struct zoneref are
699*4882a593Smuzhiyun *
700*4882a593Smuzhiyun * zonelist_zone() - Return the struct zone * for an entry in _zonerefs
701*4882a593Smuzhiyun * zonelist_zone_idx() - Return the index of the zone for an entry
702*4882a593Smuzhiyun * zonelist_node_idx() - Return the index of the node for an entry
703*4882a593Smuzhiyun */
704*4882a593Smuzhiyun struct zonelist {
705*4882a593Smuzhiyun struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
706*4882a593Smuzhiyun };
707*4882a593Smuzhiyun
708*4882a593Smuzhiyun #ifndef CONFIG_DISCONTIGMEM
709*4882a593Smuzhiyun /* The array of struct pages - for discontigmem use pgdat->lmem_map */
710*4882a593Smuzhiyun extern struct page *mem_map;
711*4882a593Smuzhiyun #endif
712*4882a593Smuzhiyun
713*4882a593Smuzhiyun #ifdef CONFIG_TRANSPARENT_HUGEPAGE
714*4882a593Smuzhiyun struct deferred_split {
715*4882a593Smuzhiyun spinlock_t split_queue_lock;
716*4882a593Smuzhiyun struct list_head split_queue;
717*4882a593Smuzhiyun unsigned long split_queue_len;
718*4882a593Smuzhiyun };
719*4882a593Smuzhiyun #endif
720*4882a593Smuzhiyun
721*4882a593Smuzhiyun /*
722*4882a593Smuzhiyun * On NUMA machines, each NUMA node would have a pg_data_t to describe
723*4882a593Smuzhiyun * it's memory layout. On UMA machines there is a single pglist_data which
724*4882a593Smuzhiyun * describes the whole memory.
725*4882a593Smuzhiyun *
726*4882a593Smuzhiyun * Memory statistics and page replacement data structures are maintained on a
727*4882a593Smuzhiyun * per-zone basis.
728*4882a593Smuzhiyun */
729*4882a593Smuzhiyun typedef struct pglist_data {
730*4882a593Smuzhiyun /*
731*4882a593Smuzhiyun * node_zones contains just the zones for THIS node. Not all of the
732*4882a593Smuzhiyun * zones may be populated, but it is the full list. It is referenced by
733*4882a593Smuzhiyun * this node's node_zonelists as well as other node's node_zonelists.
734*4882a593Smuzhiyun */
735*4882a593Smuzhiyun struct zone node_zones[MAX_NR_ZONES];
736*4882a593Smuzhiyun
737*4882a593Smuzhiyun /*
738*4882a593Smuzhiyun * node_zonelists contains references to all zones in all nodes.
739*4882a593Smuzhiyun * Generally the first zones will be references to this node's
740*4882a593Smuzhiyun * node_zones.
741*4882a593Smuzhiyun */
742*4882a593Smuzhiyun struct zonelist node_zonelists[MAX_ZONELISTS];
743*4882a593Smuzhiyun
744*4882a593Smuzhiyun int nr_zones; /* number of populated zones in this node */
745*4882a593Smuzhiyun #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
746*4882a593Smuzhiyun struct page *node_mem_map;
747*4882a593Smuzhiyun #ifdef CONFIG_PAGE_EXTENSION
748*4882a593Smuzhiyun struct page_ext *node_page_ext;
749*4882a593Smuzhiyun #endif
750*4882a593Smuzhiyun #endif
751*4882a593Smuzhiyun #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
752*4882a593Smuzhiyun /*
753*4882a593Smuzhiyun * Must be held any time you expect node_start_pfn,
754*4882a593Smuzhiyun * node_present_pages, node_spanned_pages or nr_zones to stay constant.
755*4882a593Smuzhiyun * Also synchronizes pgdat->first_deferred_pfn during deferred page
756*4882a593Smuzhiyun * init.
757*4882a593Smuzhiyun *
758*4882a593Smuzhiyun * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
759*4882a593Smuzhiyun * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG
760*4882a593Smuzhiyun * or CONFIG_DEFERRED_STRUCT_PAGE_INIT.
761*4882a593Smuzhiyun *
762*4882a593Smuzhiyun * Nests above zone->lock and zone->span_seqlock
763*4882a593Smuzhiyun */
764*4882a593Smuzhiyun spinlock_t node_size_lock;
765*4882a593Smuzhiyun #endif
766*4882a593Smuzhiyun unsigned long node_start_pfn;
767*4882a593Smuzhiyun unsigned long node_present_pages; /* total number of physical pages */
768*4882a593Smuzhiyun unsigned long node_spanned_pages; /* total size of physical page
769*4882a593Smuzhiyun range, including holes */
770*4882a593Smuzhiyun int node_id;
771*4882a593Smuzhiyun wait_queue_head_t kswapd_wait;
772*4882a593Smuzhiyun wait_queue_head_t pfmemalloc_wait;
773*4882a593Smuzhiyun struct task_struct *kswapd; /* Protected by
774*4882a593Smuzhiyun mem_hotplug_begin/end() */
775*4882a593Smuzhiyun struct task_struct *mkswapd[MAX_KSWAPD_THREADS];
776*4882a593Smuzhiyun int kswapd_order;
777*4882a593Smuzhiyun enum zone_type kswapd_highest_zoneidx;
778*4882a593Smuzhiyun
779*4882a593Smuzhiyun int kswapd_failures; /* Number of 'reclaimed == 0' runs */
780*4882a593Smuzhiyun
781*4882a593Smuzhiyun ANDROID_OEM_DATA(1);
782*4882a593Smuzhiyun #ifdef CONFIG_COMPACTION
783*4882a593Smuzhiyun int kcompactd_max_order;
784*4882a593Smuzhiyun enum zone_type kcompactd_highest_zoneidx;
785*4882a593Smuzhiyun wait_queue_head_t kcompactd_wait;
786*4882a593Smuzhiyun struct task_struct *kcompactd;
787*4882a593Smuzhiyun bool proactive_compact_trigger;
788*4882a593Smuzhiyun #endif
789*4882a593Smuzhiyun /*
790*4882a593Smuzhiyun * This is a per-node reserve of pages that are not available
791*4882a593Smuzhiyun * to userspace allocations.
792*4882a593Smuzhiyun */
793*4882a593Smuzhiyun unsigned long totalreserve_pages;
794*4882a593Smuzhiyun
795*4882a593Smuzhiyun #ifdef CONFIG_NUMA
796*4882a593Smuzhiyun /*
797*4882a593Smuzhiyun * node reclaim becomes active if more unmapped pages exist.
798*4882a593Smuzhiyun */
799*4882a593Smuzhiyun unsigned long min_unmapped_pages;
800*4882a593Smuzhiyun unsigned long min_slab_pages;
801*4882a593Smuzhiyun #endif /* CONFIG_NUMA */
802*4882a593Smuzhiyun
803*4882a593Smuzhiyun /* Write-intensive fields used by page reclaim */
804*4882a593Smuzhiyun ZONE_PADDING(_pad1_)
805*4882a593Smuzhiyun spinlock_t lru_lock;
806*4882a593Smuzhiyun
807*4882a593Smuzhiyun #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
808*4882a593Smuzhiyun /*
809*4882a593Smuzhiyun * If memory initialisation on large machines is deferred then this
810*4882a593Smuzhiyun * is the first PFN that needs to be initialised.
811*4882a593Smuzhiyun */
812*4882a593Smuzhiyun unsigned long first_deferred_pfn;
813*4882a593Smuzhiyun #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
814*4882a593Smuzhiyun
815*4882a593Smuzhiyun #ifdef CONFIG_TRANSPARENT_HUGEPAGE
816*4882a593Smuzhiyun struct deferred_split deferred_split_queue;
817*4882a593Smuzhiyun #endif
818*4882a593Smuzhiyun
819*4882a593Smuzhiyun /* Fields commonly accessed by the page reclaim scanner */
820*4882a593Smuzhiyun
821*4882a593Smuzhiyun /*
822*4882a593Smuzhiyun * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED.
823*4882a593Smuzhiyun *
824*4882a593Smuzhiyun * Use mem_cgroup_lruvec() to look up lruvecs.
825*4882a593Smuzhiyun */
826*4882a593Smuzhiyun struct lruvec __lruvec;
827*4882a593Smuzhiyun
828*4882a593Smuzhiyun unsigned long flags;
829*4882a593Smuzhiyun
830*4882a593Smuzhiyun ZONE_PADDING(_pad2_)
831*4882a593Smuzhiyun
832*4882a593Smuzhiyun /* Per-node vmstats */
833*4882a593Smuzhiyun struct per_cpu_nodestat __percpu *per_cpu_nodestats;
834*4882a593Smuzhiyun atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS];
835*4882a593Smuzhiyun } pg_data_t;
836*4882a593Smuzhiyun
837*4882a593Smuzhiyun #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
838*4882a593Smuzhiyun #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
839*4882a593Smuzhiyun #ifdef CONFIG_FLAT_NODE_MEM_MAP
840*4882a593Smuzhiyun #define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr))
841*4882a593Smuzhiyun #else
842*4882a593Smuzhiyun #define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr))
843*4882a593Smuzhiyun #endif
844*4882a593Smuzhiyun #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr))
845*4882a593Smuzhiyun
846*4882a593Smuzhiyun #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
847*4882a593Smuzhiyun #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
848*4882a593Smuzhiyun
pgdat_end_pfn(pg_data_t * pgdat)849*4882a593Smuzhiyun static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
850*4882a593Smuzhiyun {
851*4882a593Smuzhiyun return pgdat->node_start_pfn + pgdat->node_spanned_pages;
852*4882a593Smuzhiyun }
853*4882a593Smuzhiyun
pgdat_is_empty(pg_data_t * pgdat)854*4882a593Smuzhiyun static inline bool pgdat_is_empty(pg_data_t *pgdat)
855*4882a593Smuzhiyun {
856*4882a593Smuzhiyun return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
857*4882a593Smuzhiyun }
858*4882a593Smuzhiyun
859*4882a593Smuzhiyun #include <linux/memory_hotplug.h>
860*4882a593Smuzhiyun
861*4882a593Smuzhiyun void build_all_zonelists(pg_data_t *pgdat);
862*4882a593Smuzhiyun void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
863*4882a593Smuzhiyun enum zone_type highest_zoneidx);
864*4882a593Smuzhiyun bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
865*4882a593Smuzhiyun int highest_zoneidx, unsigned int alloc_flags,
866*4882a593Smuzhiyun long free_pages);
867*4882a593Smuzhiyun bool zone_watermark_ok(struct zone *z, unsigned int order,
868*4882a593Smuzhiyun unsigned long mark, int highest_zoneidx,
869*4882a593Smuzhiyun unsigned int alloc_flags);
870*4882a593Smuzhiyun bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
871*4882a593Smuzhiyun unsigned long mark, int highest_zoneidx);
872*4882a593Smuzhiyun /*
873*4882a593Smuzhiyun * Memory initialization context, use to differentiate memory added by
874*4882a593Smuzhiyun * the platform statically or via memory hotplug interface.
875*4882a593Smuzhiyun */
876*4882a593Smuzhiyun enum meminit_context {
877*4882a593Smuzhiyun MEMINIT_EARLY,
878*4882a593Smuzhiyun MEMINIT_HOTPLUG,
879*4882a593Smuzhiyun };
880*4882a593Smuzhiyun
881*4882a593Smuzhiyun extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
882*4882a593Smuzhiyun unsigned long size);
883*4882a593Smuzhiyun
884*4882a593Smuzhiyun extern void lruvec_init(struct lruvec *lruvec);
885*4882a593Smuzhiyun
lruvec_pgdat(struct lruvec * lruvec)886*4882a593Smuzhiyun static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
887*4882a593Smuzhiyun {
888*4882a593Smuzhiyun #ifdef CONFIG_MEMCG
889*4882a593Smuzhiyun return lruvec->pgdat;
890*4882a593Smuzhiyun #else
891*4882a593Smuzhiyun return container_of(lruvec, struct pglist_data, __lruvec);
892*4882a593Smuzhiyun #endif
893*4882a593Smuzhiyun }
894*4882a593Smuzhiyun
895*4882a593Smuzhiyun extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
896*4882a593Smuzhiyun
897*4882a593Smuzhiyun #ifdef CONFIG_HAVE_MEMORYLESS_NODES
898*4882a593Smuzhiyun int local_memory_node(int node_id);
899*4882a593Smuzhiyun #else
local_memory_node(int node_id)900*4882a593Smuzhiyun static inline int local_memory_node(int node_id) { return node_id; };
901*4882a593Smuzhiyun #endif
902*4882a593Smuzhiyun
903*4882a593Smuzhiyun /*
904*4882a593Smuzhiyun * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
905*4882a593Smuzhiyun */
906*4882a593Smuzhiyun #define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
907*4882a593Smuzhiyun
908*4882a593Smuzhiyun /*
909*4882a593Smuzhiyun * Returns true if a zone has pages managed by the buddy allocator.
910*4882a593Smuzhiyun * All the reclaim decisions have to use this function rather than
911*4882a593Smuzhiyun * populated_zone(). If the whole zone is reserved then we can easily
912*4882a593Smuzhiyun * end up with populated_zone() && !managed_zone().
913*4882a593Smuzhiyun */
managed_zone(struct zone * zone)914*4882a593Smuzhiyun static inline bool managed_zone(struct zone *zone)
915*4882a593Smuzhiyun {
916*4882a593Smuzhiyun return zone_managed_pages(zone);
917*4882a593Smuzhiyun }
918*4882a593Smuzhiyun
919*4882a593Smuzhiyun /* Returns true if a zone has memory */
populated_zone(struct zone * zone)920*4882a593Smuzhiyun static inline bool populated_zone(struct zone *zone)
921*4882a593Smuzhiyun {
922*4882a593Smuzhiyun return zone->present_pages;
923*4882a593Smuzhiyun }
924*4882a593Smuzhiyun
925*4882a593Smuzhiyun #ifdef CONFIG_NEED_MULTIPLE_NODES
zone_to_nid(struct zone * zone)926*4882a593Smuzhiyun static inline int zone_to_nid(struct zone *zone)
927*4882a593Smuzhiyun {
928*4882a593Smuzhiyun return zone->node;
929*4882a593Smuzhiyun }
930*4882a593Smuzhiyun
zone_set_nid(struct zone * zone,int nid)931*4882a593Smuzhiyun static inline void zone_set_nid(struct zone *zone, int nid)
932*4882a593Smuzhiyun {
933*4882a593Smuzhiyun zone->node = nid;
934*4882a593Smuzhiyun }
935*4882a593Smuzhiyun #else
zone_to_nid(struct zone * zone)936*4882a593Smuzhiyun static inline int zone_to_nid(struct zone *zone)
937*4882a593Smuzhiyun {
938*4882a593Smuzhiyun return 0;
939*4882a593Smuzhiyun }
940*4882a593Smuzhiyun
zone_set_nid(struct zone * zone,int nid)941*4882a593Smuzhiyun static inline void zone_set_nid(struct zone *zone, int nid) {}
942*4882a593Smuzhiyun #endif
943*4882a593Smuzhiyun
944*4882a593Smuzhiyun extern int movable_zone;
945*4882a593Smuzhiyun
946*4882a593Smuzhiyun #ifdef CONFIG_HIGHMEM
zone_movable_is_highmem(void)947*4882a593Smuzhiyun static inline int zone_movable_is_highmem(void)
948*4882a593Smuzhiyun {
949*4882a593Smuzhiyun #ifdef CONFIG_NEED_MULTIPLE_NODES
950*4882a593Smuzhiyun return movable_zone == ZONE_HIGHMEM;
951*4882a593Smuzhiyun #else
952*4882a593Smuzhiyun return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
953*4882a593Smuzhiyun #endif
954*4882a593Smuzhiyun }
955*4882a593Smuzhiyun #endif
956*4882a593Smuzhiyun
is_highmem_idx(enum zone_type idx)957*4882a593Smuzhiyun static inline int is_highmem_idx(enum zone_type idx)
958*4882a593Smuzhiyun {
959*4882a593Smuzhiyun #ifdef CONFIG_HIGHMEM
960*4882a593Smuzhiyun return (idx == ZONE_HIGHMEM ||
961*4882a593Smuzhiyun (idx == ZONE_MOVABLE && zone_movable_is_highmem()));
962*4882a593Smuzhiyun #else
963*4882a593Smuzhiyun return 0;
964*4882a593Smuzhiyun #endif
965*4882a593Smuzhiyun }
966*4882a593Smuzhiyun
967*4882a593Smuzhiyun #ifdef CONFIG_ZONE_DMA
968*4882a593Smuzhiyun bool has_managed_dma(void);
969*4882a593Smuzhiyun #else
has_managed_dma(void)970*4882a593Smuzhiyun static inline bool has_managed_dma(void)
971*4882a593Smuzhiyun {
972*4882a593Smuzhiyun return false;
973*4882a593Smuzhiyun }
974*4882a593Smuzhiyun #endif
975*4882a593Smuzhiyun
976*4882a593Smuzhiyun /**
977*4882a593Smuzhiyun * is_highmem - helper function to quickly check if a struct zone is a
978*4882a593Smuzhiyun * highmem zone or not. This is an attempt to keep references
979*4882a593Smuzhiyun * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
980*4882a593Smuzhiyun * @zone - pointer to struct zone variable
981*4882a593Smuzhiyun */
is_highmem(struct zone * zone)982*4882a593Smuzhiyun static inline int is_highmem(struct zone *zone)
983*4882a593Smuzhiyun {
984*4882a593Smuzhiyun #ifdef CONFIG_HIGHMEM
985*4882a593Smuzhiyun return is_highmem_idx(zone_idx(zone));
986*4882a593Smuzhiyun #else
987*4882a593Smuzhiyun return 0;
988*4882a593Smuzhiyun #endif
989*4882a593Smuzhiyun }
990*4882a593Smuzhiyun
991*4882a593Smuzhiyun /* These two functions are used to setup the per zone pages min values */
992*4882a593Smuzhiyun struct ctl_table;
993*4882a593Smuzhiyun
994*4882a593Smuzhiyun int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *,
995*4882a593Smuzhiyun loff_t *);
996*4882a593Smuzhiyun int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
997*4882a593Smuzhiyun size_t *, loff_t *);
998*4882a593Smuzhiyun extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
999*4882a593Smuzhiyun int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
1000*4882a593Smuzhiyun size_t *, loff_t *);
1001*4882a593Smuzhiyun int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
1002*4882a593Smuzhiyun void *, size_t *, loff_t *);
1003*4882a593Smuzhiyun int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
1004*4882a593Smuzhiyun void *, size_t *, loff_t *);
1005*4882a593Smuzhiyun int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
1006*4882a593Smuzhiyun void *, size_t *, loff_t *);
1007*4882a593Smuzhiyun int numa_zonelist_order_handler(struct ctl_table *, int,
1008*4882a593Smuzhiyun void *, size_t *, loff_t *);
1009*4882a593Smuzhiyun extern int percpu_pagelist_fraction;
1010*4882a593Smuzhiyun extern char numa_zonelist_order[];
1011*4882a593Smuzhiyun #define NUMA_ZONELIST_ORDER_LEN 16
1012*4882a593Smuzhiyun
1013*4882a593Smuzhiyun #ifndef CONFIG_NEED_MULTIPLE_NODES
1014*4882a593Smuzhiyun
1015*4882a593Smuzhiyun extern struct pglist_data contig_page_data;
1016*4882a593Smuzhiyun #define NODE_DATA(nid) (&contig_page_data)
1017*4882a593Smuzhiyun #define NODE_MEM_MAP(nid) mem_map
1018*4882a593Smuzhiyun
1019*4882a593Smuzhiyun #else /* CONFIG_NEED_MULTIPLE_NODES */
1020*4882a593Smuzhiyun
1021*4882a593Smuzhiyun #include <asm/mmzone.h>
1022*4882a593Smuzhiyun
1023*4882a593Smuzhiyun #endif /* !CONFIG_NEED_MULTIPLE_NODES */
1024*4882a593Smuzhiyun
1025*4882a593Smuzhiyun extern struct pglist_data *first_online_pgdat(void);
1026*4882a593Smuzhiyun extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
1027*4882a593Smuzhiyun extern struct zone *next_zone(struct zone *zone);
1028*4882a593Smuzhiyun extern int isolate_anon_lru_page(struct page *page);
1029*4882a593Smuzhiyun
1030*4882a593Smuzhiyun /**
1031*4882a593Smuzhiyun * for_each_online_pgdat - helper macro to iterate over all online nodes
1032*4882a593Smuzhiyun * @pgdat - pointer to a pg_data_t variable
1033*4882a593Smuzhiyun */
1034*4882a593Smuzhiyun #define for_each_online_pgdat(pgdat) \
1035*4882a593Smuzhiyun for (pgdat = first_online_pgdat(); \
1036*4882a593Smuzhiyun pgdat; \
1037*4882a593Smuzhiyun pgdat = next_online_pgdat(pgdat))
1038*4882a593Smuzhiyun /**
1039*4882a593Smuzhiyun * for_each_zone - helper macro to iterate over all memory zones
1040*4882a593Smuzhiyun * @zone - pointer to struct zone variable
1041*4882a593Smuzhiyun *
1042*4882a593Smuzhiyun * The user only needs to declare the zone variable, for_each_zone
1043*4882a593Smuzhiyun * fills it in.
1044*4882a593Smuzhiyun */
1045*4882a593Smuzhiyun #define for_each_zone(zone) \
1046*4882a593Smuzhiyun for (zone = (first_online_pgdat())->node_zones; \
1047*4882a593Smuzhiyun zone; \
1048*4882a593Smuzhiyun zone = next_zone(zone))
1049*4882a593Smuzhiyun
1050*4882a593Smuzhiyun #define for_each_populated_zone(zone) \
1051*4882a593Smuzhiyun for (zone = (first_online_pgdat())->node_zones; \
1052*4882a593Smuzhiyun zone; \
1053*4882a593Smuzhiyun zone = next_zone(zone)) \
1054*4882a593Smuzhiyun if (!populated_zone(zone)) \
1055*4882a593Smuzhiyun ; /* do nothing */ \
1056*4882a593Smuzhiyun else
1057*4882a593Smuzhiyun
zonelist_zone(struct zoneref * zoneref)1058*4882a593Smuzhiyun static inline struct zone *zonelist_zone(struct zoneref *zoneref)
1059*4882a593Smuzhiyun {
1060*4882a593Smuzhiyun return zoneref->zone;
1061*4882a593Smuzhiyun }
1062*4882a593Smuzhiyun
zonelist_zone_idx(struct zoneref * zoneref)1063*4882a593Smuzhiyun static inline int zonelist_zone_idx(struct zoneref *zoneref)
1064*4882a593Smuzhiyun {
1065*4882a593Smuzhiyun return zoneref->zone_idx;
1066*4882a593Smuzhiyun }
1067*4882a593Smuzhiyun
zonelist_node_idx(struct zoneref * zoneref)1068*4882a593Smuzhiyun static inline int zonelist_node_idx(struct zoneref *zoneref)
1069*4882a593Smuzhiyun {
1070*4882a593Smuzhiyun return zone_to_nid(zoneref->zone);
1071*4882a593Smuzhiyun }
1072*4882a593Smuzhiyun
1073*4882a593Smuzhiyun struct zoneref *__next_zones_zonelist(struct zoneref *z,
1074*4882a593Smuzhiyun enum zone_type highest_zoneidx,
1075*4882a593Smuzhiyun nodemask_t *nodes);
1076*4882a593Smuzhiyun
1077*4882a593Smuzhiyun /**
1078*4882a593Smuzhiyun * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point
1079*4882a593Smuzhiyun * @z - The cursor used as a starting point for the search
1080*4882a593Smuzhiyun * @highest_zoneidx - The zone index of the highest zone to return
1081*4882a593Smuzhiyun * @nodes - An optional nodemask to filter the zonelist with
1082*4882a593Smuzhiyun *
1083*4882a593Smuzhiyun * This function returns the next zone at or below a given zone index that is
1084*4882a593Smuzhiyun * within the allowed nodemask using a cursor as the starting point for the
1085*4882a593Smuzhiyun * search. The zoneref returned is a cursor that represents the current zone
1086*4882a593Smuzhiyun * being examined. It should be advanced by one before calling
1087*4882a593Smuzhiyun * next_zones_zonelist again.
1088*4882a593Smuzhiyun */
next_zones_zonelist(struct zoneref * z,enum zone_type highest_zoneidx,nodemask_t * nodes)1089*4882a593Smuzhiyun static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z,
1090*4882a593Smuzhiyun enum zone_type highest_zoneidx,
1091*4882a593Smuzhiyun nodemask_t *nodes)
1092*4882a593Smuzhiyun {
1093*4882a593Smuzhiyun if (likely(!nodes && zonelist_zone_idx(z) <= highest_zoneidx))
1094*4882a593Smuzhiyun return z;
1095*4882a593Smuzhiyun return __next_zones_zonelist(z, highest_zoneidx, nodes);
1096*4882a593Smuzhiyun }
1097*4882a593Smuzhiyun
1098*4882a593Smuzhiyun /**
1099*4882a593Smuzhiyun * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
1100*4882a593Smuzhiyun * @zonelist - The zonelist to search for a suitable zone
1101*4882a593Smuzhiyun * @highest_zoneidx - The zone index of the highest zone to return
1102*4882a593Smuzhiyun * @nodes - An optional nodemask to filter the zonelist with
1103*4882a593Smuzhiyun * @return - Zoneref pointer for the first suitable zone found (see below)
1104*4882a593Smuzhiyun *
1105*4882a593Smuzhiyun * This function returns the first zone at or below a given zone index that is
1106*4882a593Smuzhiyun * within the allowed nodemask. The zoneref returned is a cursor that can be
1107*4882a593Smuzhiyun * used to iterate the zonelist with next_zones_zonelist by advancing it by
1108*4882a593Smuzhiyun * one before calling.
1109*4882a593Smuzhiyun *
1110*4882a593Smuzhiyun * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is
1111*4882a593Smuzhiyun * never NULL). This may happen either genuinely, or due to concurrent nodemask
1112*4882a593Smuzhiyun * update due to cpuset modification.
1113*4882a593Smuzhiyun */
first_zones_zonelist(struct zonelist * zonelist,enum zone_type highest_zoneidx,nodemask_t * nodes)1114*4882a593Smuzhiyun static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
1115*4882a593Smuzhiyun enum zone_type highest_zoneidx,
1116*4882a593Smuzhiyun nodemask_t *nodes)
1117*4882a593Smuzhiyun {
1118*4882a593Smuzhiyun return next_zones_zonelist(zonelist->_zonerefs,
1119*4882a593Smuzhiyun highest_zoneidx, nodes);
1120*4882a593Smuzhiyun }
1121*4882a593Smuzhiyun
1122*4882a593Smuzhiyun /**
1123*4882a593Smuzhiyun * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
1124*4882a593Smuzhiyun * @zone - The current zone in the iterator
1125*4882a593Smuzhiyun * @z - The current pointer within zonelist->_zonerefs being iterated
1126*4882a593Smuzhiyun * @zlist - The zonelist being iterated
1127*4882a593Smuzhiyun * @highidx - The zone index of the highest zone to return
1128*4882a593Smuzhiyun * @nodemask - Nodemask allowed by the allocator
1129*4882a593Smuzhiyun *
1130*4882a593Smuzhiyun * This iterator iterates though all zones at or below a given zone index and
1131*4882a593Smuzhiyun * within a given nodemask
1132*4882a593Smuzhiyun */
1133*4882a593Smuzhiyun #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
1134*4882a593Smuzhiyun for (z = first_zones_zonelist(zlist, highidx, nodemask), zone = zonelist_zone(z); \
1135*4882a593Smuzhiyun zone; \
1136*4882a593Smuzhiyun z = next_zones_zonelist(++z, highidx, nodemask), \
1137*4882a593Smuzhiyun zone = zonelist_zone(z))
1138*4882a593Smuzhiyun
1139*4882a593Smuzhiyun #define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \
1140*4882a593Smuzhiyun for (zone = z->zone; \
1141*4882a593Smuzhiyun zone; \
1142*4882a593Smuzhiyun z = next_zones_zonelist(++z, highidx, nodemask), \
1143*4882a593Smuzhiyun zone = zonelist_zone(z))
1144*4882a593Smuzhiyun
1145*4882a593Smuzhiyun
1146*4882a593Smuzhiyun /**
1147*4882a593Smuzhiyun * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
1148*4882a593Smuzhiyun * @zone - The current zone in the iterator
1149*4882a593Smuzhiyun * @z - The current pointer within zonelist->zones being iterated
1150*4882a593Smuzhiyun * @zlist - The zonelist being iterated
1151*4882a593Smuzhiyun * @highidx - The zone index of the highest zone to return
1152*4882a593Smuzhiyun *
1153*4882a593Smuzhiyun * This iterator iterates though all zones at or below a given zone index.
1154*4882a593Smuzhiyun */
1155*4882a593Smuzhiyun #define for_each_zone_zonelist(zone, z, zlist, highidx) \
1156*4882a593Smuzhiyun for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)
1157*4882a593Smuzhiyun
1158*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM
1159*4882a593Smuzhiyun #include <asm/sparsemem.h>
1160*4882a593Smuzhiyun #endif
1161*4882a593Smuzhiyun
1162*4882a593Smuzhiyun #ifdef CONFIG_FLATMEM
1163*4882a593Smuzhiyun #define pfn_to_nid(pfn) (0)
1164*4882a593Smuzhiyun #endif
1165*4882a593Smuzhiyun
1166*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM
1167*4882a593Smuzhiyun
1168*4882a593Smuzhiyun /*
1169*4882a593Smuzhiyun * SECTION_SHIFT #bits space required to store a section #
1170*4882a593Smuzhiyun *
1171*4882a593Smuzhiyun * PA_SECTION_SHIFT physical address to/from section number
1172*4882a593Smuzhiyun * PFN_SECTION_SHIFT pfn to/from section number
1173*4882a593Smuzhiyun */
1174*4882a593Smuzhiyun #define PA_SECTION_SHIFT (SECTION_SIZE_BITS)
1175*4882a593Smuzhiyun #define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT)
1176*4882a593Smuzhiyun
1177*4882a593Smuzhiyun #define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT)
1178*4882a593Smuzhiyun
1179*4882a593Smuzhiyun #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT)
1180*4882a593Smuzhiyun #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1))
1181*4882a593Smuzhiyun
1182*4882a593Smuzhiyun #define SECTION_BLOCKFLAGS_BITS \
1183*4882a593Smuzhiyun ((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)
1184*4882a593Smuzhiyun
1185*4882a593Smuzhiyun #if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
1186*4882a593Smuzhiyun #error Allocator MAX_ORDER exceeds SECTION_SIZE
1187*4882a593Smuzhiyun #endif
1188*4882a593Smuzhiyun
pfn_to_section_nr(unsigned long pfn)1189*4882a593Smuzhiyun static inline unsigned long pfn_to_section_nr(unsigned long pfn)
1190*4882a593Smuzhiyun {
1191*4882a593Smuzhiyun return pfn >> PFN_SECTION_SHIFT;
1192*4882a593Smuzhiyun }
section_nr_to_pfn(unsigned long sec)1193*4882a593Smuzhiyun static inline unsigned long section_nr_to_pfn(unsigned long sec)
1194*4882a593Smuzhiyun {
1195*4882a593Smuzhiyun return sec << PFN_SECTION_SHIFT;
1196*4882a593Smuzhiyun }
1197*4882a593Smuzhiyun
1198*4882a593Smuzhiyun #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
1199*4882a593Smuzhiyun #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)
1200*4882a593Smuzhiyun
1201*4882a593Smuzhiyun #define SUBSECTION_SHIFT 21
1202*4882a593Smuzhiyun #define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT)
1203*4882a593Smuzhiyun
1204*4882a593Smuzhiyun #define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
1205*4882a593Smuzhiyun #define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
1206*4882a593Smuzhiyun #define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1))
1207*4882a593Smuzhiyun
1208*4882a593Smuzhiyun #if SUBSECTION_SHIFT > SECTION_SIZE_BITS
1209*4882a593Smuzhiyun #error Subsection size exceeds section size
1210*4882a593Smuzhiyun #else
1211*4882a593Smuzhiyun #define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT))
1212*4882a593Smuzhiyun #endif
1213*4882a593Smuzhiyun
1214*4882a593Smuzhiyun #define SUBSECTION_ALIGN_UP(pfn) ALIGN((pfn), PAGES_PER_SUBSECTION)
1215*4882a593Smuzhiyun #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
1216*4882a593Smuzhiyun
1217*4882a593Smuzhiyun struct mem_section_usage {
1218*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM_VMEMMAP
1219*4882a593Smuzhiyun DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
1220*4882a593Smuzhiyun #endif
1221*4882a593Smuzhiyun /* See declaration of similar field in struct zone */
1222*4882a593Smuzhiyun unsigned long pageblock_flags[0];
1223*4882a593Smuzhiyun };
1224*4882a593Smuzhiyun
1225*4882a593Smuzhiyun void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
1226*4882a593Smuzhiyun
1227*4882a593Smuzhiyun struct page;
1228*4882a593Smuzhiyun struct page_ext;
1229*4882a593Smuzhiyun struct mem_section {
1230*4882a593Smuzhiyun /*
1231*4882a593Smuzhiyun * This is, logically, a pointer to an array of struct
1232*4882a593Smuzhiyun * pages. However, it is stored with some other magic.
1233*4882a593Smuzhiyun * (see sparse.c::sparse_init_one_section())
1234*4882a593Smuzhiyun *
1235*4882a593Smuzhiyun * Additionally during early boot we encode node id of
1236*4882a593Smuzhiyun * the location of the section here to guide allocation.
1237*4882a593Smuzhiyun * (see sparse.c::memory_present())
1238*4882a593Smuzhiyun *
1239*4882a593Smuzhiyun * Making it a UL at least makes someone do a cast
1240*4882a593Smuzhiyun * before using it wrong.
1241*4882a593Smuzhiyun */
1242*4882a593Smuzhiyun unsigned long section_mem_map;
1243*4882a593Smuzhiyun
1244*4882a593Smuzhiyun struct mem_section_usage *usage;
1245*4882a593Smuzhiyun #ifdef CONFIG_PAGE_EXTENSION
1246*4882a593Smuzhiyun /*
1247*4882a593Smuzhiyun * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
1248*4882a593Smuzhiyun * section. (see page_ext.h about this.)
1249*4882a593Smuzhiyun */
1250*4882a593Smuzhiyun struct page_ext *page_ext;
1251*4882a593Smuzhiyun unsigned long pad;
1252*4882a593Smuzhiyun #endif
1253*4882a593Smuzhiyun /*
1254*4882a593Smuzhiyun * WARNING: mem_section must be a power-of-2 in size for the
1255*4882a593Smuzhiyun * calculation and use of SECTION_ROOT_MASK to make sense.
1256*4882a593Smuzhiyun */
1257*4882a593Smuzhiyun };
1258*4882a593Smuzhiyun
1259*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM_EXTREME
1260*4882a593Smuzhiyun #define SECTIONS_PER_ROOT (PAGE_SIZE / sizeof (struct mem_section))
1261*4882a593Smuzhiyun #else
1262*4882a593Smuzhiyun #define SECTIONS_PER_ROOT 1
1263*4882a593Smuzhiyun #endif
1264*4882a593Smuzhiyun
1265*4882a593Smuzhiyun #define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT)
1266*4882a593Smuzhiyun #define NR_SECTION_ROOTS DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT)
1267*4882a593Smuzhiyun #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)
1268*4882a593Smuzhiyun
1269*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM_EXTREME
1270*4882a593Smuzhiyun extern struct mem_section **mem_section;
1271*4882a593Smuzhiyun #else
1272*4882a593Smuzhiyun extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
1273*4882a593Smuzhiyun #endif
1274*4882a593Smuzhiyun
section_to_usemap(struct mem_section * ms)1275*4882a593Smuzhiyun static inline unsigned long *section_to_usemap(struct mem_section *ms)
1276*4882a593Smuzhiyun {
1277*4882a593Smuzhiyun return ms->usage->pageblock_flags;
1278*4882a593Smuzhiyun }
1279*4882a593Smuzhiyun
__nr_to_section(unsigned long nr)1280*4882a593Smuzhiyun static inline struct mem_section *__nr_to_section(unsigned long nr)
1281*4882a593Smuzhiyun {
1282*4882a593Smuzhiyun unsigned long root = SECTION_NR_TO_ROOT(nr);
1283*4882a593Smuzhiyun
1284*4882a593Smuzhiyun if (unlikely(root >= NR_SECTION_ROOTS))
1285*4882a593Smuzhiyun return NULL;
1286*4882a593Smuzhiyun
1287*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM_EXTREME
1288*4882a593Smuzhiyun if (!mem_section || !mem_section[root])
1289*4882a593Smuzhiyun return NULL;
1290*4882a593Smuzhiyun #endif
1291*4882a593Smuzhiyun return &mem_section[root][nr & SECTION_ROOT_MASK];
1292*4882a593Smuzhiyun }
1293*4882a593Smuzhiyun extern unsigned long __section_nr(struct mem_section *ms);
1294*4882a593Smuzhiyun extern size_t mem_section_usage_size(void);
1295*4882a593Smuzhiyun
1296*4882a593Smuzhiyun /*
1297*4882a593Smuzhiyun * We use the lower bits of the mem_map pointer to store
1298*4882a593Smuzhiyun * a little bit of information. The pointer is calculated
1299*4882a593Smuzhiyun * as mem_map - section_nr_to_pfn(pnum). The result is
1300*4882a593Smuzhiyun * aligned to the minimum alignment of the two values:
1301*4882a593Smuzhiyun * 1. All mem_map arrays are page-aligned.
1302*4882a593Smuzhiyun * 2. section_nr_to_pfn() always clears PFN_SECTION_SHIFT
1303*4882a593Smuzhiyun * lowest bits. PFN_SECTION_SHIFT is arch-specific
1304*4882a593Smuzhiyun * (equal SECTION_SIZE_BITS - PAGE_SHIFT), and the
1305*4882a593Smuzhiyun * worst combination is powerpc with 256k pages,
1306*4882a593Smuzhiyun * which results in PFN_SECTION_SHIFT equal 6.
1307*4882a593Smuzhiyun * To sum it up, at least 6 bits are available.
1308*4882a593Smuzhiyun */
1309*4882a593Smuzhiyun #define SECTION_MARKED_PRESENT (1UL<<0)
1310*4882a593Smuzhiyun #define SECTION_HAS_MEM_MAP (1UL<<1)
1311*4882a593Smuzhiyun #define SECTION_IS_ONLINE (1UL<<2)
1312*4882a593Smuzhiyun #define SECTION_IS_EARLY (1UL<<3)
1313*4882a593Smuzhiyun #define SECTION_MAP_LAST_BIT (1UL<<4)
1314*4882a593Smuzhiyun #define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
1315*4882a593Smuzhiyun #define SECTION_NID_SHIFT 3
1316*4882a593Smuzhiyun
__section_mem_map_addr(struct mem_section * section)1317*4882a593Smuzhiyun static inline struct page *__section_mem_map_addr(struct mem_section *section)
1318*4882a593Smuzhiyun {
1319*4882a593Smuzhiyun unsigned long map = section->section_mem_map;
1320*4882a593Smuzhiyun map &= SECTION_MAP_MASK;
1321*4882a593Smuzhiyun return (struct page *)map;
1322*4882a593Smuzhiyun }
1323*4882a593Smuzhiyun
present_section(struct mem_section * section)1324*4882a593Smuzhiyun static inline int present_section(struct mem_section *section)
1325*4882a593Smuzhiyun {
1326*4882a593Smuzhiyun return (section && (section->section_mem_map & SECTION_MARKED_PRESENT));
1327*4882a593Smuzhiyun }
1328*4882a593Smuzhiyun
present_section_nr(unsigned long nr)1329*4882a593Smuzhiyun static inline int present_section_nr(unsigned long nr)
1330*4882a593Smuzhiyun {
1331*4882a593Smuzhiyun return present_section(__nr_to_section(nr));
1332*4882a593Smuzhiyun }
1333*4882a593Smuzhiyun
valid_section(struct mem_section * section)1334*4882a593Smuzhiyun static inline int valid_section(struct mem_section *section)
1335*4882a593Smuzhiyun {
1336*4882a593Smuzhiyun return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP));
1337*4882a593Smuzhiyun }
1338*4882a593Smuzhiyun
early_section(struct mem_section * section)1339*4882a593Smuzhiyun static inline int early_section(struct mem_section *section)
1340*4882a593Smuzhiyun {
1341*4882a593Smuzhiyun return (section && (section->section_mem_map & SECTION_IS_EARLY));
1342*4882a593Smuzhiyun }
1343*4882a593Smuzhiyun
valid_section_nr(unsigned long nr)1344*4882a593Smuzhiyun static inline int valid_section_nr(unsigned long nr)
1345*4882a593Smuzhiyun {
1346*4882a593Smuzhiyun return valid_section(__nr_to_section(nr));
1347*4882a593Smuzhiyun }
1348*4882a593Smuzhiyun
online_section(struct mem_section * section)1349*4882a593Smuzhiyun static inline int online_section(struct mem_section *section)
1350*4882a593Smuzhiyun {
1351*4882a593Smuzhiyun return (section && (section->section_mem_map & SECTION_IS_ONLINE));
1352*4882a593Smuzhiyun }
1353*4882a593Smuzhiyun
online_section_nr(unsigned long nr)1354*4882a593Smuzhiyun static inline int online_section_nr(unsigned long nr)
1355*4882a593Smuzhiyun {
1356*4882a593Smuzhiyun return online_section(__nr_to_section(nr));
1357*4882a593Smuzhiyun }
1358*4882a593Smuzhiyun
1359*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_HOTPLUG
1360*4882a593Smuzhiyun void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
1361*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_HOTREMOVE
1362*4882a593Smuzhiyun void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
1363*4882a593Smuzhiyun #endif
1364*4882a593Smuzhiyun #endif
1365*4882a593Smuzhiyun
__pfn_to_section(unsigned long pfn)1366*4882a593Smuzhiyun static inline struct mem_section *__pfn_to_section(unsigned long pfn)
1367*4882a593Smuzhiyun {
1368*4882a593Smuzhiyun return __nr_to_section(pfn_to_section_nr(pfn));
1369*4882a593Smuzhiyun }
1370*4882a593Smuzhiyun
1371*4882a593Smuzhiyun extern unsigned long __highest_present_section_nr;
1372*4882a593Smuzhiyun
subsection_map_index(unsigned long pfn)1373*4882a593Smuzhiyun static inline int subsection_map_index(unsigned long pfn)
1374*4882a593Smuzhiyun {
1375*4882a593Smuzhiyun return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION;
1376*4882a593Smuzhiyun }
1377*4882a593Smuzhiyun
1378*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM_VMEMMAP
pfn_section_valid(struct mem_section * ms,unsigned long pfn)1379*4882a593Smuzhiyun static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
1380*4882a593Smuzhiyun {
1381*4882a593Smuzhiyun int idx = subsection_map_index(pfn);
1382*4882a593Smuzhiyun
1383*4882a593Smuzhiyun return test_bit(idx, ms->usage->subsection_map);
1384*4882a593Smuzhiyun }
1385*4882a593Smuzhiyun #else
pfn_section_valid(struct mem_section * ms,unsigned long pfn)1386*4882a593Smuzhiyun static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
1387*4882a593Smuzhiyun {
1388*4882a593Smuzhiyun return 1;
1389*4882a593Smuzhiyun }
1390*4882a593Smuzhiyun #endif
1391*4882a593Smuzhiyun
1392*4882a593Smuzhiyun #ifndef CONFIG_HAVE_ARCH_PFN_VALID
pfn_valid(unsigned long pfn)1393*4882a593Smuzhiyun static inline int pfn_valid(unsigned long pfn)
1394*4882a593Smuzhiyun {
1395*4882a593Smuzhiyun struct mem_section *ms;
1396*4882a593Smuzhiyun
1397*4882a593Smuzhiyun if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
1398*4882a593Smuzhiyun return 0;
1399*4882a593Smuzhiyun ms = __nr_to_section(pfn_to_section_nr(pfn));
1400*4882a593Smuzhiyun if (!valid_section(ms))
1401*4882a593Smuzhiyun return 0;
1402*4882a593Smuzhiyun /*
1403*4882a593Smuzhiyun * Traditionally early sections always returned pfn_valid() for
1404*4882a593Smuzhiyun * the entire section-sized span.
1405*4882a593Smuzhiyun */
1406*4882a593Smuzhiyun return early_section(ms) || pfn_section_valid(ms, pfn);
1407*4882a593Smuzhiyun }
1408*4882a593Smuzhiyun #endif
1409*4882a593Smuzhiyun
pfn_in_present_section(unsigned long pfn)1410*4882a593Smuzhiyun static inline int pfn_in_present_section(unsigned long pfn)
1411*4882a593Smuzhiyun {
1412*4882a593Smuzhiyun if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
1413*4882a593Smuzhiyun return 0;
1414*4882a593Smuzhiyun return present_section(__nr_to_section(pfn_to_section_nr(pfn)));
1415*4882a593Smuzhiyun }
1416*4882a593Smuzhiyun
next_present_section_nr(unsigned long section_nr)1417*4882a593Smuzhiyun static inline unsigned long next_present_section_nr(unsigned long section_nr)
1418*4882a593Smuzhiyun {
1419*4882a593Smuzhiyun while (++section_nr <= __highest_present_section_nr) {
1420*4882a593Smuzhiyun if (present_section_nr(section_nr))
1421*4882a593Smuzhiyun return section_nr;
1422*4882a593Smuzhiyun }
1423*4882a593Smuzhiyun
1424*4882a593Smuzhiyun return -1;
1425*4882a593Smuzhiyun }
1426*4882a593Smuzhiyun
1427*4882a593Smuzhiyun /*
1428*4882a593Smuzhiyun * These are _only_ used during initialisation, therefore they
1429*4882a593Smuzhiyun * can use __initdata ... They could have names to indicate
1430*4882a593Smuzhiyun * this restriction.
1431*4882a593Smuzhiyun */
1432*4882a593Smuzhiyun #ifdef CONFIG_NUMA
1433*4882a593Smuzhiyun #define pfn_to_nid(pfn) \
1434*4882a593Smuzhiyun ({ \
1435*4882a593Smuzhiyun unsigned long __pfn_to_nid_pfn = (pfn); \
1436*4882a593Smuzhiyun page_to_nid(pfn_to_page(__pfn_to_nid_pfn)); \
1437*4882a593Smuzhiyun })
1438*4882a593Smuzhiyun #else
1439*4882a593Smuzhiyun #define pfn_to_nid(pfn) (0)
1440*4882a593Smuzhiyun #endif
1441*4882a593Smuzhiyun
1442*4882a593Smuzhiyun void sparse_init(void);
1443*4882a593Smuzhiyun #else
1444*4882a593Smuzhiyun #define sparse_init() do {} while (0)
1445*4882a593Smuzhiyun #define sparse_index_init(_sec, _nid) do {} while (0)
1446*4882a593Smuzhiyun #define pfn_in_present_section pfn_valid
1447*4882a593Smuzhiyun #define subsection_map_init(_pfn, _nr_pages) do {} while (0)
1448*4882a593Smuzhiyun #endif /* CONFIG_SPARSEMEM */
1449*4882a593Smuzhiyun
1450*4882a593Smuzhiyun /*
1451*4882a593Smuzhiyun * During memory init memblocks map pfns to nids. The search is expensive and
1452*4882a593Smuzhiyun * this caches recent lookups. The implementation of __early_pfn_to_nid
1453*4882a593Smuzhiyun * may treat start/end as pfns or sections.
1454*4882a593Smuzhiyun */
1455*4882a593Smuzhiyun struct mminit_pfnnid_cache {
1456*4882a593Smuzhiyun unsigned long last_start;
1457*4882a593Smuzhiyun unsigned long last_end;
1458*4882a593Smuzhiyun int last_nid;
1459*4882a593Smuzhiyun };
1460*4882a593Smuzhiyun
1461*4882a593Smuzhiyun /*
1462*4882a593Smuzhiyun * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
1463*4882a593Smuzhiyun * need to check pfn validity within that MAX_ORDER_NR_PAGES block.
1464*4882a593Smuzhiyun * pfn_valid_within() should be used in this case; we optimise this away
1465*4882a593Smuzhiyun * when we have no holes within a MAX_ORDER_NR_PAGES block.
1466*4882a593Smuzhiyun */
1467*4882a593Smuzhiyun #ifdef CONFIG_HOLES_IN_ZONE
1468*4882a593Smuzhiyun #define pfn_valid_within(pfn) pfn_valid(pfn)
1469*4882a593Smuzhiyun #else
1470*4882a593Smuzhiyun #define pfn_valid_within(pfn) (1)
1471*4882a593Smuzhiyun #endif
1472*4882a593Smuzhiyun
1473*4882a593Smuzhiyun #endif /* !__GENERATING_BOUNDS.H */
1474*4882a593Smuzhiyun #endif /* !__ASSEMBLY__ */
1475*4882a593Smuzhiyun #endif /* _LINUX_MMZONE_H */
1476