xref: /OK3568_Linux_fs/kernel/include/linux/mmzone.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0 */
2*4882a593Smuzhiyun #ifndef _LINUX_MMZONE_H
3*4882a593Smuzhiyun #define _LINUX_MMZONE_H
4*4882a593Smuzhiyun 
5*4882a593Smuzhiyun #ifndef __ASSEMBLY__
6*4882a593Smuzhiyun #ifndef __GENERATING_BOUNDS_H
7*4882a593Smuzhiyun 
8*4882a593Smuzhiyun #include <linux/spinlock.h>
9*4882a593Smuzhiyun #include <linux/list.h>
10*4882a593Smuzhiyun #include <linux/wait.h>
11*4882a593Smuzhiyun #include <linux/bitops.h>
12*4882a593Smuzhiyun #include <linux/cache.h>
13*4882a593Smuzhiyun #include <linux/threads.h>
14*4882a593Smuzhiyun #include <linux/numa.h>
15*4882a593Smuzhiyun #include <linux/init.h>
16*4882a593Smuzhiyun #include <linux/seqlock.h>
17*4882a593Smuzhiyun #include <linux/nodemask.h>
18*4882a593Smuzhiyun #include <linux/pageblock-flags.h>
19*4882a593Smuzhiyun #include <linux/page-flags-layout.h>
20*4882a593Smuzhiyun #include <linux/atomic.h>
21*4882a593Smuzhiyun #include <linux/mm_types.h>
22*4882a593Smuzhiyun #include <linux/page-flags.h>
23*4882a593Smuzhiyun #include <linux/android_kabi.h>
24*4882a593Smuzhiyun #include <asm/page.h>
25*4882a593Smuzhiyun 
26*4882a593Smuzhiyun /* Free memory management - zoned buddy allocator.  */
27*4882a593Smuzhiyun #ifndef CONFIG_FORCE_MAX_ZONEORDER
28*4882a593Smuzhiyun #define MAX_ORDER 11
29*4882a593Smuzhiyun #else
30*4882a593Smuzhiyun #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
31*4882a593Smuzhiyun #endif
32*4882a593Smuzhiyun #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
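/*
 * Editorial worked example (not part of the upstream header): with the
 * default MAX_ORDER of 11, the largest buddy block spans
 * MAX_ORDER_NR_PAGES = 1 << 10 = 1024 pages, i.e. 4 MiB with 4 KiB pages:
 *
 *	unsigned long max_block_bytes = MAX_ORDER_NR_PAGES << PAGE_SHIFT;
 */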
33*4882a593Smuzhiyun 
34*4882a593Smuzhiyun /*
35*4882a593Smuzhiyun  * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
36*4882a593Smuzhiyun  * costly to service.  That is between allocation orders which should
37*4882a593Smuzhiyun  * coalesce naturally under reasonable reclaim pressure and those which
38*4882a593Smuzhiyun  * will not.
39*4882a593Smuzhiyun  */
40*4882a593Smuzhiyun #define PAGE_ALLOC_COSTLY_ORDER 3
41*4882a593Smuzhiyun 
42*4882a593Smuzhiyun #define MAX_KSWAPD_THREADS 16
43*4882a593Smuzhiyun 
44*4882a593Smuzhiyun enum migratetype {
45*4882a593Smuzhiyun 	MIGRATE_UNMOVABLE,
46*4882a593Smuzhiyun 	MIGRATE_MOVABLE,
47*4882a593Smuzhiyun 	MIGRATE_RECLAIMABLE,
48*4882a593Smuzhiyun #ifdef CONFIG_CMA
49*4882a593Smuzhiyun 	/*
50*4882a593Smuzhiyun 	 * MIGRATE_CMA migration type is designed to mimic the way
51*4882a593Smuzhiyun 	 * ZONE_MOVABLE works.  Only movable pages can be allocated
52*4882a593Smuzhiyun 	 * from MIGRATE_CMA pageblocks and the page allocator never
53*4882a593Smuzhiyun 	 * implicitly changes the migration type of a MIGRATE_CMA pageblock.
54*4882a593Smuzhiyun 	 *
55*4882a593Smuzhiyun 	 * The way to use it is to change migratetype of a range of
56*4882a593Smuzhiyun 	 * pageblocks to MIGRATE_CMA which can be done by
57*4882a593Smuzhiyun 	 * __free_pageblock_cma() function.  What is important though
58*4882a593Smuzhiyun 	 * is that a range of pageblocks must be aligned to
59*4882a593Smuzhiyun 	 * MAX_ORDER_NR_PAGES should the biggest page be bigger than
60*4882a593Smuzhiyun 	 * a single pageblock.
61*4882a593Smuzhiyun 	 */
62*4882a593Smuzhiyun 	MIGRATE_CMA,
63*4882a593Smuzhiyun #endif
64*4882a593Smuzhiyun 	MIGRATE_PCPTYPES, /* the number of types on the pcp lists */
65*4882a593Smuzhiyun 	MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
66*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_ISOLATION
67*4882a593Smuzhiyun 	MIGRATE_ISOLATE,	/* can't allocate from here */
68*4882a593Smuzhiyun #endif
69*4882a593Smuzhiyun 	MIGRATE_TYPES
70*4882a593Smuzhiyun };
71*4882a593Smuzhiyun 
72*4882a593Smuzhiyun /* In mm/page_alloc.c; keep in sync also with show_migration_types() there */
73*4882a593Smuzhiyun extern const char * const migratetype_names[MIGRATE_TYPES];
74*4882a593Smuzhiyun 
75*4882a593Smuzhiyun #ifdef CONFIG_CMA
76*4882a593Smuzhiyun #  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
77*4882a593Smuzhiyun #  define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
78*4882a593Smuzhiyun #  define get_cma_migrate_type() MIGRATE_CMA
79*4882a593Smuzhiyun #else
80*4882a593Smuzhiyun #  define is_migrate_cma(migratetype) false
81*4882a593Smuzhiyun #  define is_migrate_cma_page(_page) false
82*4882a593Smuzhiyun #  define get_cma_migrate_type() MIGRATE_MOVABLE
83*4882a593Smuzhiyun #endif
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun static inline bool is_migrate_movable(int mt)
86*4882a593Smuzhiyun {
87*4882a593Smuzhiyun 	return is_migrate_cma(mt) || mt == MIGRATE_MOVABLE;
88*4882a593Smuzhiyun }
89*4882a593Smuzhiyun 
90*4882a593Smuzhiyun #define for_each_migratetype_order(order, type) \
91*4882a593Smuzhiyun 	for (order = 0; order < MAX_ORDER; order++) \
92*4882a593Smuzhiyun 		for (type = 0; type < MIGRATE_TYPES; type++)
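/*
 * Editorial sketch (not part of the upstream header): the macro above walks
 * every (order, migratetype) pair, roughly the way the free lists are
 * initialised in mm/page_alloc.c:
 *
 *	unsigned int order, t;
 *
 *	for_each_migratetype_order(order, t)
 *		INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
 */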
93*4882a593Smuzhiyun 
94*4882a593Smuzhiyun extern int page_group_by_mobility_disabled;
95*4882a593Smuzhiyun 
96*4882a593Smuzhiyun #define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
97*4882a593Smuzhiyun 
98*4882a593Smuzhiyun #define get_pageblock_migratetype(page)					\
99*4882a593Smuzhiyun 	get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
100*4882a593Smuzhiyun 
101*4882a593Smuzhiyun struct free_area {
102*4882a593Smuzhiyun 	struct list_head	free_list[MIGRATE_TYPES];
103*4882a593Smuzhiyun 	unsigned long		nr_free;
104*4882a593Smuzhiyun };
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun static inline struct page *get_page_from_free_area(struct free_area *area,
107*4882a593Smuzhiyun 					    int migratetype)
108*4882a593Smuzhiyun {
109*4882a593Smuzhiyun 	return list_first_entry_or_null(&area->free_list[migratetype],
110*4882a593Smuzhiyun 					struct page, lru);
111*4882a593Smuzhiyun }
112*4882a593Smuzhiyun 
113*4882a593Smuzhiyun static inline bool free_area_empty(struct free_area *area, int migratetype)
114*4882a593Smuzhiyun {
115*4882a593Smuzhiyun 	return list_empty(&area->free_list[migratetype]);
116*4882a593Smuzhiyun }
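/*
 * Editorial sketch (hypothetical caller, not part of the upstream header):
 * a buddy-allocator style lookup combines the two helpers above, e.g.
 *
 *	struct free_area *area = &zone->free_area[order];
 *
 *	if (free_area_empty(area, migratetype))
 *		return NULL;
 *	return get_page_from_free_area(area, migratetype);
 */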
117*4882a593Smuzhiyun 
118*4882a593Smuzhiyun struct pglist_data;
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun /*
121*4882a593Smuzhiyun  * zone->lock and the zone lru_lock are two of the hottest locks in the kernel.
122*4882a593Smuzhiyun  * So add a wild amount of padding here to ensure that they fall into separate
123*4882a593Smuzhiyun  * cachelines.  There are very few zone structures in the machine, so space
124*4882a593Smuzhiyun  * consumption is not a concern here.
125*4882a593Smuzhiyun  */
126*4882a593Smuzhiyun #if defined(CONFIG_SMP)
127*4882a593Smuzhiyun struct zone_padding {
128*4882a593Smuzhiyun 	char x[0];
129*4882a593Smuzhiyun } ____cacheline_internodealigned_in_smp;
130*4882a593Smuzhiyun #define ZONE_PADDING(name)	struct zone_padding name;
131*4882a593Smuzhiyun #else
132*4882a593Smuzhiyun #define ZONE_PADDING(name)
133*4882a593Smuzhiyun #endif
134*4882a593Smuzhiyun 
135*4882a593Smuzhiyun #ifdef CONFIG_NUMA
136*4882a593Smuzhiyun enum numa_stat_item {
137*4882a593Smuzhiyun 	NUMA_HIT,		/* allocated in intended node */
138*4882a593Smuzhiyun 	NUMA_MISS,		/* allocated in non intended node */
139*4882a593Smuzhiyun 	NUMA_FOREIGN,		/* was intended here, hit elsewhere */
140*4882a593Smuzhiyun 	NUMA_INTERLEAVE_HIT,	/* interleaver preferred this zone */
141*4882a593Smuzhiyun 	NUMA_LOCAL,		/* allocation from local node */
142*4882a593Smuzhiyun 	NUMA_OTHER,		/* allocation from other node */
143*4882a593Smuzhiyun 	NR_VM_NUMA_STAT_ITEMS
144*4882a593Smuzhiyun };
145*4882a593Smuzhiyun #else
146*4882a593Smuzhiyun #define NR_VM_NUMA_STAT_ITEMS 0
147*4882a593Smuzhiyun #endif
148*4882a593Smuzhiyun 
149*4882a593Smuzhiyun enum zone_stat_item {
150*4882a593Smuzhiyun 	/* First 128 byte cacheline (assuming 64 bit words) */
151*4882a593Smuzhiyun 	NR_FREE_PAGES,
152*4882a593Smuzhiyun 	NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
153*4882a593Smuzhiyun 	NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
154*4882a593Smuzhiyun 	NR_ZONE_ACTIVE_ANON,
155*4882a593Smuzhiyun 	NR_ZONE_INACTIVE_FILE,
156*4882a593Smuzhiyun 	NR_ZONE_ACTIVE_FILE,
157*4882a593Smuzhiyun 	NR_ZONE_UNEVICTABLE,
158*4882a593Smuzhiyun 	NR_ZONE_WRITE_PENDING,	/* Count of dirty, writeback and unstable pages */
159*4882a593Smuzhiyun 	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
160*4882a593Smuzhiyun 	NR_PAGETABLE,		/* used for pagetables */
161*4882a593Smuzhiyun 	/* Second 128 byte cacheline */
162*4882a593Smuzhiyun 	NR_BOUNCE,
163*4882a593Smuzhiyun 	NR_ZSPAGES,		/* allocated in zsmalloc */
164*4882a593Smuzhiyun 	NR_FREE_CMA_PAGES,
165*4882a593Smuzhiyun 	NR_VM_ZONE_STAT_ITEMS };
166*4882a593Smuzhiyun 
167*4882a593Smuzhiyun enum node_stat_item {
168*4882a593Smuzhiyun 	NR_LRU_BASE,
169*4882a593Smuzhiyun 	NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
170*4882a593Smuzhiyun 	NR_ACTIVE_ANON,		/*  "     "     "   "       "         */
171*4882a593Smuzhiyun 	NR_INACTIVE_FILE,	/*  "     "     "   "       "         */
172*4882a593Smuzhiyun 	NR_ACTIVE_FILE,		/*  "     "     "   "       "         */
173*4882a593Smuzhiyun 	NR_UNEVICTABLE,		/*  "     "     "   "       "         */
174*4882a593Smuzhiyun 	NR_SLAB_RECLAIMABLE_B,
175*4882a593Smuzhiyun 	NR_SLAB_UNRECLAIMABLE_B,
176*4882a593Smuzhiyun 	NR_ISOLATED_ANON,	/* Temporary isolated pages from anon lru */
177*4882a593Smuzhiyun 	NR_ISOLATED_FILE,	/* Temporary isolated pages from file lru */
178*4882a593Smuzhiyun 	WORKINGSET_NODES,
179*4882a593Smuzhiyun 	WORKINGSET_REFAULT_BASE,
180*4882a593Smuzhiyun 	WORKINGSET_REFAULT_ANON = WORKINGSET_REFAULT_BASE,
181*4882a593Smuzhiyun 	WORKINGSET_REFAULT_FILE,
182*4882a593Smuzhiyun 	WORKINGSET_ACTIVATE_BASE,
183*4882a593Smuzhiyun 	WORKINGSET_ACTIVATE_ANON = WORKINGSET_ACTIVATE_BASE,
184*4882a593Smuzhiyun 	WORKINGSET_ACTIVATE_FILE,
185*4882a593Smuzhiyun 	WORKINGSET_RESTORE_BASE,
186*4882a593Smuzhiyun 	WORKINGSET_RESTORE_ANON = WORKINGSET_RESTORE_BASE,
187*4882a593Smuzhiyun 	WORKINGSET_RESTORE_FILE,
188*4882a593Smuzhiyun 	WORKINGSET_NODERECLAIM,
189*4882a593Smuzhiyun 	NR_ANON_MAPPED,	/* Mapped anonymous pages */
190*4882a593Smuzhiyun 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
191*4882a593Smuzhiyun 			   only modified from process context */
192*4882a593Smuzhiyun 	NR_FILE_PAGES,
193*4882a593Smuzhiyun 	NR_FILE_DIRTY,
194*4882a593Smuzhiyun 	NR_WRITEBACK,
195*4882a593Smuzhiyun 	NR_WRITEBACK_TEMP,	/* Writeback using temporary buffers */
196*4882a593Smuzhiyun 	NR_SHMEM,		/* shmem pages (includes tmpfs/GEM pages) */
197*4882a593Smuzhiyun 	NR_SHMEM_THPS,
198*4882a593Smuzhiyun 	NR_SHMEM_PMDMAPPED,
199*4882a593Smuzhiyun 	NR_FILE_THPS,
200*4882a593Smuzhiyun 	NR_FILE_PMDMAPPED,
201*4882a593Smuzhiyun 	NR_ANON_THPS,
202*4882a593Smuzhiyun 	NR_VMSCAN_WRITE,
203*4882a593Smuzhiyun 	NR_VMSCAN_IMMEDIATE,	/* Prioritise for reclaim when writeback ends */
204*4882a593Smuzhiyun 	NR_DIRTIED,		/* page dirtyings since bootup */
205*4882a593Smuzhiyun 	NR_WRITTEN,		/* page writings since bootup */
206*4882a593Smuzhiyun 	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
207*4882a593Smuzhiyun 	NR_FOLL_PIN_ACQUIRED,	/* via: pin_user_page(), gup flag: FOLL_PIN */
208*4882a593Smuzhiyun 	NR_FOLL_PIN_RELEASED,	/* pages returned via unpin_user_page() */
209*4882a593Smuzhiyun 	NR_KERNEL_STACK_KB,	/* measured in KiB */
210*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
211*4882a593Smuzhiyun 	NR_KERNEL_SCS_KB,	/* measured in KiB */
212*4882a593Smuzhiyun #endif
213*4882a593Smuzhiyun 	NR_VM_NODE_STAT_ITEMS
214*4882a593Smuzhiyun };
215*4882a593Smuzhiyun 
216*4882a593Smuzhiyun /*
217*4882a593Smuzhiyun  * Returns true if the value is measured in bytes (most vmstat values are
218*4882a593Smuzhiyun  * measured in pages). This defines the API part, the internal representation
219*4882a593Smuzhiyun  * might be different.
220*4882a593Smuzhiyun  */
221*4882a593Smuzhiyun static __always_inline bool vmstat_item_in_bytes(int idx)
222*4882a593Smuzhiyun {
223*4882a593Smuzhiyun 	/*
224*4882a593Smuzhiyun 	 * Global and per-node slab counters track slab pages.
225*4882a593Smuzhiyun 	 * It's expected that changes are multiples of PAGE_SIZE.
226*4882a593Smuzhiyun 	 * Internally values are stored in pages.
227*4882a593Smuzhiyun 	 *
228*4882a593Smuzhiyun 	 * Per-memcg and per-lruvec counters track memory, consumed
229*4882a593Smuzhiyun 	 * by individual slab objects. These counters are actually
230*4882a593Smuzhiyun 	 * byte-precise.
231*4882a593Smuzhiyun 	 */
232*4882a593Smuzhiyun 	return (idx == NR_SLAB_RECLAIMABLE_B ||
233*4882a593Smuzhiyun 		idx == NR_SLAB_UNRECLAIMABLE_B);
234*4882a593Smuzhiyun }
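/*
 * Editorial sketch (hypothetical update path, not part of the upstream
 * header): these items are byte-based at the API level but stored as pages
 * internally, so an updater converts the delta before applying it, e.g.
 *
 *	if (vmstat_item_in_bytes(idx))
 *		delta >>= PAGE_SHIFT;	// bytes -> pages; delta is a multiple of PAGE_SIZE
 */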
235*4882a593Smuzhiyun 
236*4882a593Smuzhiyun /*
237*4882a593Smuzhiyun  * We do arithmetic on the LRU lists in various places in the code,
238*4882a593Smuzhiyun  * so it is important to keep the active lists LRU_ACTIVE higher in
239*4882a593Smuzhiyun  * the array than the corresponding inactive lists, and to keep
240*4882a593Smuzhiyun  * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
241*4882a593Smuzhiyun  *
242*4882a593Smuzhiyun  * This has to be kept in sync with the statistics in zone_stat_item
243*4882a593Smuzhiyun  * above and the descriptions in vmstat_text in mm/vmstat.c
244*4882a593Smuzhiyun  */
245*4882a593Smuzhiyun #define LRU_BASE 0
246*4882a593Smuzhiyun #define LRU_ACTIVE 1
247*4882a593Smuzhiyun #define LRU_FILE 2
248*4882a593Smuzhiyun 
249*4882a593Smuzhiyun enum lru_list {
250*4882a593Smuzhiyun 	LRU_INACTIVE_ANON = LRU_BASE,
251*4882a593Smuzhiyun 	LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
252*4882a593Smuzhiyun 	LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
253*4882a593Smuzhiyun 	LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
254*4882a593Smuzhiyun 	LRU_UNEVICTABLE,
255*4882a593Smuzhiyun 	NR_LRU_LISTS
256*4882a593Smuzhiyun };
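/*
 * Editorial worked example (not part of the upstream header) of the LRU
 * arithmetic described above: an active list is its inactive counterpart
 * plus LRU_ACTIVE, and a file list is its anon counterpart plus LRU_FILE,
 * e.g.
 *
 *	LRU_ACTIVE_FILE   == LRU_BASE + LRU_FILE + LRU_ACTIVE == 3
 *	LRU_INACTIVE_ANON == LRU_ACTIVE_ANON - LRU_ACTIVE     == 0
 */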
257*4882a593Smuzhiyun 
258*4882a593Smuzhiyun #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++)
259*4882a593Smuzhiyun 
260*4882a593Smuzhiyun #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)
261*4882a593Smuzhiyun 
262*4882a593Smuzhiyun static inline bool is_file_lru(enum lru_list lru)
263*4882a593Smuzhiyun {
264*4882a593Smuzhiyun 	return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE);
265*4882a593Smuzhiyun }
266*4882a593Smuzhiyun 
267*4882a593Smuzhiyun static inline bool is_active_lru(enum lru_list lru)
268*4882a593Smuzhiyun {
269*4882a593Smuzhiyun 	return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
270*4882a593Smuzhiyun }
271*4882a593Smuzhiyun 
272*4882a593Smuzhiyun #define ANON_AND_FILE 2
273*4882a593Smuzhiyun 
274*4882a593Smuzhiyun enum lruvec_flags {
275*4882a593Smuzhiyun 	LRUVEC_CONGESTED,		/* lruvec has many dirty pages
276*4882a593Smuzhiyun 					 * backed by a congested BDI
277*4882a593Smuzhiyun 					 */
278*4882a593Smuzhiyun };
279*4882a593Smuzhiyun 
280*4882a593Smuzhiyun struct lruvec {
281*4882a593Smuzhiyun 	struct list_head		lists[NR_LRU_LISTS];
282*4882a593Smuzhiyun 	/*
283*4882a593Smuzhiyun 	 * These track the cost of reclaiming one LRU - file or anon -
284*4882a593Smuzhiyun 	 * over the other. As the observed cost of reclaiming one LRU
285*4882a593Smuzhiyun 	 * increases, the reclaim scan balance tips toward the other.
286*4882a593Smuzhiyun 	 */
287*4882a593Smuzhiyun 	unsigned long			anon_cost;
288*4882a593Smuzhiyun 	unsigned long			file_cost;
289*4882a593Smuzhiyun 	/* Non-resident age, driven by LRU movement */
290*4882a593Smuzhiyun 	atomic_long_t			nonresident_age;
291*4882a593Smuzhiyun 	/* Refaults at the time of last reclaim cycle */
292*4882a593Smuzhiyun 	unsigned long			refaults[ANON_AND_FILE];
293*4882a593Smuzhiyun 	/* Various lruvec state flags (enum lruvec_flags) */
294*4882a593Smuzhiyun 	unsigned long			flags;
295*4882a593Smuzhiyun #ifdef CONFIG_MEMCG
296*4882a593Smuzhiyun 	struct pglist_data *pgdat;
297*4882a593Smuzhiyun #endif
298*4882a593Smuzhiyun };
299*4882a593Smuzhiyun 
300*4882a593Smuzhiyun /* Isolate unmapped pages */
301*4882a593Smuzhiyun #define ISOLATE_UNMAPPED	((__force isolate_mode_t)0x2)
302*4882a593Smuzhiyun /* Isolate for asynchronous migration */
303*4882a593Smuzhiyun #define ISOLATE_ASYNC_MIGRATE	((__force isolate_mode_t)0x4)
304*4882a593Smuzhiyun /* Isolate unevictable pages */
305*4882a593Smuzhiyun #define ISOLATE_UNEVICTABLE	((__force isolate_mode_t)0x8)
306*4882a593Smuzhiyun 
307*4882a593Smuzhiyun /* LRU Isolation modes. */
308*4882a593Smuzhiyun typedef unsigned __bitwise isolate_mode_t;
309*4882a593Smuzhiyun 
310*4882a593Smuzhiyun enum zone_watermarks {
311*4882a593Smuzhiyun 	WMARK_MIN,
312*4882a593Smuzhiyun 	WMARK_LOW,
313*4882a593Smuzhiyun 	WMARK_HIGH,
314*4882a593Smuzhiyun 	NR_WMARK
315*4882a593Smuzhiyun };
316*4882a593Smuzhiyun 
317*4882a593Smuzhiyun #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
318*4882a593Smuzhiyun #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
319*4882a593Smuzhiyun #define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
320*4882a593Smuzhiyun #define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
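/*
 * Editorial sketch (hypothetical check, not part of the upstream header):
 * the accessors above are normally compared against a zone's free-page
 * count, roughly
 *
 *	if (zone_page_state(zone, NR_FREE_PAGES) < low_wmark_pages(zone))
 *		wakeup_kswapd(zone, gfp_mask, order, zone_idx(zone));
 */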
321*4882a593Smuzhiyun 
322*4882a593Smuzhiyun struct per_cpu_pages {
323*4882a593Smuzhiyun 	int count;		/* number of pages in the list */
324*4882a593Smuzhiyun 	int high;		/* high watermark, emptying needed */
325*4882a593Smuzhiyun 	int batch;		/* chunk size for buddy add/remove */
326*4882a593Smuzhiyun 
327*4882a593Smuzhiyun 	/* Lists of pages, one per migrate type stored on the pcp-lists */
328*4882a593Smuzhiyun 	struct list_head lists[MIGRATE_PCPTYPES];
329*4882a593Smuzhiyun };
330*4882a593Smuzhiyun 
331*4882a593Smuzhiyun struct per_cpu_pageset {
332*4882a593Smuzhiyun 	struct per_cpu_pages pcp;
333*4882a593Smuzhiyun #ifdef CONFIG_NUMA
334*4882a593Smuzhiyun 	s8 expire;
335*4882a593Smuzhiyun 	u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS];
336*4882a593Smuzhiyun #endif
337*4882a593Smuzhiyun #ifdef CONFIG_SMP
338*4882a593Smuzhiyun 	s8 stat_threshold;
339*4882a593Smuzhiyun 	s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
340*4882a593Smuzhiyun #endif
341*4882a593Smuzhiyun };
342*4882a593Smuzhiyun 
343*4882a593Smuzhiyun struct per_cpu_nodestat {
344*4882a593Smuzhiyun 	s8 stat_threshold;
345*4882a593Smuzhiyun 	s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS];
346*4882a593Smuzhiyun };
347*4882a593Smuzhiyun 
348*4882a593Smuzhiyun #endif /* !__GENERATING_BOUNDS_H */
349*4882a593Smuzhiyun 
350*4882a593Smuzhiyun enum zone_type {
351*4882a593Smuzhiyun 	/*
352*4882a593Smuzhiyun 	 * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able
353*4882a593Smuzhiyun 	 * to DMA to all of the addressable memory (ZONE_NORMAL).
354*4882a593Smuzhiyun 	 * On architectures where this area covers the whole 32 bit address
355*4882a593Smuzhiyun 	 * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller
356*4882a593Smuzhiyun 	 * DMA addressing constraints. This distinction is important as a 32bit
357*4882a593Smuzhiyun 	 * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
358*4882a593Smuzhiyun 	 * platforms may need both zones as they support peripherals with
359*4882a593Smuzhiyun 	 * different DMA addressing limitations.
360*4882a593Smuzhiyun 	 */
361*4882a593Smuzhiyun #ifdef CONFIG_ZONE_DMA
362*4882a593Smuzhiyun 	ZONE_DMA,
363*4882a593Smuzhiyun #endif
364*4882a593Smuzhiyun #ifdef CONFIG_ZONE_DMA32
365*4882a593Smuzhiyun 	ZONE_DMA32,
366*4882a593Smuzhiyun #endif
367*4882a593Smuzhiyun 	/*
368*4882a593Smuzhiyun 	 * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
369*4882a593Smuzhiyun 	 * performed on pages in ZONE_NORMAL if the DMA devices support
370*4882a593Smuzhiyun 	 * transfers to all addressable memory.
371*4882a593Smuzhiyun 	 */
372*4882a593Smuzhiyun 	ZONE_NORMAL,
373*4882a593Smuzhiyun #ifdef CONFIG_HIGHMEM
374*4882a593Smuzhiyun 	/*
375*4882a593Smuzhiyun 	 * A memory area that is only addressable by the kernel through
376*4882a593Smuzhiyun 	 * mapping portions into its own address space. This is for example
377*4882a593Smuzhiyun 	 * used by i386 to allow the kernel to address the memory beyond
378*4882a593Smuzhiyun 	 * 900MB. The kernel will set up special mappings (page
379*4882a593Smuzhiyun 	 * table entries on i386) for each page that the kernel needs to
380*4882a593Smuzhiyun 	 * access.
381*4882a593Smuzhiyun 	 */
382*4882a593Smuzhiyun 	ZONE_HIGHMEM,
383*4882a593Smuzhiyun #endif
384*4882a593Smuzhiyun 	/*
385*4882a593Smuzhiyun 	 * ZONE_MOVABLE is similar to ZONE_NORMAL, except that it contains
386*4882a593Smuzhiyun 	 * movable pages with few exceptional cases described below. Main use
387*4882a593Smuzhiyun 	 * cases for ZONE_MOVABLE are to make memory offlining/unplug more
388*4882a593Smuzhiyun 	 * likely to succeed, and to locally limit unmovable allocations - e.g.,
389*4882a593Smuzhiyun 	 * to increase the number of THP/huge pages. Notable special cases are:
390*4882a593Smuzhiyun 	 *
391*4882a593Smuzhiyun 	 * 1. Pinned pages: (long-term) pinning of movable pages might
392*4882a593Smuzhiyun 	 *    essentially turn such pages unmovable. Memory offlining might
393*4882a593Smuzhiyun 	 *    retry a long time.
394*4882a593Smuzhiyun 	 * 2. memblock allocations: kernelcore/movablecore setups might create
395*4882a593Smuzhiyun 	 *    situations where ZONE_MOVABLE contains unmovable allocations
396*4882a593Smuzhiyun 	 *    after boot. Memory offlining and allocations fail early.
397*4882a593Smuzhiyun 	 * 3. Memory holes: kernelcore/movablecore setups might create very rare
398*4882a593Smuzhiyun 	 *    situations where ZONE_MOVABLE contains memory holes after boot,
399*4882a593Smuzhiyun 	 *    for example, if we have sections that are only partially
400*4882a593Smuzhiyun 	 *    populated. Memory offlining and allocations fail early.
401*4882a593Smuzhiyun 	 * 4. PG_hwpoison pages: while poisoned pages can be skipped during
402*4882a593Smuzhiyun 	 *    memory offlining, such pages cannot be allocated.
403*4882a593Smuzhiyun 	 * 5. Unmovable PG_offline pages: in paravirtualized environments,
404*4882a593Smuzhiyun 	 *    hotplugged memory blocks might only partially be managed by the
405*4882a593Smuzhiyun 	 *    buddy (e.g., via XEN-balloon, Hyper-V balloon, virtio-mem). The
406*4882a593Smuzhiyun 	 *    parts not managed by the buddy are unmovable PG_offline pages. In
407*4882a593Smuzhiyun 	 *    some cases (virtio-mem), such pages can be skipped during
408*4882a593Smuzhiyun 	 *    memory offlining, however, cannot be moved/allocated. These
409*4882a593Smuzhiyun 	 *    memory offlining; however, they cannot be moved/allocated. These
410*4882a593Smuzhiyun 	 *    exposed pages from the buddy again (e.g., to implement some sort
411*4882a593Smuzhiyun 	 *    of memory unplug in virtio-mem).
412*4882a593Smuzhiyun 	 *
413*4882a593Smuzhiyun 	 * In general, no unmovable allocations that degrade memory offlining
414*4882a593Smuzhiyun 	 * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range())
415*4882a593Smuzhiyun 	 * have to expect that migrating pages in ZONE_MOVABLE can fail (even
416*4882a593Smuzhiyun 	 * if has_unmovable_pages() states that there are no unmovable pages,
417*4882a593Smuzhiyun 	 * there can be false negatives).
418*4882a593Smuzhiyun 	 */
419*4882a593Smuzhiyun 	ZONE_MOVABLE,
420*4882a593Smuzhiyun #ifdef CONFIG_ZONE_DEVICE
421*4882a593Smuzhiyun 	ZONE_DEVICE,
422*4882a593Smuzhiyun #endif
423*4882a593Smuzhiyun 	__MAX_NR_ZONES
424*4882a593Smuzhiyun 
425*4882a593Smuzhiyun };
426*4882a593Smuzhiyun 
427*4882a593Smuzhiyun #ifndef __GENERATING_BOUNDS_H
428*4882a593Smuzhiyun 
429*4882a593Smuzhiyun #define ASYNC_AND_SYNC 2
430*4882a593Smuzhiyun 
431*4882a593Smuzhiyun struct zone {
432*4882a593Smuzhiyun 	/* Read-mostly fields */
433*4882a593Smuzhiyun 
434*4882a593Smuzhiyun 	/* zone watermarks, access with *_wmark_pages(zone) macros */
435*4882a593Smuzhiyun 	unsigned long _watermark[NR_WMARK];
436*4882a593Smuzhiyun 	unsigned long watermark_boost;
437*4882a593Smuzhiyun 
438*4882a593Smuzhiyun 	unsigned long nr_reserved_highatomic;
439*4882a593Smuzhiyun 
440*4882a593Smuzhiyun 	/*
441*4882a593Smuzhiyun 	 * We don't know if the memory that we're going to allocate will be
442*4882a593Smuzhiyun 	 * freeable or/and it will be released eventually, so to avoid totally
443*4882a593Smuzhiyun 	 * wasting several GB of ram we must reserve some of the lower zone
444*4882a593Smuzhiyun 	 * memory (otherwise we risk running OOM on the lower zones despite
445*4882a593Smuzhiyun 	 * there being tons of freeable ram on the higher zones).  This array is
446*4882a593Smuzhiyun 	 * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl
447*4882a593Smuzhiyun 	 * changes.
448*4882a593Smuzhiyun 	 */
449*4882a593Smuzhiyun 	long lowmem_reserve[MAX_NR_ZONES];
450*4882a593Smuzhiyun 
451*4882a593Smuzhiyun #ifdef CONFIG_NEED_MULTIPLE_NODES
452*4882a593Smuzhiyun 	int node;
453*4882a593Smuzhiyun #endif
454*4882a593Smuzhiyun 	struct pglist_data	*zone_pgdat;
455*4882a593Smuzhiyun 	struct per_cpu_pageset __percpu *pageset;
456*4882a593Smuzhiyun 
457*4882a593Smuzhiyun #ifndef CONFIG_SPARSEMEM
458*4882a593Smuzhiyun 	/*
459*4882a593Smuzhiyun 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
460*4882a593Smuzhiyun 	 * In SPARSEMEM, this map is stored in struct mem_section
461*4882a593Smuzhiyun 	 */
462*4882a593Smuzhiyun 	unsigned long		*pageblock_flags;
463*4882a593Smuzhiyun #endif /* CONFIG_SPARSEMEM */
464*4882a593Smuzhiyun 
465*4882a593Smuzhiyun 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
466*4882a593Smuzhiyun 	unsigned long		zone_start_pfn;
467*4882a593Smuzhiyun 
468*4882a593Smuzhiyun 	/*
469*4882a593Smuzhiyun 	 * spanned_pages is the total pages spanned by the zone, including
470*4882a593Smuzhiyun 	 * holes, which is calculated as:
471*4882a593Smuzhiyun 	 * 	spanned_pages = zone_end_pfn - zone_start_pfn;
472*4882a593Smuzhiyun 	 *
473*4882a593Smuzhiyun 	 * present_pages is physical pages existing within the zone, which
474*4882a593Smuzhiyun 	 * is calculated as:
475*4882a593Smuzhiyun 	 *	present_pages = spanned_pages - absent_pages(pages in holes);
476*4882a593Smuzhiyun 	 *
477*4882a593Smuzhiyun 	 * managed_pages is present pages managed by the buddy system, which
478*4882a593Smuzhiyun 	 * is calculated as (reserved_pages includes pages allocated by the
479*4882a593Smuzhiyun 	 * bootmem allocator):
480*4882a593Smuzhiyun 	 *	managed_pages = present_pages - reserved_pages;
481*4882a593Smuzhiyun 	 *
482*4882a593Smuzhiyun 	 * cma_pages is the number of present pages assigned for CMA use
483*4882a593Smuzhiyun 	 * (MIGRATE_CMA).
484*4882a593Smuzhiyun 	 *
485*4882a593Smuzhiyun 	 * So present_pages may be used by memory hotplug or memory power
486*4882a593Smuzhiyun 	 * management logic to figure out unmanaged pages by checking
487*4882a593Smuzhiyun 	 * (present_pages - managed_pages). And managed_pages should be used
488*4882a593Smuzhiyun 	 * by page allocator and vm scanner to calculate all kinds of watermarks
489*4882a593Smuzhiyun 	 * and thresholds.
490*4882a593Smuzhiyun 	 *
491*4882a593Smuzhiyun 	 * Locking rules:
492*4882a593Smuzhiyun 	 *
493*4882a593Smuzhiyun 	 * zone_start_pfn and spanned_pages are protected by span_seqlock.
494*4882a593Smuzhiyun 	 * It is a seqlock because it has to be read outside of zone->lock,
495*4882a593Smuzhiyun 	 * and it is done in the main allocator path.  But, it is written
496*4882a593Smuzhiyun 	 * quite infrequently.
497*4882a593Smuzhiyun 	 *
498*4882a593Smuzhiyun 	 * The span_seq lock is declared along with zone->lock because it is
499*4882a593Smuzhiyun 	 * frequently read in proximity to zone->lock.  It's good to
500*4882a593Smuzhiyun 	 * give them a chance of being in the same cacheline.
501*4882a593Smuzhiyun 	 *
502*4882a593Smuzhiyun 	 * Write access to present_pages at runtime should be protected by
503*4882a593Smuzhiyun 	 * mem_hotplug_begin/end(). Any reader who can't tolerate drift of
504*4882a593Smuzhiyun 	 * present_pages should use get_online_mems() to get a stable value.
505*4882a593Smuzhiyun 	 */
506*4882a593Smuzhiyun 	atomic_long_t		managed_pages;
507*4882a593Smuzhiyun 	unsigned long		spanned_pages;
508*4882a593Smuzhiyun 	unsigned long		present_pages;
509*4882a593Smuzhiyun #ifdef CONFIG_CMA
510*4882a593Smuzhiyun 	unsigned long		cma_pages;
511*4882a593Smuzhiyun #endif
512*4882a593Smuzhiyun 
513*4882a593Smuzhiyun 	const char		*name;
514*4882a593Smuzhiyun 
515*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_ISOLATION
516*4882a593Smuzhiyun 	/*
517*4882a593Smuzhiyun 	 * Number of isolated pageblocks. It is used to solve an incorrect
518*4882a593Smuzhiyun 	 * freepage counting problem caused by racy retrieval of the
519*4882a593Smuzhiyun 	 * migratetype of a pageblock. Protected by zone->lock.
520*4882a593Smuzhiyun 	 */
521*4882a593Smuzhiyun 	unsigned long		nr_isolate_pageblock;
522*4882a593Smuzhiyun #endif
523*4882a593Smuzhiyun 
524*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_HOTPLUG
525*4882a593Smuzhiyun 	/* see spanned/present_pages for more description */
526*4882a593Smuzhiyun 	seqlock_t		span_seqlock;
527*4882a593Smuzhiyun #endif
528*4882a593Smuzhiyun 
529*4882a593Smuzhiyun 	int initialized;
530*4882a593Smuzhiyun 
531*4882a593Smuzhiyun 	/* Write-intensive fields used from the page allocator */
532*4882a593Smuzhiyun 	ZONE_PADDING(_pad1_)
533*4882a593Smuzhiyun 
534*4882a593Smuzhiyun 	/* free areas of different sizes */
535*4882a593Smuzhiyun 	struct free_area	free_area[MAX_ORDER];
536*4882a593Smuzhiyun 
537*4882a593Smuzhiyun 	/* zone flags, see below */
538*4882a593Smuzhiyun 	unsigned long		flags;
539*4882a593Smuzhiyun 
540*4882a593Smuzhiyun 	/* Primarily protects free_area */
541*4882a593Smuzhiyun 	spinlock_t		lock;
542*4882a593Smuzhiyun 
543*4882a593Smuzhiyun 	/* Write-intensive fields used by compaction and vmstats. */
544*4882a593Smuzhiyun 	ZONE_PADDING(_pad2_)
545*4882a593Smuzhiyun 
546*4882a593Smuzhiyun 	/*
547*4882a593Smuzhiyun 	 * When free pages are below this point, additional steps are taken
548*4882a593Smuzhiyun 	 * when reading the number of free pages to avoid per-cpu counter
549*4882a593Smuzhiyun 	 * drift allowing watermarks to be breached
550*4882a593Smuzhiyun 	 */
551*4882a593Smuzhiyun 	unsigned long percpu_drift_mark;
552*4882a593Smuzhiyun 
553*4882a593Smuzhiyun #if defined CONFIG_COMPACTION || defined CONFIG_CMA
554*4882a593Smuzhiyun 	/* pfn where compaction free scanner should start */
555*4882a593Smuzhiyun 	unsigned long		compact_cached_free_pfn;
556*4882a593Smuzhiyun 	/* pfn where compaction migration scanner should start */
557*4882a593Smuzhiyun 	unsigned long		compact_cached_migrate_pfn[ASYNC_AND_SYNC];
558*4882a593Smuzhiyun 	unsigned long		compact_init_migrate_pfn;
559*4882a593Smuzhiyun 	unsigned long		compact_init_free_pfn;
560*4882a593Smuzhiyun #endif
561*4882a593Smuzhiyun 
562*4882a593Smuzhiyun #ifdef CONFIG_COMPACTION
563*4882a593Smuzhiyun 	/*
564*4882a593Smuzhiyun 	 * On compaction failure, 1<<compact_defer_shift compactions
565*4882a593Smuzhiyun 	 * are skipped before trying again. The number attempted since
566*4882a593Smuzhiyun 	 * last failure is tracked with compact_considered.
567*4882a593Smuzhiyun 	 * compact_order_failed is the minimum compaction failed order.
568*4882a593Smuzhiyun 	 */
569*4882a593Smuzhiyun 	unsigned int		compact_considered;
570*4882a593Smuzhiyun 	unsigned int		compact_defer_shift;
571*4882a593Smuzhiyun 	int			compact_order_failed;
572*4882a593Smuzhiyun #endif
573*4882a593Smuzhiyun 
574*4882a593Smuzhiyun #if defined CONFIG_COMPACTION || defined CONFIG_CMA
575*4882a593Smuzhiyun 	/* Set to true when the PG_migrate_skip bits should be cleared */
576*4882a593Smuzhiyun 	bool			compact_blockskip_flush;
577*4882a593Smuzhiyun #endif
578*4882a593Smuzhiyun 
579*4882a593Smuzhiyun 	bool			contiguous;
580*4882a593Smuzhiyun 
581*4882a593Smuzhiyun 	ZONE_PADDING(_pad3_)
582*4882a593Smuzhiyun 	/* Zone statistics */
583*4882a593Smuzhiyun 	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
584*4882a593Smuzhiyun 	atomic_long_t		vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
585*4882a593Smuzhiyun 
586*4882a593Smuzhiyun 	ANDROID_KABI_RESERVE(1);
587*4882a593Smuzhiyun 	ANDROID_KABI_RESERVE(2);
588*4882a593Smuzhiyun 	ANDROID_KABI_RESERVE(3);
589*4882a593Smuzhiyun 	ANDROID_KABI_RESERVE(4);
590*4882a593Smuzhiyun } ____cacheline_internodealigned_in_smp;
591*4882a593Smuzhiyun 
592*4882a593Smuzhiyun enum pgdat_flags {
593*4882a593Smuzhiyun 	PGDAT_DIRTY,			/* reclaim scanning has recently found
594*4882a593Smuzhiyun 					 * many dirty file pages at the tail
595*4882a593Smuzhiyun 					 * of the LRU.
596*4882a593Smuzhiyun 					 */
597*4882a593Smuzhiyun 	PGDAT_WRITEBACK,		/* reclaim scanning has recently found
598*4882a593Smuzhiyun 					 * many pages under writeback
599*4882a593Smuzhiyun 					 */
600*4882a593Smuzhiyun 	PGDAT_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
601*4882a593Smuzhiyun };
602*4882a593Smuzhiyun 
603*4882a593Smuzhiyun enum zone_flags {
604*4882a593Smuzhiyun 	ZONE_BOOSTED_WATERMARK,		/* zone recently boosted watermarks.
605*4882a593Smuzhiyun 					 * Cleared when kswapd is woken.
606*4882a593Smuzhiyun 					 */
607*4882a593Smuzhiyun };
608*4882a593Smuzhiyun 
609*4882a593Smuzhiyun static inline unsigned long zone_managed_pages(struct zone *zone)
610*4882a593Smuzhiyun {
611*4882a593Smuzhiyun 	return (unsigned long)atomic_long_read(&zone->managed_pages);
612*4882a593Smuzhiyun }
613*4882a593Smuzhiyun 
614*4882a593Smuzhiyun static inline unsigned long zone_cma_pages(struct zone *zone)
615*4882a593Smuzhiyun {
616*4882a593Smuzhiyun #ifdef CONFIG_CMA
617*4882a593Smuzhiyun 	return zone->cma_pages;
618*4882a593Smuzhiyun #else
619*4882a593Smuzhiyun 	return 0;
620*4882a593Smuzhiyun #endif
621*4882a593Smuzhiyun }
622*4882a593Smuzhiyun 
623*4882a593Smuzhiyun static inline unsigned long zone_end_pfn(const struct zone *zone)
624*4882a593Smuzhiyun {
625*4882a593Smuzhiyun 	return zone->zone_start_pfn + zone->spanned_pages;
626*4882a593Smuzhiyun }
627*4882a593Smuzhiyun 
628*4882a593Smuzhiyun static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
629*4882a593Smuzhiyun {
630*4882a593Smuzhiyun 	return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
631*4882a593Smuzhiyun }
632*4882a593Smuzhiyun 
633*4882a593Smuzhiyun static inline bool zone_is_initialized(struct zone *zone)
634*4882a593Smuzhiyun {
635*4882a593Smuzhiyun 	return zone->initialized;
636*4882a593Smuzhiyun }
637*4882a593Smuzhiyun 
638*4882a593Smuzhiyun static inline bool zone_is_empty(struct zone *zone)
639*4882a593Smuzhiyun {
640*4882a593Smuzhiyun 	return zone->spanned_pages == 0;
641*4882a593Smuzhiyun }
642*4882a593Smuzhiyun 
643*4882a593Smuzhiyun /*
644*4882a593Smuzhiyun  * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty
645*4882a593Smuzhiyun  * intersection with the given zone
646*4882a593Smuzhiyun  */
647*4882a593Smuzhiyun static inline bool zone_intersects(struct zone *zone,
648*4882a593Smuzhiyun 		unsigned long start_pfn, unsigned long nr_pages)
649*4882a593Smuzhiyun {
650*4882a593Smuzhiyun 	if (zone_is_empty(zone))
651*4882a593Smuzhiyun 		return false;
652*4882a593Smuzhiyun 	if (start_pfn >= zone_end_pfn(zone) ||
653*4882a593Smuzhiyun 	    start_pfn + nr_pages <= zone->zone_start_pfn)
654*4882a593Smuzhiyun 		return false;
655*4882a593Smuzhiyun 
656*4882a593Smuzhiyun 	return true;
657*4882a593Smuzhiyun }
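/*
 * Editorial worked example (not part of the upstream header): for a zone
 * spanning PFNs [0x1000, 0x2000), zone_intersects(zone, 0x1f00, 0x200)
 * returns true because [0x1f00, 0x2100) overlaps the zone, while
 * zone_intersects(zone, 0x2000, 0x100) returns false.
 */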
658*4882a593Smuzhiyun 
659*4882a593Smuzhiyun /*
660*4882a593Smuzhiyun  * The "priority" of VM scanning is how much of the queues we will scan in one
661*4882a593Smuzhiyun  * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
662*4882a593Smuzhiyun  * queues ("queue_length >> 12") during an aging round.
663*4882a593Smuzhiyun  */
664*4882a593Smuzhiyun #define DEF_PRIORITY 12
665*4882a593Smuzhiyun 
666*4882a593Smuzhiyun /* Maximum number of zones on a zonelist */
667*4882a593Smuzhiyun #define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES)
668*4882a593Smuzhiyun 
669*4882a593Smuzhiyun enum {
670*4882a593Smuzhiyun 	ZONELIST_FALLBACK,	/* zonelist with fallback */
671*4882a593Smuzhiyun #ifdef CONFIG_NUMA
672*4882a593Smuzhiyun 	/*
673*4882a593Smuzhiyun 	 * The NUMA zonelists are doubled because we need zonelists that
674*4882a593Smuzhiyun 	 * restrict the allocations to a single node for __GFP_THISNODE.
675*4882a593Smuzhiyun 	 */
676*4882a593Smuzhiyun 	ZONELIST_NOFALLBACK,	/* zonelist without fallback (__GFP_THISNODE) */
677*4882a593Smuzhiyun #endif
678*4882a593Smuzhiyun 	MAX_ZONELISTS
679*4882a593Smuzhiyun };
680*4882a593Smuzhiyun 
681*4882a593Smuzhiyun /*
682*4882a593Smuzhiyun  * This struct contains information about a zone in a zonelist. It is stored
683*4882a593Smuzhiyun  * here to avoid dereferences into large structures and lookups of tables
684*4882a593Smuzhiyun  */
685*4882a593Smuzhiyun struct zoneref {
686*4882a593Smuzhiyun 	struct zone *zone;	/* Pointer to actual zone */
687*4882a593Smuzhiyun 	int zone_idx;		/* zone_idx(zoneref->zone) */
688*4882a593Smuzhiyun };
689*4882a593Smuzhiyun 
690*4882a593Smuzhiyun /*
691*4882a593Smuzhiyun  * One allocation request operates on a zonelist. A zonelist
692*4882a593Smuzhiyun  * is a list of zones, the first one is the 'goal' of the
693*4882a593Smuzhiyun  * allocation, the other zones are fallback zones, in decreasing
694*4882a593Smuzhiyun  * priority.
695*4882a593Smuzhiyun  *
696*4882a593Smuzhiyun  * To speed the reading of the zonelist, the zonerefs contain the zone index
697*4882a593Smuzhiyun  * of the entry being read. Helper functions to access information given
698*4882a593Smuzhiyun  * a struct zoneref are
699*4882a593Smuzhiyun  *
700*4882a593Smuzhiyun  * zonelist_zone()	- Return the struct zone * for an entry in _zonerefs
701*4882a593Smuzhiyun  * zonelist_zone_idx()	- Return the index of the zone for an entry
702*4882a593Smuzhiyun  * zonelist_node_idx()	- Return the index of the node for an entry
703*4882a593Smuzhiyun  */
704*4882a593Smuzhiyun struct zonelist {
705*4882a593Smuzhiyun 	struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
706*4882a593Smuzhiyun };
707*4882a593Smuzhiyun 
708*4882a593Smuzhiyun #ifndef CONFIG_DISCONTIGMEM
709*4882a593Smuzhiyun /* The array of struct pages - for discontigmem use pgdat->lmem_map */
710*4882a593Smuzhiyun extern struct page *mem_map;
711*4882a593Smuzhiyun #endif
712*4882a593Smuzhiyun 
713*4882a593Smuzhiyun #ifdef CONFIG_TRANSPARENT_HUGEPAGE
714*4882a593Smuzhiyun struct deferred_split {
715*4882a593Smuzhiyun 	spinlock_t split_queue_lock;
716*4882a593Smuzhiyun 	struct list_head split_queue;
717*4882a593Smuzhiyun 	unsigned long split_queue_len;
718*4882a593Smuzhiyun };
719*4882a593Smuzhiyun #endif
720*4882a593Smuzhiyun 
721*4882a593Smuzhiyun /*
722*4882a593Smuzhiyun  * On NUMA machines, each NUMA node would have a pg_data_t to describe
723*4882a593Smuzhiyun  * its memory layout. On UMA machines there is a single pglist_data which
724*4882a593Smuzhiyun  * describes the whole memory.
725*4882a593Smuzhiyun  *
726*4882a593Smuzhiyun  * Memory statistics and page replacement data structures are maintained on a
727*4882a593Smuzhiyun  * per-zone basis.
728*4882a593Smuzhiyun  */
729*4882a593Smuzhiyun typedef struct pglist_data {
730*4882a593Smuzhiyun 	/*
731*4882a593Smuzhiyun 	 * node_zones contains just the zones for THIS node. Not all of the
732*4882a593Smuzhiyun 	 * zones may be populated, but it is the full list. It is referenced by
733*4882a593Smuzhiyun 	 * this node's node_zonelists as well as other nodes' node_zonelists.
734*4882a593Smuzhiyun 	 */
735*4882a593Smuzhiyun 	struct zone node_zones[MAX_NR_ZONES];
736*4882a593Smuzhiyun 
737*4882a593Smuzhiyun 	/*
738*4882a593Smuzhiyun 	 * node_zonelists contains references to all zones in all nodes.
739*4882a593Smuzhiyun 	 * Generally the first zones will be references to this node's
740*4882a593Smuzhiyun 	 * node_zones.
741*4882a593Smuzhiyun 	 */
742*4882a593Smuzhiyun 	struct zonelist node_zonelists[MAX_ZONELISTS];
743*4882a593Smuzhiyun 
744*4882a593Smuzhiyun 	int nr_zones; /* number of populated zones in this node */
745*4882a593Smuzhiyun #ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
746*4882a593Smuzhiyun 	struct page *node_mem_map;
747*4882a593Smuzhiyun #ifdef CONFIG_PAGE_EXTENSION
748*4882a593Smuzhiyun 	struct page_ext *node_page_ext;
749*4882a593Smuzhiyun #endif
750*4882a593Smuzhiyun #endif
751*4882a593Smuzhiyun #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
752*4882a593Smuzhiyun 	/*
753*4882a593Smuzhiyun 	 * Must be held any time you expect node_start_pfn,
754*4882a593Smuzhiyun 	 * node_present_pages, node_spanned_pages or nr_zones to stay constant.
755*4882a593Smuzhiyun 	 * Also synchronizes pgdat->first_deferred_pfn during deferred page
756*4882a593Smuzhiyun 	 * init.
757*4882a593Smuzhiyun 	 *
758*4882a593Smuzhiyun 	 * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
759*4882a593Smuzhiyun 	 * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG
760*4882a593Smuzhiyun 	 * or CONFIG_DEFERRED_STRUCT_PAGE_INIT.
761*4882a593Smuzhiyun 	 *
762*4882a593Smuzhiyun 	 * Nests above zone->lock and zone->span_seqlock
763*4882a593Smuzhiyun 	 */
764*4882a593Smuzhiyun 	spinlock_t node_size_lock;
765*4882a593Smuzhiyun #endif
766*4882a593Smuzhiyun 	unsigned long node_start_pfn;
767*4882a593Smuzhiyun 	unsigned long node_present_pages; /* total number of physical pages */
768*4882a593Smuzhiyun 	unsigned long node_spanned_pages; /* total size of physical page
769*4882a593Smuzhiyun 					     range, including holes */
770*4882a593Smuzhiyun 	int node_id;
771*4882a593Smuzhiyun 	wait_queue_head_t kswapd_wait;
772*4882a593Smuzhiyun 	wait_queue_head_t pfmemalloc_wait;
773*4882a593Smuzhiyun 	struct task_struct *kswapd;	/* Protected by
774*4882a593Smuzhiyun 					   mem_hotplug_begin/end() */
775*4882a593Smuzhiyun 	struct task_struct *mkswapd[MAX_KSWAPD_THREADS];
776*4882a593Smuzhiyun 	int kswapd_order;
777*4882a593Smuzhiyun 	enum zone_type kswapd_highest_zoneidx;
778*4882a593Smuzhiyun 
779*4882a593Smuzhiyun 	int kswapd_failures;		/* Number of 'reclaimed == 0' runs */
780*4882a593Smuzhiyun 
781*4882a593Smuzhiyun 	ANDROID_OEM_DATA(1);
782*4882a593Smuzhiyun #ifdef CONFIG_COMPACTION
783*4882a593Smuzhiyun 	int kcompactd_max_order;
784*4882a593Smuzhiyun 	enum zone_type kcompactd_highest_zoneidx;
785*4882a593Smuzhiyun 	wait_queue_head_t kcompactd_wait;
786*4882a593Smuzhiyun 	struct task_struct *kcompactd;
787*4882a593Smuzhiyun 	bool proactive_compact_trigger;
788*4882a593Smuzhiyun #endif
789*4882a593Smuzhiyun 	/*
790*4882a593Smuzhiyun 	 * This is a per-node reserve of pages that are not available
791*4882a593Smuzhiyun 	 * to userspace allocations.
792*4882a593Smuzhiyun 	 */
793*4882a593Smuzhiyun 	unsigned long		totalreserve_pages;
794*4882a593Smuzhiyun 
795*4882a593Smuzhiyun #ifdef CONFIG_NUMA
796*4882a593Smuzhiyun 	/*
797*4882a593Smuzhiyun 	 * node reclaim becomes active if more unmapped pages exist.
798*4882a593Smuzhiyun 	 */
799*4882a593Smuzhiyun 	unsigned long		min_unmapped_pages;
800*4882a593Smuzhiyun 	unsigned long		min_slab_pages;
801*4882a593Smuzhiyun #endif /* CONFIG_NUMA */
802*4882a593Smuzhiyun 
803*4882a593Smuzhiyun 	/* Write-intensive fields used by page reclaim */
804*4882a593Smuzhiyun 	ZONE_PADDING(_pad1_)
805*4882a593Smuzhiyun 	spinlock_t		lru_lock;
806*4882a593Smuzhiyun 
807*4882a593Smuzhiyun #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
808*4882a593Smuzhiyun 	/*
809*4882a593Smuzhiyun 	 * If memory initialisation on large machines is deferred then this
810*4882a593Smuzhiyun 	 * is the first PFN that needs to be initialised.
811*4882a593Smuzhiyun 	 */
812*4882a593Smuzhiyun 	unsigned long first_deferred_pfn;
813*4882a593Smuzhiyun #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
814*4882a593Smuzhiyun 
815*4882a593Smuzhiyun #ifdef CONFIG_TRANSPARENT_HUGEPAGE
816*4882a593Smuzhiyun 	struct deferred_split deferred_split_queue;
817*4882a593Smuzhiyun #endif
818*4882a593Smuzhiyun 
819*4882a593Smuzhiyun 	/* Fields commonly accessed by the page reclaim scanner */
820*4882a593Smuzhiyun 
821*4882a593Smuzhiyun 	/*
822*4882a593Smuzhiyun 	 * NOTE: THIS IS UNUSED IF MEMCG IS ENABLED.
823*4882a593Smuzhiyun 	 *
824*4882a593Smuzhiyun 	 * Use mem_cgroup_lruvec() to look up lruvecs.
825*4882a593Smuzhiyun 	 */
826*4882a593Smuzhiyun 	struct lruvec		__lruvec;
827*4882a593Smuzhiyun 
828*4882a593Smuzhiyun 	unsigned long		flags;
829*4882a593Smuzhiyun 
830*4882a593Smuzhiyun 	ZONE_PADDING(_pad2_)
831*4882a593Smuzhiyun 
832*4882a593Smuzhiyun 	/* Per-node vmstats */
833*4882a593Smuzhiyun 	struct per_cpu_nodestat __percpu *per_cpu_nodestats;
834*4882a593Smuzhiyun 	atomic_long_t		vm_stat[NR_VM_NODE_STAT_ITEMS];
835*4882a593Smuzhiyun } pg_data_t;
836*4882a593Smuzhiyun 
837*4882a593Smuzhiyun #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
838*4882a593Smuzhiyun #define node_spanned_pages(nid)	(NODE_DATA(nid)->node_spanned_pages)
839*4882a593Smuzhiyun #ifdef CONFIG_FLAT_NODE_MEM_MAP
840*4882a593Smuzhiyun #define pgdat_page_nr(pgdat, pagenr)	((pgdat)->node_mem_map + (pagenr))
841*4882a593Smuzhiyun #else
842*4882a593Smuzhiyun #define pgdat_page_nr(pgdat, pagenr)	pfn_to_page((pgdat)->node_start_pfn + (pagenr))
843*4882a593Smuzhiyun #endif
844*4882a593Smuzhiyun #define nid_page_nr(nid, pagenr) 	pgdat_page_nr(NODE_DATA(nid),(pagenr))
845*4882a593Smuzhiyun 
846*4882a593Smuzhiyun #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
847*4882a593Smuzhiyun #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
848*4882a593Smuzhiyun 
849*4882a593Smuzhiyun static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
850*4882a593Smuzhiyun {
851*4882a593Smuzhiyun 	return pgdat->node_start_pfn + pgdat->node_spanned_pages;
852*4882a593Smuzhiyun }
853*4882a593Smuzhiyun 
854*4882a593Smuzhiyun static inline bool pgdat_is_empty(pg_data_t *pgdat)
855*4882a593Smuzhiyun {
856*4882a593Smuzhiyun 	return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
857*4882a593Smuzhiyun }
858*4882a593Smuzhiyun 
859*4882a593Smuzhiyun #include <linux/memory_hotplug.h>
860*4882a593Smuzhiyun 
861*4882a593Smuzhiyun void build_all_zonelists(pg_data_t *pgdat);
862*4882a593Smuzhiyun void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
863*4882a593Smuzhiyun 		   enum zone_type highest_zoneidx);
864*4882a593Smuzhiyun bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
865*4882a593Smuzhiyun 			 int highest_zoneidx, unsigned int alloc_flags,
866*4882a593Smuzhiyun 			 long free_pages);
867*4882a593Smuzhiyun bool zone_watermark_ok(struct zone *z, unsigned int order,
868*4882a593Smuzhiyun 		unsigned long mark, int highest_zoneidx,
869*4882a593Smuzhiyun 		unsigned int alloc_flags);
870*4882a593Smuzhiyun bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
871*4882a593Smuzhiyun 		unsigned long mark, int highest_zoneidx);
872*4882a593Smuzhiyun /*
873*4882a593Smuzhiyun  * Memory initialization context, used to differentiate memory added by
874*4882a593Smuzhiyun  * the platform statically or via memory hotplug interface.
875*4882a593Smuzhiyun  */
876*4882a593Smuzhiyun enum meminit_context {
877*4882a593Smuzhiyun 	MEMINIT_EARLY,
878*4882a593Smuzhiyun 	MEMINIT_HOTPLUG,
879*4882a593Smuzhiyun };
880*4882a593Smuzhiyun 
881*4882a593Smuzhiyun extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
882*4882a593Smuzhiyun 				     unsigned long size);
883*4882a593Smuzhiyun 
884*4882a593Smuzhiyun extern void lruvec_init(struct lruvec *lruvec);
885*4882a593Smuzhiyun 
886*4882a593Smuzhiyun static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
887*4882a593Smuzhiyun {
888*4882a593Smuzhiyun #ifdef CONFIG_MEMCG
889*4882a593Smuzhiyun 	return lruvec->pgdat;
890*4882a593Smuzhiyun #else
891*4882a593Smuzhiyun 	return container_of(lruvec, struct pglist_data, __lruvec);
892*4882a593Smuzhiyun #endif
893*4882a593Smuzhiyun }
894*4882a593Smuzhiyun 
895*4882a593Smuzhiyun extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
896*4882a593Smuzhiyun 
897*4882a593Smuzhiyun #ifdef CONFIG_HAVE_MEMORYLESS_NODES
898*4882a593Smuzhiyun int local_memory_node(int node_id);
899*4882a593Smuzhiyun #else
900*4882a593Smuzhiyun static inline int local_memory_node(int node_id) { return node_id; };
901*4882a593Smuzhiyun #endif
902*4882a593Smuzhiyun 
903*4882a593Smuzhiyun /*
904*4882a593Smuzhiyun  * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
905*4882a593Smuzhiyun  */
906*4882a593Smuzhiyun #define zone_idx(zone)		((zone) - (zone)->zone_pgdat->node_zones)
907*4882a593Smuzhiyun 
908*4882a593Smuzhiyun /*
909*4882a593Smuzhiyun  * Returns true if a zone has pages managed by the buddy allocator.
910*4882a593Smuzhiyun  * All the reclaim decisions have to use this function rather than
911*4882a593Smuzhiyun  * populated_zone(). If the whole zone is reserved then we can easily
912*4882a593Smuzhiyun  * end up with populated_zone() && !managed_zone().
913*4882a593Smuzhiyun  */
914*4882a593Smuzhiyun static inline bool managed_zone(struct zone *zone)
915*4882a593Smuzhiyun {
916*4882a593Smuzhiyun 	return zone_managed_pages(zone);
917*4882a593Smuzhiyun }
918*4882a593Smuzhiyun 
919*4882a593Smuzhiyun /* Returns true if a zone has memory */
920*4882a593Smuzhiyun static inline bool populated_zone(struct zone *zone)
921*4882a593Smuzhiyun {
922*4882a593Smuzhiyun 	return zone->present_pages;
923*4882a593Smuzhiyun }
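/*
 * Editorial note (not part of the upstream header): per the comment above
 * managed_zone(), reclaim-side loops generally test managed_zone() rather
 * than populated_zone(), e.g.
 *
 *	for (i = 0; i < MAX_NR_ZONES; i++) {
 *		struct zone *zone = &pgdat->node_zones[i];
 *
 *		if (!managed_zone(zone))
 *			continue;
 *		// ... consider this zone for reclaim ...
 *	}
 */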
924*4882a593Smuzhiyun 
925*4882a593Smuzhiyun #ifdef CONFIG_NEED_MULTIPLE_NODES
926*4882a593Smuzhiyun static inline int zone_to_nid(struct zone *zone)
927*4882a593Smuzhiyun {
928*4882a593Smuzhiyun 	return zone->node;
929*4882a593Smuzhiyun }
930*4882a593Smuzhiyun 
931*4882a593Smuzhiyun static inline void zone_set_nid(struct zone *zone, int nid)
932*4882a593Smuzhiyun {
933*4882a593Smuzhiyun 	zone->node = nid;
934*4882a593Smuzhiyun }
935*4882a593Smuzhiyun #else
936*4882a593Smuzhiyun static inline int zone_to_nid(struct zone *zone)
937*4882a593Smuzhiyun {
938*4882a593Smuzhiyun 	return 0;
939*4882a593Smuzhiyun }
940*4882a593Smuzhiyun 
941*4882a593Smuzhiyun static inline void zone_set_nid(struct zone *zone, int nid) {}
942*4882a593Smuzhiyun #endif
943*4882a593Smuzhiyun 
944*4882a593Smuzhiyun extern int movable_zone;
945*4882a593Smuzhiyun 
946*4882a593Smuzhiyun #ifdef CONFIG_HIGHMEM
947*4882a593Smuzhiyun static inline int zone_movable_is_highmem(void)
948*4882a593Smuzhiyun {
949*4882a593Smuzhiyun #ifdef CONFIG_NEED_MULTIPLE_NODES
950*4882a593Smuzhiyun 	return movable_zone == ZONE_HIGHMEM;
951*4882a593Smuzhiyun #else
952*4882a593Smuzhiyun 	return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
953*4882a593Smuzhiyun #endif
954*4882a593Smuzhiyun }
955*4882a593Smuzhiyun #endif
956*4882a593Smuzhiyun 
957*4882a593Smuzhiyun static inline int is_highmem_idx(enum zone_type idx)
958*4882a593Smuzhiyun {
959*4882a593Smuzhiyun #ifdef CONFIG_HIGHMEM
960*4882a593Smuzhiyun 	return (idx == ZONE_HIGHMEM ||
961*4882a593Smuzhiyun 		(idx == ZONE_MOVABLE && zone_movable_is_highmem()));
962*4882a593Smuzhiyun #else
963*4882a593Smuzhiyun 	return 0;
964*4882a593Smuzhiyun #endif
965*4882a593Smuzhiyun }
966*4882a593Smuzhiyun 
967*4882a593Smuzhiyun #ifdef CONFIG_ZONE_DMA
968*4882a593Smuzhiyun bool has_managed_dma(void);
969*4882a593Smuzhiyun #else
970*4882a593Smuzhiyun static inline bool has_managed_dma(void)
971*4882a593Smuzhiyun {
972*4882a593Smuzhiyun 	return false;
973*4882a593Smuzhiyun }
974*4882a593Smuzhiyun #endif
975*4882a593Smuzhiyun 
976*4882a593Smuzhiyun /**
977*4882a593Smuzhiyun  * is_highmem - helper function to quickly check if a struct zone is a
978*4882a593Smuzhiyun  *              highmem zone or not.  This is an attempt to keep references
979*4882a593Smuzhiyun  *              to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
980*4882a593Smuzhiyun  * @zone - pointer to struct zone variable
981*4882a593Smuzhiyun  */
982*4882a593Smuzhiyun static inline int is_highmem(struct zone *zone)
983*4882a593Smuzhiyun {
984*4882a593Smuzhiyun #ifdef CONFIG_HIGHMEM
985*4882a593Smuzhiyun 	return is_highmem_idx(zone_idx(zone));
986*4882a593Smuzhiyun #else
987*4882a593Smuzhiyun 	return 0;
988*4882a593Smuzhiyun #endif
989*4882a593Smuzhiyun }
990*4882a593Smuzhiyun 
991*4882a593Smuzhiyun /* These two functions are used to setup the per zone pages min values */
992*4882a593Smuzhiyun struct ctl_table;
993*4882a593Smuzhiyun 
994*4882a593Smuzhiyun int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *,
995*4882a593Smuzhiyun 		loff_t *);
996*4882a593Smuzhiyun int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
997*4882a593Smuzhiyun 		size_t *, loff_t *);
998*4882a593Smuzhiyun extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
999*4882a593Smuzhiyun int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
1000*4882a593Smuzhiyun 		size_t *, loff_t *);
1001*4882a593Smuzhiyun int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
1002*4882a593Smuzhiyun 		void *, size_t *, loff_t *);
1003*4882a593Smuzhiyun int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
1004*4882a593Smuzhiyun 		void *, size_t *, loff_t *);
1005*4882a593Smuzhiyun int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
1006*4882a593Smuzhiyun 		void *, size_t *, loff_t *);
1007*4882a593Smuzhiyun int numa_zonelist_order_handler(struct ctl_table *, int,
1008*4882a593Smuzhiyun 		void *, size_t *, loff_t *);
1009*4882a593Smuzhiyun extern int percpu_pagelist_fraction;
1010*4882a593Smuzhiyun extern char numa_zonelist_order[];
1011*4882a593Smuzhiyun #define NUMA_ZONELIST_ORDER_LEN	16
1012*4882a593Smuzhiyun 
1013*4882a593Smuzhiyun #ifndef CONFIG_NEED_MULTIPLE_NODES
1014*4882a593Smuzhiyun 
1015*4882a593Smuzhiyun extern struct pglist_data contig_page_data;
1016*4882a593Smuzhiyun #define NODE_DATA(nid)		(&contig_page_data)
1017*4882a593Smuzhiyun #define NODE_MEM_MAP(nid)	mem_map
1018*4882a593Smuzhiyun 
1019*4882a593Smuzhiyun #else /* CONFIG_NEED_MULTIPLE_NODES */
1020*4882a593Smuzhiyun 
1021*4882a593Smuzhiyun #include <asm/mmzone.h>
1022*4882a593Smuzhiyun 
1023*4882a593Smuzhiyun #endif /* !CONFIG_NEED_MULTIPLE_NODES */
1024*4882a593Smuzhiyun 
1025*4882a593Smuzhiyun extern struct pglist_data *first_online_pgdat(void);
1026*4882a593Smuzhiyun extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
1027*4882a593Smuzhiyun extern struct zone *next_zone(struct zone *zone);
1028*4882a593Smuzhiyun extern int isolate_anon_lru_page(struct page *page);
1029*4882a593Smuzhiyun 
1030*4882a593Smuzhiyun /**
1031*4882a593Smuzhiyun  * for_each_online_pgdat - helper macro to iterate over all online nodes
1032*4882a593Smuzhiyun  * @pgdat - pointer to a pg_data_t variable
1033*4882a593Smuzhiyun  */
1034*4882a593Smuzhiyun #define for_each_online_pgdat(pgdat)			\
1035*4882a593Smuzhiyun 	for (pgdat = first_online_pgdat();		\
1036*4882a593Smuzhiyun 	     pgdat;					\
1037*4882a593Smuzhiyun 	     pgdat = next_online_pgdat(pgdat))
1038*4882a593Smuzhiyun /**
1039*4882a593Smuzhiyun  * for_each_zone - helper macro to iterate over all memory zones
1040*4882a593Smuzhiyun  * @zone - pointer to struct zone variable
1041*4882a593Smuzhiyun  *
1042*4882a593Smuzhiyun  * The user only needs to declare the zone variable, for_each_zone
1043*4882a593Smuzhiyun  * fills it in.
1044*4882a593Smuzhiyun  */
1045*4882a593Smuzhiyun #define for_each_zone(zone)			        \
1046*4882a593Smuzhiyun 	for (zone = (first_online_pgdat())->node_zones; \
1047*4882a593Smuzhiyun 	     zone;					\
1048*4882a593Smuzhiyun 	     zone = next_zone(zone))
1049*4882a593Smuzhiyun 
1050*4882a593Smuzhiyun #define for_each_populated_zone(zone)		        \
1051*4882a593Smuzhiyun 	for (zone = (first_online_pgdat())->node_zones; \
1052*4882a593Smuzhiyun 	     zone;					\
1053*4882a593Smuzhiyun 	     zone = next_zone(zone))			\
1054*4882a593Smuzhiyun 		if (!populated_zone(zone))		\
1055*4882a593Smuzhiyun 			; /* do nothing */		\
1056*4882a593Smuzhiyun 		else
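/*
 * Editorial sketch (not part of the upstream header): a typical caller of
 * the iterators above only declares the cursor and lets the macro fill it
 * in, e.g.
 *
 *	struct zone *zone;
 *
 *	for_each_populated_zone(zone)
 *		pr_info("%s: %lu managed pages\n",
 *			zone->name, zone_managed_pages(zone));
 */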
1057*4882a593Smuzhiyun 
1058*4882a593Smuzhiyun static inline struct zone *zonelist_zone(struct zoneref *zoneref)
1059*4882a593Smuzhiyun {
1060*4882a593Smuzhiyun 	return zoneref->zone;
1061*4882a593Smuzhiyun }
1062*4882a593Smuzhiyun 
1063*4882a593Smuzhiyun static inline int zonelist_zone_idx(struct zoneref *zoneref)
1064*4882a593Smuzhiyun {
1065*4882a593Smuzhiyun 	return zoneref->zone_idx;
1066*4882a593Smuzhiyun }
1067*4882a593Smuzhiyun 
1068*4882a593Smuzhiyun static inline int zonelist_node_idx(struct zoneref *zoneref)
1069*4882a593Smuzhiyun {
1070*4882a593Smuzhiyun 	return zone_to_nid(zoneref->zone);
1071*4882a593Smuzhiyun }
1072*4882a593Smuzhiyun 
1073*4882a593Smuzhiyun struct zoneref *__next_zones_zonelist(struct zoneref *z,
1074*4882a593Smuzhiyun 					enum zone_type highest_zoneidx,
1075*4882a593Smuzhiyun 					nodemask_t *nodes);
1076*4882a593Smuzhiyun 
1077*4882a593Smuzhiyun /**
1078*4882a593Smuzhiyun  * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point
1079*4882a593Smuzhiyun  * @z - The cursor used as a starting point for the search
1080*4882a593Smuzhiyun  * @highest_zoneidx - The zone index of the highest zone to return
1081*4882a593Smuzhiyun  * @nodes - An optional nodemask to filter the zonelist with
1082*4882a593Smuzhiyun  *
1083*4882a593Smuzhiyun  * This function returns the next zone at or below a given zone index that is
1084*4882a593Smuzhiyun  * within the allowed nodemask using a cursor as the starting point for the
1085*4882a593Smuzhiyun  * search. The zoneref returned is a cursor that represents the current zone
1086*4882a593Smuzhiyun  * being examined. It should be advanced by one before calling
1087*4882a593Smuzhiyun  * next_zones_zonelist again.
1088*4882a593Smuzhiyun  */
1089*4882a593Smuzhiyun static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z,
1090*4882a593Smuzhiyun 					enum zone_type highest_zoneidx,
1091*4882a593Smuzhiyun 					nodemask_t *nodes)
1092*4882a593Smuzhiyun {
1093*4882a593Smuzhiyun 	if (likely(!nodes && zonelist_zone_idx(z) <= highest_zoneidx))
1094*4882a593Smuzhiyun 		return z;
1095*4882a593Smuzhiyun 	return __next_zones_zonelist(z, highest_zoneidx, nodes);
1096*4882a593Smuzhiyun }
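/*
 * Illustrative sketch of driving the zoneref cursor by hand (not part of the
 * original header, compiled out with #if 0): seed the cursor from the
 * zonelist, then advance it by one before every further call, as described
 * in the comment above.
 */
#if 0
static unsigned int count_zones_below(struct zonelist *zonelist,
				      enum zone_type highest_zoneidx)
{
	struct zoneref *z;
	unsigned int nr = 0;

	for (z = next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, NULL);
	     zonelist_zone(z);
	     z = next_zones_zonelist(++z, highest_zoneidx, NULL))
		nr++;

	return nr;
}
#endif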
1097*4882a593Smuzhiyun 
1098*4882a593Smuzhiyun /**
1099*4882a593Smuzhiyun  * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist
1100*4882a593Smuzhiyun  * @zonelist - The zonelist to search for a suitable zone
1101*4882a593Smuzhiyun  * @highest_zoneidx - The zone index of the highest zone to return
1102*4882a593Smuzhiyun  * @nodes - An optional nodemask to filter the zonelist with
1103*4882a593Smuzhiyun  * @return - Zoneref pointer for the first suitable zone found (see below)
1104*4882a593Smuzhiyun  *
1105*4882a593Smuzhiyun  * This function returns the first zone at or below a given zone index that is
1106*4882a593Smuzhiyun  * within the allowed nodemask. The zoneref returned is a cursor that can be
1107*4882a593Smuzhiyun  * used to iterate the zonelist with next_zones_zonelist by advancing it by
1108*4882a593Smuzhiyun  * one before calling.
1109*4882a593Smuzhiyun  *
1110*4882a593Smuzhiyun  * When no eligible zone is found, zoneref->zone is NULL (zoneref itself is
1111*4882a593Smuzhiyun  * never NULL). This may happen either genuinely, or due to a concurrent
1112*4882a593Smuzhiyun  * nodemask update caused by a cpuset modification.
1113*4882a593Smuzhiyun  */
1114*4882a593Smuzhiyun static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
1115*4882a593Smuzhiyun 					enum zone_type highest_zoneidx,
1116*4882a593Smuzhiyun 					nodemask_t *nodes)
1117*4882a593Smuzhiyun {
1118*4882a593Smuzhiyun 	return next_zones_zonelist(zonelist->_zonerefs,
1119*4882a593Smuzhiyun 							highest_zoneidx, nodes);
1120*4882a593Smuzhiyun }
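/*
 * Illustrative sketch (not part of the original header, compiled out with
 * #if 0): look up the preferred zone for an allocation, roughly the way the
 * page allocator seeds its fast path.  node_zonelist() and gfp_zone() are
 * assumed to come from linux/gfp.h.
 */
#if 0
static struct zone *preferred_zone_for(int nid, gfp_t gfp_mask)
{
	struct zonelist *zonelist = node_zonelist(nid, gfp_mask);
	struct zoneref *z;

	z = first_zones_zonelist(zonelist, gfp_zone(gfp_mask), NULL);

	/* May be NULL when no eligible zone exists (see comment above). */
	return zonelist_zone(z);
}
#endif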
1121*4882a593Smuzhiyun 
1122*4882a593Smuzhiyun /**
1123*4882a593Smuzhiyun  * for_each_zone_zonelist_nodemask - helper macro to iterate over valid zones in a zonelist at or below a given zone index and within a nodemask
1124*4882a593Smuzhiyun  * @zone - The current zone in the iterator
1125*4882a593Smuzhiyun  * @z - The current pointer within zonelist->_zonerefs being iterated
1126*4882a593Smuzhiyun  * @zlist - The zonelist being iterated
1127*4882a593Smuzhiyun  * @highidx - The zone index of the highest zone to return
1128*4882a593Smuzhiyun  * @nodemask - Nodemask allowed by the allocator
1129*4882a593Smuzhiyun  *
1130*4882a593Smuzhiyun  * This iterator iterates through all zones at or below a given zone index and
1131*4882a593Smuzhiyun  * within a given nodemask.
1132*4882a593Smuzhiyun  */
1133*4882a593Smuzhiyun #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \
1134*4882a593Smuzhiyun 	for (z = first_zones_zonelist(zlist, highidx, nodemask), zone = zonelist_zone(z);	\
1135*4882a593Smuzhiyun 		zone;							\
1136*4882a593Smuzhiyun 		z = next_zones_zonelist(++z, highidx, nodemask),	\
1137*4882a593Smuzhiyun 			zone = zonelist_zone(z))
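/*
 * Illustrative sketch (not part of the original header, compiled out with
 * #if 0): walk every zone a hypothetical allocation could use, restricted by
 * an optional nodemask.  node_zonelist(), gfp_zone() and numa_node_id() are
 * assumed to come from linux/gfp.h and linux/topology.h.
 */
#if 0
static unsigned long free_pages_in_allowed_zones(gfp_t gfp_mask,
						 nodemask_t *nodemask)
{
	struct zonelist *zonelist = node_zonelist(numa_node_id(), gfp_mask);
	struct zoneref *z;
	struct zone *zone;
	unsigned long free = 0;

	for_each_zone_zonelist_nodemask(zone, z, zonelist,
					gfp_zone(gfp_mask), nodemask)
		free += zone_page_state(zone, NR_FREE_PAGES);

	return free;
}
#endif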
1138*4882a593Smuzhiyun 
1139*4882a593Smuzhiyun #define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \
1140*4882a593Smuzhiyun 	for (zone = z->zone;	\
1141*4882a593Smuzhiyun 		zone;							\
1142*4882a593Smuzhiyun 		z = next_zones_zonelist(++z, highidx, nodemask),	\
1143*4882a593Smuzhiyun 			zone = zonelist_zone(z))
1144*4882a593Smuzhiyun 
1145*4882a593Smuzhiyun 
1146*4882a593Smuzhiyun /**
1147*4882a593Smuzhiyun  * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index
1148*4882a593Smuzhiyun  * @zone - The current zone in the iterator
1149*4882a593Smuzhiyun  * @z - The current pointer within zonelist->zones being iterated
1150*4882a593Smuzhiyun  * @zlist - The zonelist being iterated
1151*4882a593Smuzhiyun  * @highidx - The zone index of the highest zone to return
1152*4882a593Smuzhiyun  *
1153*4882a593Smuzhiyun  * This iterator iterates through all zones at or below a given zone index.
1154*4882a593Smuzhiyun  */
1155*4882a593Smuzhiyun #define for_each_zone_zonelist(zone, z, zlist, highidx) \
1156*4882a593Smuzhiyun 	for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)
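/*
 * Illustrative sketch (not part of the original header, compiled out with
 * #if 0): the nodemask-less wrapper is the common form; here it counts the
 * populated zones a GFP_KERNEL allocation on the local node could fall back
 * to.  GFP_KERNEL, node_zonelist() and gfp_zone() are assumed to come from
 * linux/gfp.h.
 */
#if 0
static int nr_fallback_zones(void)
{
	struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
	struct zoneref *z;
	struct zone *zone;
	int nr = 0;

	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(GFP_KERNEL))
		if (populated_zone(zone))
			nr++;

	return nr;
}
#endif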
1157*4882a593Smuzhiyun 
1158*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM
1159*4882a593Smuzhiyun #include <asm/sparsemem.h>
1160*4882a593Smuzhiyun #endif
1161*4882a593Smuzhiyun 
1162*4882a593Smuzhiyun #ifdef CONFIG_FLATMEM
1163*4882a593Smuzhiyun #define pfn_to_nid(pfn)		(0)
1164*4882a593Smuzhiyun #endif
1165*4882a593Smuzhiyun 
1166*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM
1167*4882a593Smuzhiyun 
1168*4882a593Smuzhiyun /*
1169*4882a593Smuzhiyun  * SECTION_SHIFT    		#bits space required to store a section #
1170*4882a593Smuzhiyun  *
1171*4882a593Smuzhiyun  * PA_SECTION_SHIFT		physical address to/from section number
1172*4882a593Smuzhiyun  * PFN_SECTION_SHIFT		pfn to/from section number
1173*4882a593Smuzhiyun  */
1174*4882a593Smuzhiyun #define PA_SECTION_SHIFT	(SECTION_SIZE_BITS)
1175*4882a593Smuzhiyun #define PFN_SECTION_SHIFT	(SECTION_SIZE_BITS - PAGE_SHIFT)
1176*4882a593Smuzhiyun 
1177*4882a593Smuzhiyun #define NR_MEM_SECTIONS		(1UL << SECTIONS_SHIFT)
1178*4882a593Smuzhiyun 
1179*4882a593Smuzhiyun #define PAGES_PER_SECTION       (1UL << PFN_SECTION_SHIFT)
1180*4882a593Smuzhiyun #define PAGE_SECTION_MASK	(~(PAGES_PER_SECTION-1))
1181*4882a593Smuzhiyun 
1182*4882a593Smuzhiyun #define SECTION_BLOCKFLAGS_BITS \
1183*4882a593Smuzhiyun 	((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)
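/*
 * Worked example (illustrative numbers only): assuming SECTION_SIZE_BITS == 27
 * and PAGE_SHIFT == 12 (4K pages), each section covers 2^27 bytes = 128 MiB,
 * PFN_SECTION_SHIFT == 15, PAGES_PER_SECTION == 32768, and PAGE_SECTION_MASK
 * clears the low 15 bits of a pfn.
 */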
1184*4882a593Smuzhiyun 
1185*4882a593Smuzhiyun #if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
1186*4882a593Smuzhiyun #error Allocator MAX_ORDER exceeds SECTION_SIZE
1187*4882a593Smuzhiyun #endif
1188*4882a593Smuzhiyun 
1189*4882a593Smuzhiyun static inline unsigned long pfn_to_section_nr(unsigned long pfn)
1190*4882a593Smuzhiyun {
1191*4882a593Smuzhiyun 	return pfn >> PFN_SECTION_SHIFT;
1192*4882a593Smuzhiyun }
1193*4882a593Smuzhiyun static inline unsigned long section_nr_to_pfn(unsigned long sec)
1194*4882a593Smuzhiyun {
1195*4882a593Smuzhiyun 	return sec << PFN_SECTION_SHIFT;
1196*4882a593Smuzhiyun }
1197*4882a593Smuzhiyun 
1198*4882a593Smuzhiyun #define SECTION_ALIGN_UP(pfn)	(((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
1199*4882a593Smuzhiyun #define SECTION_ALIGN_DOWN(pfn)	((pfn) & PAGE_SECTION_MASK)
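/*
 * Continuing the illustrative PFN_SECTION_SHIFT == 15 example:
 * pfn_to_section_nr(0x12345) == 2, section_nr_to_pfn(2) == 0x10000,
 * SECTION_ALIGN_DOWN(0x12345) == 0x10000 and
 * SECTION_ALIGN_UP(0x12345) == 0x18000.
 */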
1200*4882a593Smuzhiyun 
1201*4882a593Smuzhiyun #define SUBSECTION_SHIFT 21
1202*4882a593Smuzhiyun #define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT)
1203*4882a593Smuzhiyun 
1204*4882a593Smuzhiyun #define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
1205*4882a593Smuzhiyun #define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
1206*4882a593Smuzhiyun #define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1))
1207*4882a593Smuzhiyun 
1208*4882a593Smuzhiyun #if SUBSECTION_SHIFT > SECTION_SIZE_BITS
1209*4882a593Smuzhiyun #error Subsection size exceeds section size
1210*4882a593Smuzhiyun #else
1211*4882a593Smuzhiyun #define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT))
1212*4882a593Smuzhiyun #endif
1213*4882a593Smuzhiyun 
1214*4882a593Smuzhiyun #define SUBSECTION_ALIGN_UP(pfn) ALIGN((pfn), PAGES_PER_SUBSECTION)
1215*4882a593Smuzhiyun #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK)
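/*
 * Worked example (illustrative numbers only): SUBSECTION_SHIFT == 21 gives
 * 2 MiB subsections; with PAGE_SHIFT == 12 that is PAGES_PER_SUBSECTION == 512,
 * and with SECTION_SIZE_BITS == 27 there are SUBSECTIONS_PER_SECTION == 64
 * subsections tracked per section in the subsection_map bitmap below.
 */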
1216*4882a593Smuzhiyun 
1217*4882a593Smuzhiyun struct mem_section_usage {
1218*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM_VMEMMAP
1219*4882a593Smuzhiyun 	DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION);
1220*4882a593Smuzhiyun #endif
1221*4882a593Smuzhiyun 	/* See declaration of similar field in struct zone */
1222*4882a593Smuzhiyun 	unsigned long pageblock_flags[0];
1223*4882a593Smuzhiyun };
1224*4882a593Smuzhiyun 
1225*4882a593Smuzhiyun void subsection_map_init(unsigned long pfn, unsigned long nr_pages);
1226*4882a593Smuzhiyun 
1227*4882a593Smuzhiyun struct page;
1228*4882a593Smuzhiyun struct page_ext;
1229*4882a593Smuzhiyun struct mem_section {
1230*4882a593Smuzhiyun 	/*
1231*4882a593Smuzhiyun 	 * This is, logically, a pointer to an array of struct
1232*4882a593Smuzhiyun 	 * pages.  However, it is stored with some other magic.
1233*4882a593Smuzhiyun 	 * (see sparse.c::sparse_init_one_section())
1234*4882a593Smuzhiyun 	 *
1235*4882a593Smuzhiyun 	 * Additionally, during early boot we encode the node id of
1236*4882a593Smuzhiyun 	 * the section's location here to guide allocation.
1237*4882a593Smuzhiyun 	 * (see sparse.c::memory_present())
1238*4882a593Smuzhiyun 	 *
1239*4882a593Smuzhiyun 	 * Making it a UL at least makes someone do a cast
1240*4882a593Smuzhiyun 	 * before using it wrong.
1241*4882a593Smuzhiyun 	 */
1242*4882a593Smuzhiyun 	unsigned long section_mem_map;
1243*4882a593Smuzhiyun 
1244*4882a593Smuzhiyun 	struct mem_section_usage *usage;
1245*4882a593Smuzhiyun #ifdef CONFIG_PAGE_EXTENSION
1246*4882a593Smuzhiyun 	/*
1247*4882a593Smuzhiyun 	 * If SPARSEMEM, pgdat doesn't have a page_ext pointer; the one
1248*4882a593Smuzhiyun 	 * stored here in the section is used instead (see page_ext.h).
1249*4882a593Smuzhiyun 	 */
1250*4882a593Smuzhiyun 	struct page_ext *page_ext;
1251*4882a593Smuzhiyun 	unsigned long pad;
1252*4882a593Smuzhiyun #endif
1253*4882a593Smuzhiyun 	/*
1254*4882a593Smuzhiyun 	 * WARNING: mem_section must be a power-of-2 in size for the
1255*4882a593Smuzhiyun 	 * calculation and use of SECTION_ROOT_MASK to make sense.
1256*4882a593Smuzhiyun 	 */
1257*4882a593Smuzhiyun };
1258*4882a593Smuzhiyun 
1259*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM_EXTREME
1260*4882a593Smuzhiyun #define SECTIONS_PER_ROOT       (PAGE_SIZE / sizeof (struct mem_section))
1261*4882a593Smuzhiyun #else
1262*4882a593Smuzhiyun #define SECTIONS_PER_ROOT	1
1263*4882a593Smuzhiyun #endif
1264*4882a593Smuzhiyun 
1265*4882a593Smuzhiyun #define SECTION_NR_TO_ROOT(sec)	((sec) / SECTIONS_PER_ROOT)
1266*4882a593Smuzhiyun #define NR_SECTION_ROOTS	DIV_ROUND_UP(NR_MEM_SECTIONS, SECTIONS_PER_ROOT)
1267*4882a593Smuzhiyun #define SECTION_ROOT_MASK	(SECTIONS_PER_ROOT - 1)
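/*
 * Worked example (illustrative, assuming CONFIG_SPARSEMEM_EXTREME, 4K pages
 * and a 16-byte struct mem_section): SECTIONS_PER_ROOT == 256, so section
 * number 1000 lives at
 * mem_section[SECTION_NR_TO_ROOT(1000)][1000 & SECTION_ROOT_MASK],
 * i.e. root 3, offset 232.
 */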
1268*4882a593Smuzhiyun 
1269*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM_EXTREME
1270*4882a593Smuzhiyun extern struct mem_section **mem_section;
1271*4882a593Smuzhiyun #else
1272*4882a593Smuzhiyun extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
1273*4882a593Smuzhiyun #endif
1274*4882a593Smuzhiyun 
1275*4882a593Smuzhiyun static inline unsigned long *section_to_usemap(struct mem_section *ms)
1276*4882a593Smuzhiyun {
1277*4882a593Smuzhiyun 	return ms->usage->pageblock_flags;
1278*4882a593Smuzhiyun }
1279*4882a593Smuzhiyun 
1280*4882a593Smuzhiyun static inline struct mem_section *__nr_to_section(unsigned long nr)
1281*4882a593Smuzhiyun {
1282*4882a593Smuzhiyun 	unsigned long root = SECTION_NR_TO_ROOT(nr);
1283*4882a593Smuzhiyun 
1284*4882a593Smuzhiyun 	if (unlikely(root >= NR_SECTION_ROOTS))
1285*4882a593Smuzhiyun 		return NULL;
1286*4882a593Smuzhiyun 
1287*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM_EXTREME
1288*4882a593Smuzhiyun 	if (!mem_section || !mem_section[root])
1289*4882a593Smuzhiyun 		return NULL;
1290*4882a593Smuzhiyun #endif
1291*4882a593Smuzhiyun 	return &mem_section[root][nr & SECTION_ROOT_MASK];
1292*4882a593Smuzhiyun }
1293*4882a593Smuzhiyun extern unsigned long __section_nr(struct mem_section *ms);
1294*4882a593Smuzhiyun extern size_t mem_section_usage_size(void);
1295*4882a593Smuzhiyun 
1296*4882a593Smuzhiyun /*
1297*4882a593Smuzhiyun  * We use the lower bits of the mem_map pointer to store
1298*4882a593Smuzhiyun  * a little bit of information.  The pointer is calculated
1299*4882a593Smuzhiyun  * as mem_map - section_nr_to_pfn(pnum).  The result is
1300*4882a593Smuzhiyun  * aligned to the minimum alignment of the two values:
1301*4882a593Smuzhiyun  *   1. All mem_map arrays are page-aligned.
1302*4882a593Smuzhiyun  *   2. section_nr_to_pfn() always clears PFN_SECTION_SHIFT
1303*4882a593Smuzhiyun  *      lowest bits.  PFN_SECTION_SHIFT is arch-specific
1304*4882a593Smuzhiyun  *      (equal to SECTION_SIZE_BITS - PAGE_SHIFT), and the
1305*4882a593Smuzhiyun  *      worst combination is powerpc with 256k pages,
1306*4882a593Smuzhiyun  *      which results in PFN_SECTION_SHIFT equal to 6.
1307*4882a593Smuzhiyun  * To sum it up, at least 6 bits are available.
1308*4882a593Smuzhiyun  */
1309*4882a593Smuzhiyun #define	SECTION_MARKED_PRESENT	(1UL<<0)
1310*4882a593Smuzhiyun #define SECTION_HAS_MEM_MAP	(1UL<<1)
1311*4882a593Smuzhiyun #define SECTION_IS_ONLINE	(1UL<<2)
1312*4882a593Smuzhiyun #define SECTION_IS_EARLY	(1UL<<3)
1313*4882a593Smuzhiyun #define SECTION_MAP_LAST_BIT	(1UL<<4)
1314*4882a593Smuzhiyun #define SECTION_MAP_MASK	(~(SECTION_MAP_LAST_BIT-1))
1315*4882a593Smuzhiyun #define SECTION_NID_SHIFT	3
1316*4882a593Smuzhiyun 
1317*4882a593Smuzhiyun static inline struct page *__section_mem_map_addr(struct mem_section *section)
1318*4882a593Smuzhiyun {
1319*4882a593Smuzhiyun 	unsigned long map = section->section_mem_map;
1320*4882a593Smuzhiyun 	map &= SECTION_MAP_MASK;
1321*4882a593Smuzhiyun 	return (struct page *)map;
1322*4882a593Smuzhiyun }
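/*
 * Illustrative sketch (not part of the original header, compiled out with
 * #if 0) of how the flag bits coexist with the encoded mem_map pointer; see
 * sparse.c::sparse_init_one_section() for the real code.
 */
#if 0
static void encode_decode_example(struct mem_section *ms,
				  struct page *mem_map, unsigned long pnum)
{
	/* Store "mem_map minus the section's first pfn", plus state bits. */
	ms->section_mem_map =
		(unsigned long)(mem_map - section_nr_to_pfn(pnum)) |
		SECTION_MARKED_PRESENT | SECTION_HAS_MEM_MAP;

	/* __section_mem_map_addr() masks the low flag bits off again. */
	WARN_ON(__section_mem_map_addr(ms) !=
		mem_map - section_nr_to_pfn(pnum));
}
#endif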
1323*4882a593Smuzhiyun 
1324*4882a593Smuzhiyun static inline int present_section(struct mem_section *section)
1325*4882a593Smuzhiyun {
1326*4882a593Smuzhiyun 	return (section && (section->section_mem_map & SECTION_MARKED_PRESENT));
1327*4882a593Smuzhiyun }
1328*4882a593Smuzhiyun 
1329*4882a593Smuzhiyun static inline int present_section_nr(unsigned long nr)
1330*4882a593Smuzhiyun {
1331*4882a593Smuzhiyun 	return present_section(__nr_to_section(nr));
1332*4882a593Smuzhiyun }
1333*4882a593Smuzhiyun 
1334*4882a593Smuzhiyun static inline int valid_section(struct mem_section *section)
1335*4882a593Smuzhiyun {
1336*4882a593Smuzhiyun 	return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP));
1337*4882a593Smuzhiyun }
1338*4882a593Smuzhiyun 
1339*4882a593Smuzhiyun static inline int early_section(struct mem_section *section)
1340*4882a593Smuzhiyun {
1341*4882a593Smuzhiyun 	return (section && (section->section_mem_map & SECTION_IS_EARLY));
1342*4882a593Smuzhiyun }
1343*4882a593Smuzhiyun 
1344*4882a593Smuzhiyun static inline int valid_section_nr(unsigned long nr)
1345*4882a593Smuzhiyun {
1346*4882a593Smuzhiyun 	return valid_section(__nr_to_section(nr));
1347*4882a593Smuzhiyun }
1348*4882a593Smuzhiyun 
1349*4882a593Smuzhiyun static inline int online_section(struct mem_section *section)
1350*4882a593Smuzhiyun {
1351*4882a593Smuzhiyun 	return (section && (section->section_mem_map & SECTION_IS_ONLINE));
1352*4882a593Smuzhiyun }
1353*4882a593Smuzhiyun 
1354*4882a593Smuzhiyun static inline int online_section_nr(unsigned long nr)
1355*4882a593Smuzhiyun {
1356*4882a593Smuzhiyun 	return online_section(__nr_to_section(nr));
1357*4882a593Smuzhiyun }
1358*4882a593Smuzhiyun 
1359*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_HOTPLUG
1360*4882a593Smuzhiyun void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
1361*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_HOTREMOVE
1362*4882a593Smuzhiyun void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
1363*4882a593Smuzhiyun #endif
1364*4882a593Smuzhiyun #endif
1365*4882a593Smuzhiyun 
1366*4882a593Smuzhiyun static inline struct mem_section *__pfn_to_section(unsigned long pfn)
1367*4882a593Smuzhiyun {
1368*4882a593Smuzhiyun 	return __nr_to_section(pfn_to_section_nr(pfn));
1369*4882a593Smuzhiyun }
1370*4882a593Smuzhiyun 
1371*4882a593Smuzhiyun extern unsigned long __highest_present_section_nr;
1372*4882a593Smuzhiyun 
1373*4882a593Smuzhiyun static inline int subsection_map_index(unsigned long pfn)
1374*4882a593Smuzhiyun {
1375*4882a593Smuzhiyun 	return (pfn & ~(PAGE_SECTION_MASK)) / PAGES_PER_SUBSECTION;
1376*4882a593Smuzhiyun }
1377*4882a593Smuzhiyun 
1378*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM_VMEMMAP
1379*4882a593Smuzhiyun static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
1380*4882a593Smuzhiyun {
1381*4882a593Smuzhiyun 	int idx = subsection_map_index(pfn);
1382*4882a593Smuzhiyun 
1383*4882a593Smuzhiyun 	return test_bit(idx, ms->usage->subsection_map);
1384*4882a593Smuzhiyun }
1385*4882a593Smuzhiyun #else
1386*4882a593Smuzhiyun static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
1387*4882a593Smuzhiyun {
1388*4882a593Smuzhiyun 	return 1;
1389*4882a593Smuzhiyun }
1390*4882a593Smuzhiyun #endif
1391*4882a593Smuzhiyun 
1392*4882a593Smuzhiyun #ifndef CONFIG_HAVE_ARCH_PFN_VALID
1393*4882a593Smuzhiyun static inline int pfn_valid(unsigned long pfn)
1394*4882a593Smuzhiyun {
1395*4882a593Smuzhiyun 	struct mem_section *ms;
1396*4882a593Smuzhiyun 
1397*4882a593Smuzhiyun 	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
1398*4882a593Smuzhiyun 		return 0;
1399*4882a593Smuzhiyun 	ms = __nr_to_section(pfn_to_section_nr(pfn));
1400*4882a593Smuzhiyun 	if (!valid_section(ms))
1401*4882a593Smuzhiyun 		return 0;
1402*4882a593Smuzhiyun 	/*
1403*4882a593Smuzhiyun 	 * Traditionally, early sections have always returned pfn_valid() for
1404*4882a593Smuzhiyun 	 * the entire section-sized span.
1405*4882a593Smuzhiyun 	 */
1406*4882a593Smuzhiyun 	return early_section(ms) || pfn_section_valid(ms, pfn);
1407*4882a593Smuzhiyun }
1408*4882a593Smuzhiyun #endif
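/*
 * Illustrative sketch (not part of the original header, compiled out with
 * #if 0): pfn_valid() must be checked before pfn_to_page() for any pfn that
 * did not come from a known-valid page.
 */
#if 0
static struct page *page_for_pfn_or_null(unsigned long pfn)
{
	if (!pfn_valid(pfn))
		return NULL;
	return pfn_to_page(pfn);
}
#endif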
1409*4882a593Smuzhiyun 
1410*4882a593Smuzhiyun static inline int pfn_in_present_section(unsigned long pfn)
1411*4882a593Smuzhiyun {
1412*4882a593Smuzhiyun 	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
1413*4882a593Smuzhiyun 		return 0;
1414*4882a593Smuzhiyun 	return present_section(__nr_to_section(pfn_to_section_nr(pfn)));
1415*4882a593Smuzhiyun }
1416*4882a593Smuzhiyun 
1417*4882a593Smuzhiyun static inline unsigned long next_present_section_nr(unsigned long section_nr)
1418*4882a593Smuzhiyun {
1419*4882a593Smuzhiyun 	while (++section_nr <= __highest_present_section_nr) {
1420*4882a593Smuzhiyun 		if (present_section_nr(section_nr))
1421*4882a593Smuzhiyun 			return section_nr;
1422*4882a593Smuzhiyun 	}
1423*4882a593Smuzhiyun 
1424*4882a593Smuzhiyun 	return -1;
1425*4882a593Smuzhiyun }
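/*
 * Illustrative sketch (not part of the original header, compiled out with
 * #if 0): walking every present section, in the style of the
 * for_each_present_section_nr() loop in mm/sparse.c.
 */
#if 0
static void dump_present_sections(void)
{
	unsigned long nr;

	for (nr = next_present_section_nr(-1UL);
	     nr != -1UL;
	     nr = next_present_section_nr(nr))
		pr_info("section %lu is present\n", nr);
}
#endif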
1426*4882a593Smuzhiyun 
1427*4882a593Smuzhiyun /*
1428*4882a593Smuzhiyun  * These are _only_ used during initialisation, so they can use
1429*4882a593Smuzhiyun  * __initdata.  They could have names to indicate this restriction.
1431*4882a593Smuzhiyun  */
1432*4882a593Smuzhiyun #ifdef CONFIG_NUMA
1433*4882a593Smuzhiyun #define pfn_to_nid(pfn)							\
1434*4882a593Smuzhiyun ({									\
1435*4882a593Smuzhiyun 	unsigned long __pfn_to_nid_pfn = (pfn);				\
1436*4882a593Smuzhiyun 	page_to_nid(pfn_to_page(__pfn_to_nid_pfn));			\
1437*4882a593Smuzhiyun })
1438*4882a593Smuzhiyun #else
1439*4882a593Smuzhiyun #define pfn_to_nid(pfn)		(0)
1440*4882a593Smuzhiyun #endif
1441*4882a593Smuzhiyun 
1442*4882a593Smuzhiyun void sparse_init(void);
1443*4882a593Smuzhiyun #else
1444*4882a593Smuzhiyun #define sparse_init()	do {} while (0)
1445*4882a593Smuzhiyun #define sparse_index_init(_sec, _nid)  do {} while (0)
1446*4882a593Smuzhiyun #define pfn_in_present_section pfn_valid
1447*4882a593Smuzhiyun #define subsection_map_init(_pfn, _nr_pages) do {} while (0)
1448*4882a593Smuzhiyun #endif /* CONFIG_SPARSEMEM */
1449*4882a593Smuzhiyun 
1450*4882a593Smuzhiyun /*
1451*4882a593Smuzhiyun  * During memory init, memblocks map pfns to nids. The search is expensive and
1452*4882a593Smuzhiyun  * this caches recent lookups. The implementation of __early_pfn_to_nid
1453*4882a593Smuzhiyun  * may treat start/end as pfns or sections.
1454*4882a593Smuzhiyun  */
1455*4882a593Smuzhiyun struct mminit_pfnnid_cache {
1456*4882a593Smuzhiyun 	unsigned long last_start;
1457*4882a593Smuzhiyun 	unsigned long last_end;
1458*4882a593Smuzhiyun 	int last_nid;
1459*4882a593Smuzhiyun };
1460*4882a593Smuzhiyun 
1461*4882a593Smuzhiyun /*
1462*4882a593Smuzhiyun  * If it is possible to have holes within a MAX_ORDER_NR_PAGES block, then we
1463*4882a593Smuzhiyun  * need to check pfn validity within that block.
1464*4882a593Smuzhiyun  * pfn_valid_within() should be used in this case; we optimise this away
1465*4882a593Smuzhiyun  * when we have no holes within a MAX_ORDER_NR_PAGES block.
1466*4882a593Smuzhiyun  */
1467*4882a593Smuzhiyun #ifdef CONFIG_HOLES_IN_ZONE
1468*4882a593Smuzhiyun #define pfn_valid_within(pfn) pfn_valid(pfn)
1469*4882a593Smuzhiyun #else
1470*4882a593Smuzhiyun #define pfn_valid_within(pfn) (1)
1471*4882a593Smuzhiyun #endif
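/*
 * Illustrative sketch (not part of the original header, compiled out with
 * #if 0): when scanning pfns inside a MAX_ORDER-sized block, check
 * pfn_valid_within() before touching each page; without CONFIG_HOLES_IN_ZONE
 * the check compiles away to 1.
 */
#if 0
static unsigned long count_valid_pfns(unsigned long start_pfn,
				      unsigned long end_pfn)
{
	unsigned long pfn, nr = 0;

	for (pfn = start_pfn; pfn < end_pfn; pfn++)
		if (pfn_valid_within(pfn))
			nr++;

	return nr;
}
#endif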
1472*4882a593Smuzhiyun 
1473*4882a593Smuzhiyun #endif /* !__GENERATING_BOUNDS_H */
1474*4882a593Smuzhiyun #endif /* !__ASSEMBLY__ */
1475*4882a593Smuzhiyun #endif /* _LINUX_MMZONE_H */
1476