Lines Matching +full:fine +full:- +full:granular

1 // SPDX-License-Identifier: GPL-2.0-only
50 #include <linux/backing-dev.h>
51 #include <linux/fault-inject.h>
52 #include <linux/page-isolation.h>
84 /* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */
93 * reporting it and marking it "reported" - it only skips notifying
102 * page shuffling (relevant code - e.g., memory onlining - is expected to
105 * Note: No code should rely on this flag for correctness - it's purely
113 * Don't poison memory with KASAN (only for the tag-based modes).
114 * During boot, all non-reserved memblock memory is exposed to page_alloc.
117 * This is only done for the tag-based KASAN modes, as those are able to
123 /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
145 /* work_structs for global per-cpu drains */
211 * other index - this ensures that it will be put on the correct CMA freelist.
215 return page->index; in get_pcppage_migratetype()
220 page->index = migratetype; in set_pcppage_migratetype()
270 * 1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
271 * 1G machine -> (16M dma, 784M normal, 224M high)
276 * TBD: should special case ZONE_DMA32 machines here - in those we normally
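The 1G example above pairs with the ratio arithmetic used later in setup_per_zone_lowmem_reserve() (zone->lowmem_reserve[j] = managed_pages / ratio, line 8084 below). A minimal userspace sketch of that calculation, assuming 4 KiB pages and a lowmem_reserve_ratio of 256 for ZONE_DMA; all numbers are illustrative, not taken from this file:

#include <stdio.h>

int main(void)
{
        unsigned long page_kib = 4;                             /* assumed 4 KiB pages */
        unsigned long normal_pages = 784UL * 1024 / page_kib;   /* the "784M normal" zone */
        unsigned long dma_ratio = 256;                          /* assumed default ratio for ZONE_DMA */

        /* ZONE_DMA pages effectively held back from allocations that could
         * have been satisfied from ZONE_NORMAL instead. */
        unsigned long reserve = normal_pages / dma_ratio;

        printf("ZONE_DMA holds back %lu pages (%lu KiB)\n",
               reserve, reserve * page_kib);
        return 0;
}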
340 int user_min_free_kbytes = -1;
347 * many cases very high-order allocations like THP are likely to be
348 * unsupported and the premature reclaim offsets the advantage of long-term
392 * During boot we initialize deferred pages on-demand, as needed, but once
408 * on-demand allocation and then freed again before the deferred pages
424 if (node_online(nid) && pfn >= NODE_DATA(nid)->first_deferred_pfn) in early_page_uninitialised()
448 /* Always populate low zones for address-constrained allocations */ in defer_init()
452 if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX) in defer_init()
460 (pfn & (PAGES_PER_SECTION - 1)) == 0) { in defer_init()
461 NODE_DATA(nid)->first_deferred_pfn = pfn; in defer_init()
492 return page_zone(page)->pageblock_flags; in get_pageblock_bitmap()
499 pfn &= (PAGES_PER_SECTION-1); in pfn_to_bitidx()
501 pfn = pfn - round_down(page_zone(page)->zone_start_pfn, pageblock_nr_pages); in pfn_to_bitidx()
507 …* get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block o…
526 bitidx &= (BITS_PER_LONG-1); in __get_pfnblock_flags_mask()
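A minimal userspace sketch of the indexing performed by pfn_to_bitidx() and __get_pfnblock_flags_mask() above: the pfn is reduced to an offset inside its section, turned into a pageblock index, scaled by the number of flag bits per pageblock, then split into a word index plus a bit offset. pageblock_order = 9, 4 flag bits per block and a 32768-page section are assumptions for illustration only:

#include <stdio.h>

#define BITS_PER_LONG           (8 * sizeof(unsigned long))
#define PAGEBLOCK_ORDER         9                       /* assumed */
#define NR_PAGEBLOCK_BITS       4                       /* assumed */
#define PAGES_PER_SECTION       (1UL << 15)             /* assumed SPARSEMEM section size */

static unsigned long get_flags(const unsigned long *bitmap, unsigned long pfn,
                               unsigned long mask)
{
        unsigned long bitidx, word_bitidx, word;

        pfn &= PAGES_PER_SECTION - 1;                   /* offset inside the section */
        bitidx = (pfn >> PAGEBLOCK_ORDER) * NR_PAGEBLOCK_BITS;
        word_bitidx = bitidx / BITS_PER_LONG;           /* which long in the bitmap */
        bitidx &= BITS_PER_LONG - 1;                    /* bit offset inside that long */

        word = bitmap[word_bitidx];
        return (word >> bitidx) & mask;
}

int main(void)
{
        unsigned long bitmap[64] = { 0 };

        bitmap[0] = 0x2UL << 4;         /* pretend pageblock 1 has migratetype 2 */
        printf("flags for pfn 0x%x: %lu\n", 0x200, get_flags(bitmap, 0x200, 0x7));
        return 0;
}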
544 return -EINVAL; in isolate_anon_lru_page()
547 return -EINVAL; in isolate_anon_lru_page()
562 …* set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pag…
582 bitidx &= (BITS_PER_LONG-1); in set_pfnblock_flags_mask()
618 start_pfn = zone->zone_start_pfn; in page_outside_zone_boundaries()
619 sp = zone->spanned_pages; in page_outside_zone_boundaries()
625 pr_err("page 0x%lx outside node %d zone %s [ 0x%lx - 0x%lx ]\n", in page_outside_zone_boundaries()
626 pfn, zone_to_nid(zone), zone->name, in page_outside_zone_boundaries()
687 current->comm, page_to_pfn(page)); in bad_page()
700 * Higher-order pages are called "compound pages". They are structured thusly:
705 * in bit 0 of page->compound_head. The rest of the bits is a pointer to the head page.
707 * The first tail page's ->compound_dtor holds the offset in array of compound
710 * The first tail page's ->compound_order holds the order of allocation.
711 * This usage means that zero-order pages may not be compound.
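The bit-0 encoding described above can be sketched in a few lines of userspace C; struct fake_page and the pointer arithmetic are stand-ins for illustration, not the kernel's struct page layout:

#include <stdio.h>
#include <stdint.h>

struct fake_page {
        uintptr_t compound_head;        /* bit 0 set => tail page, rest = head pointer */
};

static struct fake_page *compound_head(struct fake_page *p)
{
        if (p->compound_head & 1)
                return (struct fake_page *)(p->compound_head - 1);
        return p;                       /* head (or non-compound) page */
}

int main(void)
{
        struct fake_page pages[4] = { { 0 }, { 0 }, { 0 }, { 0 } }; /* pretend order-2 compound page */
        int i;

        for (i = 1; i < 4; i++)         /* tail pages point at the head, tagged with bit 0 */
                pages[i].compound_head = (uintptr_t)&pages[0] | 1;

        printf("tail 3 resolves to head %p (expected %p)\n",
               (void *)compound_head(&pages[3]), (void *)&pages[0]);
        return 0;
}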
729 p->mapping = TAIL_MAPPING; in prep_compound_page()
735 atomic_set(compound_mapcount_ptr(page), -1); in prep_compound_page()
781 INIT_LIST_HEAD(&page->lru); in set_page_guard()
784 __mod_zone_freepage_state(zone, -(1 << order), migratetype); in set_page_guard()
833 pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " in init_mem_debugging_and_hardening()
840 pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " in init_mem_debugging_and_hardening()
874 * Setting, clearing, and testing PageBuddy is serialized by zone->lock.
902 struct capture_control *capc = current->capture_control; in task_capc()
905 !(current->flags & PF_KTHREAD) && in task_capc()
906 !capc->page && in task_capc()
907 capc->cc->zone == zone ? capc : NULL; in task_capc()
914 if (!capc || order != capc->cc->order) in compaction_capture()
925 * and vice-versa but no more than normal fallback logic which can in compaction_capture()
926 * have trouble finding a high-order free page. in compaction_capture()
931 capc->page = page; in compaction_capture()
953 struct free_area *area = &zone->free_area[order]; in add_to_free_list()
955 list_add(&page->lru, &area->free_list[migratetype]); in add_to_free_list()
956 area->nr_free++; in add_to_free_list()
963 struct free_area *area = &zone->free_area[order]; in add_to_free_list_tail()
965 list_add_tail(&page->lru, &area->free_list[migratetype]); in add_to_free_list_tail()
966 area->nr_free++; in add_to_free_list_tail()
971 * of the list - so the moved pages won't immediately be considered for
977 struct free_area *area = &zone->free_area[order]; in move_to_free_list()
979 list_move_tail(&page->lru, &area->free_list[migratetype]); in move_to_free_list()
989 list_del(&page->lru); in del_page_from_free_list()
992 zone->free_area[order].nr_free--; in del_page_from_free_list()
997 * of the next-highest order is free. If it is, it's possible
1010 if (order >= MAX_ORDER - 2) in buddy_merge_likely()
1017 higher_page = page + (combined_pfn - pfn); in buddy_merge_likely()
1019 higher_buddy = higher_page + (buddy_pfn - combined_pfn); in buddy_merge_likely()
1028 * The concept of a buddy system is to maintain direct-mapped table
1046 * -- nyc
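The "direct-mapped table" remark above boils down to a single XOR: a block's buddy at a given order is found by flipping bit 'order' of its pfn, and the merged block begins at the common prefix of the two pfns (pfn & buddy_pfn). A small sketch of that arithmetic with an arbitrary pfn:

#include <stdio.h>

static unsigned long find_buddy_pfn(unsigned long pfn, unsigned int order)
{
        return pfn ^ (1UL << order);            /* flip bit 'order' */
}

int main(void)
{
        unsigned long pfn = 0x1234;             /* hypothetical order-2 block */
        unsigned int order = 2;
        unsigned long buddy = find_buddy_pfn(pfn, order);
        unsigned long combined = pfn & buddy;   /* pfn of the merged order-3 block */

        printf("pfn 0x%lx order %u -> buddy 0x%lx, merged 0x%lx\n",
               pfn, order, buddy, combined);
        return 0;
}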
1061 max_order = min_t(unsigned int, MAX_ORDER - 1, pageblock_order); in __free_one_page()
1064 VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); in __free_one_page()
1066 VM_BUG_ON(migratetype == -1); in __free_one_page()
1070 VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page); in __free_one_page()
1076 __mod_zone_freepage_state(zone, -(1 << order), in __free_one_page()
1081 buddy = page + (buddy_pfn - pfn); in __free_one_page()
1096 page = page + (combined_pfn - pfn); in __free_one_page()
1100 if (order < MAX_ORDER - 1) { in __free_one_page()
1107 * low-order merging. in __free_one_page()
1113 buddy = page + (buddy_pfn - pfn); in __free_one_page()
1153 if (unlikely(atomic_read(&page->_mapcount) != -1)) in page_expected_state()
1156 if (unlikely((unsigned long)page->mapping | in page_expected_state()
1159 (unsigned long)page->mem_cgroup | in page_expected_state()
1161 (page->flags & check_flags))) in page_expected_state()
1171 if (unlikely(atomic_read(&page->_mapcount) != -1)) in page_bad_reason()
1173 if (unlikely(page->mapping != NULL)) in page_bad_reason()
1174 bad_reason = "non-NULL mapping"; in page_bad_reason()
1177 if (unlikely(page->flags & flags)) { in page_bad_reason()
1184 if (unlikely(page->mem_cgroup)) in page_bad_reason()
1211 * We rely on page->lru.next never having bit 0 set, unless the page in free_tail_pages_check()
1212 * is PageTail(). Let's make sure that's true even for poisoned ->lru. in free_tail_pages_check()
1220 switch (page - head_page) { in free_tail_pages_check()
1222 /* the first tail page: ->mapping may be compound_mapcount() */ in free_tail_pages_check()
1230 * the second tail page: ->mapping is in free_tail_pages_check()
1231 * deferred_list.next -- ignore value. in free_tail_pages_check()
1235 if (page->mapping != TAIL_MAPPING) { in free_tail_pages_check()
1251 page->mapping = NULL; in free_tail_pages_check()
1301 * avoid checking PageCompound for order-0 pages. in free_pages_prepare()
1318 (page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; in free_pages_prepare()
1322 page->mapping = NULL; in free_pages_prepare()
1331 page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; in free_pages_prepare()
1349 * With hardware tag-based KASAN, memory tags must be set before the in free_pages_prepare()
1378 * With DEBUG_VM enabled, order-0 pages are checked immediately when being freed
1396 * With DEBUG_VM disabled, order-0 pages being freed are checked only when
1419 struct page *buddy = page + (buddy_pfn - pfn); in prefetch_buddy()
1449 count = min(pcp->count, count); in free_pcppages_bulk()
1454 * Remove pages from lists in a round-robin fashion. A in free_pcppages_bulk()
1464 list = &pcp->lists[migratetype]; in free_pcppages_bulk()
1467 /* This is the only non-empty list. Free them all. */ in free_pcppages_bulk()
1474 list_del(&page->lru); in free_pcppages_bulk()
1475 pcp->count--; in free_pcppages_bulk()
1480 list_add_tail(&page->lru, &head); in free_pcppages_bulk()
1485 * under zone->lock. It is believed the overhead of in free_pcppages_bulk()
1489 * prefetch buddy for the first pcp->batch nr of pages. in free_pcppages_bulk()
1491 if (prefetch_nr++ < pcp->batch) in free_pcppages_bulk()
1493 } while (--count && --batch_free && !list_empty(list)); in free_pcppages_bulk()
1496 spin_lock(&zone->lock); in free_pcppages_bulk()
1501 * page->lru.next will not point to original list. in free_pcppages_bulk()
1514 spin_unlock(&zone->lock); in free_pcppages_bulk()
1522 spin_lock(&zone->lock); in free_one_page()
1528 spin_unlock(&zone->lock); in free_one_page()
1547 INIT_LIST_HEAD(&page->lru); in __init_single_page()
1568 struct zone *zone = &pgdat->node_zones[zid]; in init_reserved_page()
1570 if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone)) in init_reserved_page()
1598 /* Avoid false-positive PageTail() */ in reserve_bootmem_region()
1599 INIT_LIST_HEAD(&page->lru); in reserve_bootmem_region()
1641 for (loop = 0; loop < (nr_pages - 1); loop++, p++) { in __free_pages_core()
1649 atomic_long_add(nr_pages, &page_zone(page)->managed_pages); in __free_pages_core()
1673 if (state->last_start <= pfn && pfn < state->last_end) in __early_pfn_to_nid()
1674 return state->last_nid; in __early_pfn_to_nid()
1678 state->last_start = start_pfn; in __early_pfn_to_nid()
1679 state->last_end = end_pfn; in __early_pfn_to_nid()
1680 state->last_nid = nid; in __early_pfn_to_nid()
1734 end_pfn--; in __pageblock_pfn_to_page()
1757 unsigned long block_start_pfn = zone->zone_start_pfn; in set_zone_contiguous()
1774 zone->contiguous = true; in set_zone_contiguous()
1779 zone->contiguous = false; in clear_zone_contiguous()
1794 /* Free a large naturally-aligned chunk if possible */ in deferred_free_range()
1796 (pfn & (pageblock_nr_pages - 1)) == 0) { in deferred_free_range()
1803 if ((pfn & (pageblock_nr_pages - 1)) == 0) in deferred_free_range()
1833 if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn)) in deferred_pfn_valid()
1845 unsigned long nr_pgmask = pageblock_nr_pages - 1; in deferred_free_pages()
1850 deferred_free_range(pfn - nr_free, nr_free); in deferred_free_pages()
1853 deferred_free_range(pfn - nr_free, nr_free); in deferred_free_pages()
1860 deferred_free_range(pfn - nr_free, nr_free); in deferred_free_pages()
1872 unsigned long nr_pgmask = pageblock_nr_pages - 1; in deferred_init_pages()
1894 * This function is meant to pre-load the iterator for the zone init.
2008 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); in deferred_init_memmap()
2021 first_init_pfn = pgdat->first_deferred_pfn; in deferred_init_memmap()
2029 BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn); in deferred_init_memmap()
2030 BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat)); in deferred_init_memmap()
2031 pgdat->first_deferred_pfn = ULONG_MAX; in deferred_init_memmap()
2036 * pre-grown prior to start of deferred page initialization. in deferred_init_memmap()
2042 zone = pgdat->node_zones + zid; in deferred_init_memmap()
2060 .size = epfn_align - spfn, in deferred_init_memmap()
2075 pgdat->node_id, jiffies_to_msecs(jiffies - start)); in deferred_init_memmap()
2100 pg_data_t *pgdat = zone->zone_pgdat; in deferred_grow_zone()
2101 unsigned long first_deferred_pfn = pgdat->first_deferred_pfn; in deferred_grow_zone()
2116 if (first_deferred_pfn != pgdat->first_deferred_pfn) { in deferred_grow_zone()
2124 pgdat->first_deferred_pfn = ULONG_MAX; in deferred_grow_zone()
2151 pgdat->first_deferred_pfn = spfn; in deferred_grow_zone()
2197 * on-demand struct page initialization. in page_alloc_init_late()
2225 } while (++p, --i); in init_cma_reserved_pageblock()
2234 __free_pages(p, MAX_ORDER - 1); in init_cma_reserved_pageblock()
2236 } while (i -= MAX_ORDER_NR_PAGES); in init_cma_reserved_pageblock()
2243 page_zone(page)->cma_pages += pageblock_nr_pages; in init_cma_reserved_pageblock()
2259 * -- nyc
2267 high--; in expand()
2287 if (unlikely(page->flags & __PG_HWPOISON)) { in check_new_page_bad()
2312 * With DEBUG_VM enabled, order-0 pages are checked for expected state when
2330 * With DEBUG_VM disabled, free order-0 pages are checked for expected state
2430 area = &(zone->free_area[current_order]); in __rmqueue_smallest()
2527 start_pfn = start_pfn & ~(pageblock_nr_pages-1); in move_freepages_block()
2529 end_page = start_page + pageblock_nr_pages - 1; in move_freepages_block()
2530 end_pfn = start_pfn + pageblock_nr_pages - 1; in move_freepages_block()
2545 int nr_pageblocks = 1 << (start_order - pageblock_order); in change_pageblock_range()
2547 while (nr_pageblocks--) { in change_pageblock_range()
2601 max_boost = mult_frac(zone->_watermark[WMARK_HIGH], in boost_watermark()
2617 zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages, in boost_watermark()
2626 * pageblock to our migratetype and determine how many already-allocated pages
2659 set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); in steal_suitable_fallback()
2677 * to MOVABLE pageblock, consider all non-movable pages as in steal_suitable_fallback()
2680 * exact migratetype of non-movable pages. in steal_suitable_fallback()
2684 - (free_pages + movable_pages); in steal_suitable_fallback()
2697 if (free_pages + alike_pages >= (1 << (pageblock_order-1)) || in steal_suitable_fallback()
2719 if (area->nr_free == 0) in find_suitable_fallback()
2720 return -1; in find_suitable_fallback()
2741 return -1; in find_suitable_fallback()
2745 * Reserve a pageblock for exclusive use of high-order atomic allocations if
2756 * Check is race-prone but harmless. in reserve_highatomic_pageblock()
2759 if (zone->nr_reserved_highatomic >= max_managed) in reserve_highatomic_pageblock()
2762 spin_lock_irqsave(&zone->lock, flags); in reserve_highatomic_pageblock()
2765 if (zone->nr_reserved_highatomic >= max_managed) in reserve_highatomic_pageblock()
2772 zone->nr_reserved_highatomic += pageblock_nr_pages; in reserve_highatomic_pageblock()
2778 spin_unlock_irqrestore(&zone->lock, flags); in reserve_highatomic_pageblock()
2783 * potentially hurts the reliability of high-order allocations when under
2793 struct zonelist *zonelist = ac->zonelist; in unreserve_highatomic_pageblock()
2801 for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, in unreserve_highatomic_pageblock()
2802 ac->nodemask) { in unreserve_highatomic_pageblock()
2807 if (!force && zone->nr_reserved_highatomic <= in unreserve_highatomic_pageblock()
2811 spin_lock_irqsave(&zone->lock, flags); in unreserve_highatomic_pageblock()
2813 struct free_area *area = &(zone->free_area[order]); in unreserve_highatomic_pageblock()
2823 * from highatomic to ac->migratetype. So we should in unreserve_highatomic_pageblock()
2829 * locking could inadvertently allow a per-cpu in unreserve_highatomic_pageblock()
2834 zone->nr_reserved_highatomic -= min( in unreserve_highatomic_pageblock()
2836 zone->nr_reserved_highatomic); in unreserve_highatomic_pageblock()
2840 * Convert to ac->migratetype and avoid the normal in unreserve_highatomic_pageblock()
2848 set_pageblock_migratetype(page, ac->migratetype); in unreserve_highatomic_pageblock()
2849 ret = move_freepages_block(zone, page, ac->migratetype, in unreserve_highatomic_pageblock()
2852 spin_unlock_irqrestore(&zone->lock, flags); in unreserve_highatomic_pageblock()
2856 spin_unlock_irqrestore(&zone->lock, flags); in unreserve_highatomic_pageblock()
2896 for (current_order = MAX_ORDER - 1; current_order >= min_order; in __rmqueue_fallback()
2897 --current_order) { in __rmqueue_fallback()
2898 area = &(zone->free_area[current_order]); in __rmqueue_fallback()
2901 if (fallback_mt == -1) in __rmqueue_fallback()
2924 area = &(zone->free_area[current_order]); in __rmqueue_fallback()
2927 if (fallback_mt != -1) in __rmqueue_fallback()
2932 * This should not happen - we already found a suitable fallback in __rmqueue_fallback()
2952 * Call me with the zone->lock already held.
3000 spin_lock(&zone->lock); in rmqueue_bulk()
3026 list_add_tail(&page->lru, list); in rmqueue_bulk()
3030 -(1 << order)); in rmqueue_bulk()
3039 __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); in rmqueue_bulk()
3040 spin_unlock(&zone->lock); in rmqueue_bulk()
3053 struct list_head *list = &pcp->lists[migratetype]; in get_populated_pcp_list()
3056 pcp->count += rmqueue_bulk(zone, order, in get_populated_pcp_list()
3057 pcp->batch, list, in get_populated_pcp_list()
3081 batch = READ_ONCE(pcp->batch); in drain_zone_pages()
3082 to_drain = min(pcp->count, batch); in drain_zone_pages()
3103 pset = per_cpu_ptr(zone->pageset, cpu); in drain_pages_zone()
3105 pcp = &pset->pcp; in drain_pages_zone()
3106 if (pcp->count) in drain_pages_zone()
3107 free_pcppages_bulk(zone, pcp->count, pcp); in drain_pages_zone()
3128 * Spill all of this CPU's per-cpu pages back into the buddy allocator.
3130 * The CPU has to be pinned. When zone parameter is non-NULL, spill just
3157 drain_local_pages(drain->zone); in drain_local_pages_wq()
3162 * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
3164 * When zone parameter is non-NULL, spill just the single zone's pages.
3208 pcp = per_cpu_ptr(zone->pageset, cpu); in drain_all_pages()
3209 if (pcp->pcp.count) in drain_all_pages()
3213 pcp = per_cpu_ptr(z->pageset, cpu); in drain_all_pages()
3214 if (pcp->pcp.count) { in drain_all_pages()
3230 drain->zone = zone; in drain_all_pages()
3231 INIT_WORK(&drain->work, drain_local_pages_wq); in drain_all_pages()
3232 queue_work_on(cpu, mm_percpu_wq, &drain->work); in drain_all_pages()
3235 flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work); in drain_all_pages()
3257 spin_lock_irqsave(&zone->lock, flags); in mark_free_pages()
3260 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) in mark_free_pages()
3264 if (!--page_count) { in mark_free_pages()
3278 &zone->free_area[order].free_list[t], lru) { in mark_free_pages()
3283 if (!--page_count) { in mark_free_pages()
3291 spin_unlock_irqrestore(&zone->lock, flags); in mark_free_pages()
3336 pcp = &this_cpu_ptr(zone->pageset)->pcp; in free_unref_page_commit()
3337 list_add(&page->lru, &pcp->lists[migratetype]); in free_unref_page_commit()
3338 pcp->count++; in free_unref_page_commit()
3339 if (pcp->count >= pcp->high) { in free_unref_page_commit()
3340 unsigned long batch = READ_ONCE(pcp->batch); in free_unref_page_commit()
3346 * Free a 0-order page
3362 * Free a list of 0-order pages
3374 list_del(&page->lru); in free_unref_page_list()
3400 * split_page takes a non-compound higher-order page, and splits it into
3401 * n (1<<order) sub-pages: page[0..n]
3402 * Each sub-page must be freed individually.
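A kernel-context sketch of the contract spelled out above (illustrative code for a module, not taken from this file): after split_page(), every one of the 1 << order sub-pages is an independent order-0 page and must be freed on its own:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>

static int split_example(void)
{
        unsigned int order = 2;
        struct page *page = alloc_pages(GFP_KERNEL, order);
        int i;

        if (!page)
                return -ENOMEM;

        split_page(page, order);                /* page[0..3] are now plain order-0 pages */

        for (i = 0; i < (1 << order); i++)
                __free_page(page + i);          /* each sub-page freed individually */
        return 0;
}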
3435 * emulate a high-order watermark check with a raised order-0 in __isolate_free_page()
3436 * watermark, because we already know our high-order page in __isolate_free_page()
3439 watermark = zone->_watermark[WMARK_MIN] + (1UL << order); in __isolate_free_page()
3443 __mod_zone_freepage_state(zone, -(1UL << order), mt); in __isolate_free_page()
3454 if (order >= pageblock_order - 1) { in __isolate_free_page()
3455 struct page *endpage = page + (1 << order) - 1; in __isolate_free_page()
3470 * __putback_isolated_page - Return a now-isolated page back where we got it
3483 lockdep_assert_held(&zone->lock); in __putback_isolated_page()
3517 /* Remove page from the per-cpu list, caller must protect the list */
3547 list_del(&page->lru); in __rmqueue_pcplist()
3548 pcp->count--; in __rmqueue_pcplist()
3554 /* Lock and remove page from the per-cpu list */
3564 pcp = &this_cpu_ptr(zone->pageset)->pcp; in rmqueue_pcplist()
3576 * Allocate a page from the given zone. Use pcplists for order-0 allocations.
3595 * allocate greater than order-1 page units with __GFP_NOFAIL. in rmqueue()
3598 spin_lock_irqsave(&zone->lock, flags); in rmqueue()
3603 * order-0 request can reach here when the pcplist is skipped in rmqueue()
3604 * due to non-CMA allocation context. HIGHATOMIC area is in rmqueue()
3605 * reserved for high-order atomic allocation, so order-0 in rmqueue()
3623 spin_unlock(&zone->lock); in rmqueue()
3626 __mod_zone_freepage_state(zone, -(1 << order), in rmqueue()
3637 if (test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags)) { in rmqueue()
3638 clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); in rmqueue()
3696 debugfs_create_bool("ignore-gfp-wait", mode, dir, in fail_page_alloc_debugfs()
3698 debugfs_create_bool("ignore-gfp-highmem", mode, dir, in fail_page_alloc_debugfs()
3700 debugfs_create_u32("min-order", mode, dir, &fail_page_alloc.min_order); in fail_page_alloc_debugfs()
3728 long unusable_free = (1 << order) - 1; in __zone_watermark_unusable_free()
3732 * the high-atomic reserves. This will over-estimate the size of the in __zone_watermark_unusable_free()
3736 unusable_free += z->nr_reserved_highatomic; in __zone_watermark_unusable_free()
3748 * Return true if free base pages are above 'mark'. For high-order checks it
3749 * will return true if the order-0 watermark is reached and there is at least
3761 /* free_pages may go negative - that's OK */ in __zone_watermark_ok()
3762 free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags); in __zone_watermark_ok()
3765 min -= min / 2; in __zone_watermark_ok()
3772 * makes during the free path will be small and short-lived. in __zone_watermark_ok()
3775 min -= min / 2; in __zone_watermark_ok()
3777 min -= min / 4; in __zone_watermark_ok()
3781 * Check watermarks for an order-0 allocation request. If these in __zone_watermark_ok()
3782 * are not met, then a high-order request also cannot go ahead in __zone_watermark_ok()
3785 if (free_pages <= min + z->lowmem_reserve[highest_zoneidx]) in __zone_watermark_ok()
3788 /* If this is an order-0 request then the watermark is fine */ in __zone_watermark_ok()
3792 /* For a high-order request, check at least one suitable page is free */ in __zone_watermark_ok()
3794 struct free_area *area = &z->free_area[o]; in __zone_watermark_ok()
3797 if (!area->nr_free) in __zone_watermark_ok()
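A userspace sketch of the order-0 arithmetic in __zone_watermark_ok() as shown above: the unusable (1 << order) - 1 pages are subtracted, ALLOC_HIGH and ALLOC_HARDER each shave the effective minimum, and the lowmem reserve for the requested zone index is added on top. The zone numbers are invented, and the ALLOC_OOM variant is left out:

#include <stdbool.h>
#include <stdio.h>

static bool watermark_ok(long free_pages, long mark, long lowmem_reserve,
                         unsigned int order, bool alloc_high, bool alloc_harder)
{
        long min = mark;

        free_pages -= (1L << order) - 1;        /* pages unusable for this request */
        if (alloc_high)
                min -= min / 2;                 /* __GFP_HIGH may dig deeper */
        if (alloc_harder)
                min -= min / 4;                 /* atomic callers dig a little more */

        return free_pages > min + lowmem_reserve;
}

int main(void)
{
        /* Hypothetical zone: WMARK_MIN of 800 pages, lowmem reserve of 100. */
        printf("normal order-0, 850 free: %d\n",
               watermark_ok(850, 800, 100, 0, false, false));
        printf("atomic order-0, 850 free: %d\n",
               watermark_ok(850, 800, 100, 0, true, true));
        return 0;
}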
3842 * Fast check for order-0 only. If this fails then the reserves in zone_watermark_fast()
3852 /* reserved may overestimate high-atomic reserves. */ in zone_watermark_fast()
3853 usable_free -= min(usable_free, reserved); in zone_watermark_fast()
3854 if (usable_free > mark + z->lowmem_reserve[highest_zoneidx]) in zone_watermark_fast()
3862 * Ignore watermark boosting for GFP_ATOMIC order-0 allocations in zone_watermark_fast()
3867 if (unlikely(!order && (gfp_mask & __GFP_ATOMIC) && z->watermark_boost in zone_watermark_fast()
3869 mark = z->_watermark[WMARK_MIN]; in zone_watermark_fast()
3882 if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark) in zone_watermark_ok_safe()
3931 * the pointer is within zone->zone_pgdat->node_zones[]. Also assume in alloc_flags_nofragment()
3934 BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1); in alloc_flags_nofragment()
3935 if (nr_online_nodes > 1 && !populated_zone(--zone)) in alloc_flags_nofragment()
3947 unsigned int pflags = current->flags; in current_alloc_flags()
3977 z = ac->preferred_zoneref; in get_page_from_freelist()
3978 for_next_zone_zonelist_nodemask(zone, z, ac->highest_zoneidx, in get_page_from_freelist()
3979 ac->nodemask) { in get_page_from_freelist()
3998 * exceed the per-node dirty limit in the slowpath in get_page_from_freelist()
4004 * dirty-throttling and the flusher threads. in get_page_from_freelist()
4006 if (ac->spread_dirty_pages) { in get_page_from_freelist()
4007 if (last_pgdat_dirty_limit == zone->zone_pgdat) in get_page_from_freelist()
4010 if (!node_dirty_ok(zone->zone_pgdat)) { in get_page_from_freelist()
4011 last_pgdat_dirty_limit = zone->zone_pgdat; in get_page_from_freelist()
4017 zone != ac->preferred_zoneref->zone) { in get_page_from_freelist()
4025 local_nid = zone_to_nid(ac->preferred_zoneref->zone); in get_page_from_freelist()
4034 ac->highest_zoneidx, alloc_flags, in get_page_from_freelist()
4054 !zone_allows_reclaim(ac->preferred_zoneref->zone, zone)) in get_page_from_freelist()
4057 ret = node_reclaim(zone->zone_pgdat, gfp_mask, order); in get_page_from_freelist()
4068 ac->highest_zoneidx, alloc_flags)) in get_page_from_freelist()
4076 page = rmqueue(ac->preferred_zoneref->zone, zone, order, in get_page_from_freelist()
4077 gfp_mask, alloc_flags, ac->migratetype); in get_page_from_freelist()
4082 * If this is a high-order atomic allocation then check in get_page_from_freelist()
4123 (current->flags & (PF_MEMALLOC | PF_EXITING))) in warn_alloc_show_mem()
4146 current->comm, &vaf, gfp_mask, &gfp_mask, in warn_alloc()
4181 .zonelist = ac->zonelist, in __alloc_pages_may_oom()
4182 .nodemask = ac->nodemask, in __alloc_pages_may_oom()
4215 if (current->flags & PF_DUMPCORE) in __alloc_pages_may_oom()
4231 if (ac->highest_zoneidx < ZONE_NORMAL) in __alloc_pages_may_oom()
4250 * Help non-failing allocations by giving them access to memory in __alloc_pages_may_oom()
4269 /* Try memory compaction for high-order allocations before reclaim */
4308 zone->compact_blockskip_flush = false; in __alloc_pages_direct_compact()
4352 * compaction was skipped because there are not enough order-0 pages in should_compact_retry()
4394 (*compact_priority)--; in should_compact_retry()
4427 * Let's give them some hope and keep retrying while the order-0 in should_compact_retry()
4430 for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, in should_compact_retry()
4431 ac->highest_zoneidx, ac->nodemask) { in should_compact_retry()
4433 ac->highest_zoneidx, alloc_flags)) in should_compact_retry()
4453 if (current->flags & PF_MEMALLOC) in __need_fs_reclaim()
4530 progress = try_to_free_pages(ac->zonelist, order, gfp_mask, in __perform_reclaim()
4531 ac->nodemask); in __perform_reclaim()
4562 * pages are pinned on the per-cpu lists or in high alloc reserves. in __alloc_pages_direct_reclaim()
4568 alloc_flags, ac->migratetype, *did_some_progress, &skip_pcp_drain); in __alloc_pages_direct_reclaim()
4586 enum zone_type highest_zoneidx = ac->highest_zoneidx; in wake_all_kswapds()
4588 for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, highest_zoneidx, in wake_all_kswapds()
4589 ac->nodemask) { in wake_all_kswapds()
4590 if (last_pgdat != zone->zone_pgdat) in wake_all_kswapds()
4592 last_pgdat = zone->zone_pgdat; in wake_all_kswapds()
4663 if (in_serving_softirq() && (current->flags & PF_MEMALLOC)) in __gfp_pfmemalloc_flags()
4666 if (current->flags & PF_MEMALLOC) in __gfp_pfmemalloc_flags()
4724 for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, in should_reclaim_retry()
4725 ac->highest_zoneidx, ac->nodemask) { in should_reclaim_retry()
4739 ac->highest_zoneidx, alloc_flags, available); in should_reclaim_retry()
4774 if (current->flags & PF_WQ_WORKER) in should_reclaim_retry()
4790 * This assumes that for all allocations, ac->nodemask can come only in check_retry_cpuset()
4795 if (cpusets_enabled() && ac->nodemask && in check_retry_cpuset()
4796 !cpuset_nodemask_valid_mems_allowed(ac->nodemask)) { in check_retry_cpuset()
4797 ac->nodemask = NULL; in check_retry_cpuset()
4858 * there was a cpuset modification and we are retrying - otherwise we in __alloc_pages_slowpath()
4859 * could end up iterating over non-eligible zones endlessly. in __alloc_pages_slowpath()
4861 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, in __alloc_pages_slowpath()
4862 ac->highest_zoneidx, ac->nodemask); in __alloc_pages_slowpath()
4863 if (!ac->preferred_zoneref->zone) in __alloc_pages_slowpath()
4879 * that we have enough base pages and don't need to reclaim. For non- in __alloc_pages_slowpath()
4880 * movable high-order allocations, do that as well, as compaction will in __alloc_pages_slowpath()
4888 (order > 0 && ac->migratetype != MIGRATE_MOVABLE)) in __alloc_pages_slowpath()
4910 * - potentially very expensive because zones are far in __alloc_pages_slowpath()
4913 * - not guaranteed to help because isolate_freepages() in __alloc_pages_slowpath()
4916 * - unlikely to make entire pageblocks free on its in __alloc_pages_slowpath()
4947 ac->nodemask = NULL; in __alloc_pages_slowpath()
4948 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, in __alloc_pages_slowpath()
4949 ac->highest_zoneidx, ac->nodemask); in __alloc_pages_slowpath()
4962 if (current->flags & PF_MEMALLOC) in __alloc_pages_slowpath()
4966 alloc_flags, ac->migratetype, &page); in __alloc_pages_slowpath()
4999 * It doesn't make any sense to retry for the compaction if the order-0 in __alloc_pages_slowpath()
5062 WARN_ON_ONCE(current->flags & PF_MEMALLOC); in __alloc_pages_slowpath()
5073 * Help non-failing allocations by giving them access to memory in __alloc_pages_slowpath()
5087 alloc_flags, ac->migratetype, &page); in __alloc_pages_slowpath()
5091 warn_alloc(gfp_mask, ac->nodemask, in __alloc_pages_slowpath()
5103 ac->highest_zoneidx = gfp_zone(gfp_mask); in prepare_alloc_pages()
5104 ac->zonelist = node_zonelist(preferred_nid, gfp_mask); in prepare_alloc_pages()
5105 ac->nodemask = nodemask; in prepare_alloc_pages()
5106 ac->migratetype = gfp_migratetype(gfp_mask); in prepare_alloc_pages()
5114 if (!in_interrupt() && !ac->nodemask) in prepare_alloc_pages()
5115 ac->nodemask = &cpuset_current_mems_allowed; in prepare_alloc_pages()
5131 ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE); in prepare_alloc_pages()
5138 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, in prepare_alloc_pages()
5139 ac->highest_zoneidx, ac->nodemask); in prepare_alloc_pages()
5174 alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp_mask); in __alloc_pages_nodemask()
5192 * &cpuset_current_mems_allowed to optimize the fast-path attempt. in __alloc_pages_nodemask()
5247 while (order-- > 0) in __free_pages()
5264 * An arbitrary-length arbitrary-offset area of memory which resides
5271 * sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
5284 nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE; in __page_frag_cache_refill()
5289 nc->va = page ? page_address(page) : NULL; in __page_frag_cache_refill()
5310 if (unlikely(!nc->va)) { in page_frag_alloc()
5318 size = nc->size; in page_frag_alloc()
5326 nc->pfmemalloc = page_is_pfmemalloc(page); in page_frag_alloc()
5327 nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; in page_frag_alloc()
5328 nc->offset = size; in page_frag_alloc()
5331 offset = nc->offset - fragsz; in page_frag_alloc()
5333 page = virt_to_page(nc->va); in page_frag_alloc()
5335 if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) in page_frag_alloc()
5338 if (unlikely(nc->pfmemalloc)) { in page_frag_alloc()
5345 size = nc->size; in page_frag_alloc()
5351 nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; in page_frag_alloc()
5352 offset = size - fragsz; in page_frag_alloc()
5367 nc->pagecnt_bias--; in page_frag_alloc()
5368 nc->offset = offset; in page_frag_alloc()
5370 return nc->va + offset; in page_frag_alloc()
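The fragment allocator above carves a page from the end towards the beginning: offset starts at the page size and each allocation simply moves it down by fragsz, with the refcount bias deciding when the page can be reused. A stripped-down userspace sketch in which refill and recycling are omitted:

#include <stdio.h>
#include <stdlib.h>

struct frag_cache {
        char *va;
        long size;
        long offset;            /* next fragment ends here; starts at size */
};

static void *frag_alloc(struct frag_cache *nc, long fragsz)
{
        long offset = nc->offset - fragsz;

        if (offset < 0)
                return NULL;    /* the real code would refill with a fresh page */
        nc->offset = offset;
        return nc->va + offset;
}

int main(void)
{
        struct frag_cache nc = { malloc(4096), 4096, 4096 };

        if (!nc.va)
                return 1;
        printf("frag A starts at offset %ld\n", (char *)frag_alloc(&nc, 256) - nc.va);
        printf("frag B starts at offset %ld\n", (char *)frag_alloc(&nc, 512) - nc.va);
        free(nc.va);
        return 0;
}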
5403 * alloc_pages_exact - allocate an exact number of physically-contiguous pages.
5409 * allocate memory in power-of-two pages.
5431 * alloc_pages_exact_nid - allocate an exact number of physically-contiguous
5457 * free_pages_exact - release memory allocated via alloc_pages_exact()
5476 * nr_free_zone_pages - count number of pages beyond high watermark
5483 * nr_free_zone_pages = managed_pages - high_pages
5501 sum += size - high; in nr_free_zone_pages()
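The identity quoted above (nr_free_zone_pages = managed_pages - high_pages) is just a per-zone sum; a toy version with invented zone sizes:

#include <stdio.h>

struct fake_zone { unsigned long managed, high_wmark; };

int main(void)
{
        struct fake_zone zones[] = {
                { 4096,    128 },       /* pretend ZONE_DMA    */
                { 262144, 2048 },       /* pretend ZONE_NORMAL */
        };
        unsigned long sum = 0;

        for (unsigned int i = 0; i < sizeof(zones) / sizeof(zones[0]); i++) {
                unsigned long size = zones[i].managed;
                unsigned long high = zones[i].high_wmark;

                if (size > high)
                        sum += size - high;     /* pages "beyond the high watermark" */
        }
        printf("nr_free_zone_pages ~ %lu\n", sum);
        return 0;
}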
5508 * nr_free_buffer_pages - count number of pages beyond high watermark
5548 available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages; in si_mem_available()
5556 pagecache -= min(pagecache / 2, wmark_low); in si_mem_available()
5566 available += reclaimable - min(reclaimable / 2, wmark_low); in si_mem_available()
5576 val->totalram = totalram_pages(); in si_meminfo()
5577 val->sharedram = global_node_page_state(NR_SHMEM); in si_meminfo()
5578 val->freeram = global_zone_page_state(NR_FREE_PAGES); in si_meminfo()
5579 val->bufferram = nr_blockdev_pages(); in si_meminfo()
5580 val->totalhigh = totalhigh_pages(); in si_meminfo()
5581 val->freehigh = nr_free_highpages(); in si_meminfo()
5582 val->mem_unit = PAGE_SIZE; in si_meminfo()
5597 managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]); in si_meminfo_node()
5598 val->totalram = managed_pages; in si_meminfo_node()
5599 val->sharedram = node_page_state(pgdat, NR_SHMEM); in si_meminfo_node()
5600 val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES); in si_meminfo_node()
5603 struct zone *zone = &pgdat->node_zones[zone_type]; in si_meminfo_node()
5610 val->totalhigh = managed_highpages; in si_meminfo_node()
5611 val->freehigh = free_highpages; in si_meminfo_node()
5613 val->totalhigh = managed_highpages; in si_meminfo_node()
5614 val->freehigh = free_highpages; in si_meminfo_node()
5616 val->mem_unit = PAGE_SIZE; in si_meminfo_node()
5630 * no node mask - aka implicit memory numa policy. Do not bother with in show_mem_node_skip()
5631 * the synchronization - read_mems_allowed_begin - because we do not in show_mem_node_skip()
5640 #define K(x) ((x) << (PAGE_SHIFT-10))
5670 * Show free area list (used inside shift_scroll-lock stuff)
5690 free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count; in show_free_areas()
5720 if (show_mem_node_skip(filter, pgdat->node_id, nodemask)) in show_free_areas()
5747 pgdat->node_id, in show_free_areas()
5770 pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? in show_free_areas()
5782 free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count; in show_free_areas()
5807 zone->name, in show_free_areas()
5812 K(zone->nr_reserved_highatomic), in show_free_areas()
5819 K(zone->present_pages), in show_free_areas()
5825 K(this_cpu_read(zone->pageset->pcp.count)), in show_free_areas()
5829 printk(KERN_CONT " %ld", zone->lowmem_reserve[i]); in show_free_areas()
5841 printk(KERN_CONT "%s: ", zone->name); in show_free_areas()
5843 spin_lock_irqsave(&zone->lock, flags); in show_free_areas()
5845 struct free_area *area = &zone->free_area[order]; in show_free_areas()
5848 nr[order] = area->nr_free; in show_free_areas()
5857 spin_unlock_irqrestore(&zone->lock, flags); in show_free_areas()
5876 zoneref->zone = zone; in zoneref_set_zone()
5877 zoneref->zone_idx = zone_idx(zone); in zoneref_set_zone()
5892 zone_type--; in build_zonerefs_node()
5893 zone = pgdat->node_zones + zone_type; in build_zonerefs_node()
5915 return -EINVAL; in __parse_numa_zonelist_order()
5938 * find_next_best_node - find the next node that should appear in a given node's fallback list
5999 * This results in maximum locality--normal zone overflows into local
6000 * DMA zone, if any--but risks exhausting DMA zone.
6008 zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs; in build_zonelists_in_node_order()
6018 zonerefs->zone = NULL; in build_zonelists_in_node_order()
6019 zonerefs->zone_idx = 0; in build_zonelists_in_node_order()
6030 zonerefs = pgdat->node_zonelists[ZONELIST_NOFALLBACK]._zonerefs; in build_thisnode_zonelists()
6033 zonerefs->zone = NULL; in build_thisnode_zonelists()
6034 zonerefs->zone_idx = 0; in build_thisnode_zonelists()
6051 /* NUMA-aware ordering of nodes */ in build_zonelists()
6052 local_node = pgdat->node_id; in build_zonelists()
6061 * distance group to make it round-robin. in build_zonelists()
6069 load--; in build_zonelists()
6090 return zone_to_nid(z->zone); in local_memory_node()
6104 local_node = pgdat->node_id; in build_zonelists()
6106 zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs; in build_zonelists()
6131 zonerefs->zone = NULL; in build_zonelists()
6132 zonerefs->zone_idx = 0; in build_zonelists()
6170 * building zonelists is fine - no need to touch other nodes. in __build_all_zonelists()
6172 if (self && !node_online(self->node_id)) { in __build_all_zonelists()
6183 * We now know the "local memory node" for each node-- in __build_all_zonelists()
6185 * Set up numa_mem percpu variable for on-line cpus. During in __build_all_zonelists()
6186 * boot, only the boot cpu should be on-line; we'll init the in __build_all_zonelists()
6187 * secondary cpus' numa_mem as they come on-line. During in __build_all_zonelists()
6188 * node/memory hotplug, we'll fixup all on-line cpus. in __build_all_zonelists()
6216 * (a chicken-egg dilemma). in build_all_zonelists_init()
6246 * more accurate, but expensive to check per-zone. This check is in build_all_zonelists()
6247 * made on memory-hotadd so a system can start with mobility in build_all_zonelists()
6287 * Initially all pages are reserved - free ones are freed
6289 * done. Non-atomic initialization, single-pass.
6303 if (highest_memmap_pfn < end_pfn - 1) in memmap_init_zone()
6304 highest_memmap_pfn = end_pfn - 1; in memmap_init_zone()
6318 if (start_pfn == altmap->base_pfn) in memmap_init_zone()
6319 start_pfn += altmap->reserve; in memmap_init_zone()
6320 end_pfn = altmap->base_pfn + vmem_altmap_offset(altmap); in memmap_init_zone()
6331 * There can be holes in boot-time mem_map[]s handed to this in memmap_init_zone()
6366 struct pglist_data *pgdat = zone->zone_pgdat; in memmap_init_zone_device()
6370 int nid = pgdat->node_id; in memmap_init_zone_device()
6381 start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap); in memmap_init_zone_device()
6382 nr_pages = end_pfn - start_pfn; in memmap_init_zone_device()
6394 * We can use the non-atomic __set_bit operation for setting in memmap_init_zone_device()
6400 * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer in memmap_init_zone_device()
6402 * ever freed or placed on a driver-private list. in memmap_init_zone_device()
6404 page->pgmap = pgmap; in memmap_init_zone_device()
6405 page->zone_device_data = NULL; in memmap_init_zone_device()
6411 * the address space during boot when many long-lived in memmap_init_zone_device()
6424 nr_pages, jiffies_to_msecs(jiffies - start)); in memmap_init_zone_device()
6432 INIT_LIST_HEAD(&zone->free_area[order].free_list[t]); in zone_init_free_lists()
6433 zone->free_area[order].nr_free = 0; in zone_init_free_lists()
6444 * - physical memory bank size is not necessarily the exact multiple of the
6446 * - early reserved memory may not be listed in memblock.memory
6447 * - memory layouts defined with memmap= kernel parameter may not align
6451 * - PG_Reserved is set
6452 * - zone and node links point to zone and node that span the page if the
6454 * - zone and node links point to adjacent zone/node if the hole falls on
6469 + pageblock_nr_pages - 1; in init_unavailable_range()
6487 unsigned long zone_start_pfn = zone->zone_start_pfn; in memmap_init_zone_range()
6488 unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages; in memmap_init_zone_range()
6497 memmap_init_zone(end_pfn - start_pfn, nid, zone_id, start_pfn, in memmap_init_zone_range()
6516 struct zone *zone = node->node_zones + j; in memmap_init()
6543 /* A stub for backwards compatibility with custom implementation on IA-64 */
6556 * The per-cpu-pages pools are set to around 1000th of the in zone_batchsize()
6568 * Clamp the batch to a 2^n - 1 value. Having a power in zone_batchsize()
6577 batch = rounddown_pow_of_two(batch + batch/2) - 1; in zone_batchsize()
6593 * fragmented and becoming unavailable for high-order allocations. in zone_batchsize()
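A userspace restatement of the heuristic above, assuming 4 KiB pages: roughly 1/1024 of the zone, capped at one megabyte's worth of pages, quartered (it is scaled back up when pcp->high is derived), and finally clamped to a 2^n - 1 value:

#include <stdio.h>

#define PAGE_SIZE 4096UL        /* assumed */

static unsigned long rounddown_pow_of_two(unsigned long n)
{
        while (n & (n - 1))
                n &= n - 1;     /* clear the lowest set bit until a power of two remains */
        return n;
}

static unsigned long zone_batchsize(unsigned long managed_pages)
{
        unsigned long batch = managed_pages / 1024;

        if (batch * PAGE_SIZE > 1024 * 1024)    /* no more than a meg */
                batch = (1024 * 1024) / PAGE_SIZE;
        batch /= 4;                             /* effectively multiplied back later */
        if (batch < 1)
                batch = 1;

        return rounddown_pow_of_two(batch + batch / 2) - 1;
}

int main(void)
{
        /* A hypothetical 4 GiB zone: 1048576 pages. */
        printf("pcp batch = %lu\n", zone_batchsize(1048576UL));
        return 0;
}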
6600 * pcp->high and pcp->batch values are related and dependent on one another:
6601 * ->batch must never be higher than ->high.
6605 * Any new users of pcp->batch and pcp->high should ensure they can cope with
6616 pcp->batch = 1; in pageset_update()
6620 pcp->high = high; in pageset_update()
6623 pcp->batch = batch; in pageset_update()
6629 pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch)); in pageset_set_batch()
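The three stores above implement the invariant stated in the comment (batch must never exceed high) without a lock: drop batch to a safe value first, publish the new high, then install the real batch. A reduced userspace sketch; the kernel's smp_wmb() barriers are only hinted at in comments:

#include <stdio.h>

struct fake_pcp { unsigned long high, batch; };

static void pageset_update(struct fake_pcp *pcp, unsigned long high,
                           unsigned long batch)
{
        pcp->batch = 1;         /* fail-safe: batch <= any plausible high */
        /* smp_wmb() in the kernel orders this store before the next one */
        pcp->high = high;
        /* smp_wmb() again before publishing the real batch */
        pcp->batch = batch;
}

int main(void)
{
        struct fake_pcp pcp = { 0, 0 };
        unsigned long batch = 63;       /* e.g. the value from zone_batchsize() */

        pageset_update(&pcp, 6 * batch, batch); /* mirrors pageset_set_batch() */
        printf("high=%lu batch=%lu\n", pcp.high, pcp.batch);
        return 0;
}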
6639 pcp = &p->pcp; in pageset_init()
6641 INIT_LIST_HEAD(&pcp->lists[migratetype]); in pageset_init()
6661 pageset_update(&p->pcp, high, batch); in pageset_set_high()
6677 struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu); in zone_pageset_init()
6686 zone->pageset = alloc_percpu(struct per_cpu_pageset); in setup_zone_pageset()
6713 memset(pcp->vm_numa_stat_diff, 0, in setup_per_cpu_pageset()
6714 sizeof(pcp->vm_numa_stat_diff)); in setup_per_cpu_pageset()
6719 pgdat->per_cpu_nodestats = in setup_per_cpu_pageset()
6730 zone->pageset = &boot_pageset; in zone_pcp_init()
6734 zone->name, zone->present_pages, in zone_pcp_init()
6742 struct pglist_data *pgdat = zone->zone_pgdat; in init_currently_empty_zone()
6745 if (zone_idx > pgdat->nr_zones) in init_currently_empty_zone()
6746 pgdat->nr_zones = zone_idx; in init_currently_empty_zone()
6748 zone->zone_start_pfn = zone_start_pfn; in init_currently_empty_zone()
6751 "Initialising map node %d zone %lu pfns %lu -> %lu\n", in init_currently_empty_zone()
6752 pgdat->node_id, in init_currently_empty_zone()
6757 zone->initialized = 1; in init_currently_empty_zone()
6761 * get_pfn_range_for_nid - Return the start and end page frames for a node
6777 *start_pfn = -1UL; in get_pfn_range_for_nid()
6785 if (*start_pfn == -1UL) in get_pfn_range_for_nid()
6797 for (zone_index = MAX_NR_ZONES - 1; zone_index >= 0; zone_index--) { in find_usable_zone_for_movable()
6806 VM_BUG_ON(zone_index == -1); in find_usable_zone_for_movable()
6849 * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node()
6880 return *zone_end_pfn - *zone_start_pfn; in zone_spanned_pages_in_node()
6891 unsigned long nr_absent = range_end_pfn - range_start_pfn; in __absent_pages_in_range()
6898 nr_absent -= end_pfn - start_pfn; in __absent_pages_in_range()
6904 * absent_pages_in_range - Return number of page frames in holes within a range
6956 nr_absent += end_pfn - start_pfn; in zone_absent_pages_in_node()
6960 nr_absent += end_pfn - start_pfn; in zone_absent_pages_in_node()
6975 struct zone *zone = pgdat->node_zones + i; in calculate_node_totalpages()
6980 spanned = zone_spanned_pages_in_node(pgdat->node_id, i, in calculate_node_totalpages()
6985 absent = zone_absent_pages_in_node(pgdat->node_id, i, in calculate_node_totalpages()
6990 real_size = size - absent; in calculate_node_totalpages()
6993 zone->zone_start_pfn = zone_start_pfn; in calculate_node_totalpages()
6995 zone->zone_start_pfn = 0; in calculate_node_totalpages()
6996 zone->spanned_pages = size; in calculate_node_totalpages()
6997 zone->present_pages = real_size; in calculate_node_totalpages()
7003 pgdat->node_spanned_pages = totalpages; in calculate_node_totalpages()
7004 pgdat->node_present_pages = realtotalpages; in calculate_node_totalpages()
7005 printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, in calculate_node_totalpages()
7011 * Calculate the size of the zone->blockflags rounded to an unsigned long
7021 zonesize += zone_start_pfn & (pageblock_nr_pages-1); in usemap_size()
7036 zone->pageblock_flags = NULL; in setup_usemap()
7038 zone->pageblock_flags = in setup_usemap()
7040 pgdat->node_id); in setup_usemap()
7041 if (!zone->pageblock_flags) in setup_usemap()
7043 usemapsize, zone->name, pgdat->node_id); in setup_usemap()
7065 order = MAX_ORDER - 1; in set_pageblock_order()
7078 * is unused as pageblock_order is set at compile-time. See
7079 * include/linux/pageblock-flags.h for the values of pageblock_order based on
7111 struct deferred_split *ds_queue = &pgdat->deferred_split_queue; in pgdat_init_split_queue()
7113 spin_lock_init(&ds_queue->split_queue_lock); in pgdat_init_split_queue()
7114 INIT_LIST_HEAD(&ds_queue->split_queue); in pgdat_init_split_queue()
7115 ds_queue->split_queue_len = 0; in pgdat_init_split_queue()
7124 init_waitqueue_head(&pgdat->kcompactd_wait); in pgdat_init_kcompactd()
7137 init_waitqueue_head(&pgdat->kswapd_wait); in pgdat_init_internals()
7138 init_waitqueue_head(&pgdat->pfmemalloc_wait); in pgdat_init_internals()
7141 spin_lock_init(&pgdat->lru_lock); in pgdat_init_internals()
7142 lruvec_init(&pgdat->__lruvec); in pgdat_init_internals()
7148 atomic_long_set(&zone->managed_pages, remaining_pages); in zone_init_internals()
7150 zone->name = zone_names[idx]; in zone_init_internals()
7151 zone->zone_pgdat = NODE_DATA(nid); in zone_init_internals()
7152 spin_lock_init(&zone->lock); in zone_init_internals()
7159 * - init pgdat internals
7160 * - init all zones belonging to this node
7172 zone_init_internals(&pgdat->node_zones[z], z, nid, 0); in free_area_init_core_hotplug()
7178 * - mark all pages reserved
7179 * - mark all memory queues empty
7180 * - clear the memory bitmaps
7188 int nid = pgdat->node_id; in free_area_init_core()
7191 pgdat->per_cpu_nodestats = &boot_nodestats; in free_area_init_core()
7194 struct zone *zone = pgdat->node_zones + j; in free_area_init_core()
7196 unsigned long zone_start_pfn = zone->zone_start_pfn; in free_area_init_core()
7198 size = zone->spanned_pages; in free_area_init_core()
7199 freesize = zone->present_pages; in free_area_init_core()
7204 * and per-cpu initialisations in free_area_init_core()
7209 freesize -= memmap_pages; in free_area_init_core()
7221 freesize -= dma_reserve; in free_area_init_core()
7230 nr_kernel_pages -= memmap_pages; in free_area_init_core()
7257 if (!pgdat->node_spanned_pages) in alloc_node_mem_map()
7260 start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); in alloc_node_mem_map()
7261 offset = pgdat->node_start_pfn - start; in alloc_node_mem_map()
7263 if (!pgdat->node_mem_map) { in alloc_node_mem_map()
7274 size = (end - start) * sizeof(struct page); in alloc_node_mem_map()
7276 pgdat->node_id); in alloc_node_mem_map()
7279 size, pgdat->node_id); in alloc_node_mem_map()
7280 pgdat->node_mem_map = map + offset; in alloc_node_mem_map()
7283 __func__, pgdat->node_id, (unsigned long)pgdat, in alloc_node_mem_map()
7284 (unsigned long)pgdat->node_mem_map); in alloc_node_mem_map()
7290 mem_map = NODE_DATA(0)->node_mem_map; in alloc_node_mem_map()
7291 if (page_to_pfn(mem_map) != pgdat->node_start_pfn) in alloc_node_mem_map()
7292 mem_map -= offset; in alloc_node_mem_map()
7303 pgdat->first_deferred_pfn = ULONG_MAX; in pgdat_set_deferred_range()
7316 WARN_ON(pgdat->nr_zones || pgdat->kswapd_highest_zoneidx); in free_area_init_node()
7320 pgdat->node_id = nid; in free_area_init_node()
7321 pgdat->node_start_pfn = start_pfn; in free_area_init_node()
7322 pgdat->per_cpu_nodestats = NULL; in free_area_init_node()
7324 pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid, in free_area_init_node()
7326 end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0); in free_area_init_node()
7354 * node_map_pfn_alignment - determine the maximum internode alignment
7361 * would indicate 1GiB alignment with (1 << (30 - PAGE_SHIFT)). If the
7365 * This is used to test whether pfn -> nid mapping of the chosen memory
7366 * model has fine enough granularity to avoid incorrect mapping for the
7387 * Start with a mask granular enough to pin-point to the in node_map_pfn_alignment()
7388 * start pfn and tick off bits one-by-one until it becomes in node_map_pfn_alignment()
7391 mask = ~((1 << __ffs(start)) - 1); in node_map_pfn_alignment()
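The mask widening described above can be exercised in isolation: start from the natural alignment of the boundary pfn where the node changes, and keep doubling the granule while the previous node still fits entirely below the coarser boundary; the result, converted back to pages, is the finest granularity the pfn -> nid model must resolve. The node layout below is invented, and __builtin_ctzl() stands in for __ffs():

#include <stdio.h>

int main(void)
{
        unsigned long last_end = 0x180000;      /* node 0 ends at this pfn (hypothetical) */
        unsigned long start = 0x180000;         /* node 1 starts here */
        unsigned long mask;

        mask = ~((1UL << __builtin_ctzl(start)) - 1);   /* alignment of the boundary itself */
        while (mask && last_end <= (start & (mask << 1)))
                mask <<= 1;                     /* widen while node 0 stays below the boundary */

        /* ~mask + 1 pages is the coarsest granule that still separates the nodes */
        printf("internode alignment: %lu pages\n", ~mask + 1);
        return 0;
}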
7404 * find_min_pfn_with_active_regions - Find the minimum PFN registered
7426 unsigned long pages = end_pfn - start_pfn; in early_calculate_totalpages()
7466 usable_startpfn = PFN_DOWN(r->base); in find_zone_movable_pfns_for_nodes()
7528 * Round-up so that ZONE_MOVABLE is at least as large as what in find_zone_movable_pfns_for_nodes()
7534 corepages = totalpages - required_movablecore; in find_zone_movable_pfns_for_nodes()
7582 - start_pfn; in find_zone_movable_pfns_for_nodes()
7584 kernelcore_remaining -= min(kernel_pages, in find_zone_movable_pfns_for_nodes()
7586 required_kernelcore -= min(kernel_pages, in find_zone_movable_pfns_for_nodes()
7606 * start_pfn->end_pfn. Calculate size_pages as the in find_zone_movable_pfns_for_nodes()
7609 size_pages = end_pfn - start_pfn; in find_zone_movable_pfns_for_nodes()
7619 required_kernelcore -= min(required_kernelcore, in find_zone_movable_pfns_for_nodes()
7621 kernelcore_remaining -= size_pages; in find_zone_movable_pfns_for_nodes()
7633 usable_nodes--; in find_zone_movable_pfns_for_nodes()
7660 for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) { in check_for_memory()
7661 struct zone *zone = &pgdat->node_zones[zone_type]; in check_for_memory()
7682 * free_area_init - Initialise all pg_data_t and zone data
7711 zone = MAX_NR_ZONES - i - 1; in free_area_init()
7734 pr_info(" %-8s ", zone_names[i]); in free_area_init()
7739 pr_cont("[mem %#018Lx-%#018Lx]\n", in free_area_init()
7743 << PAGE_SHIFT) - 1); in free_area_init()
7756 * subsection-map relative to active online memory ranges to in free_area_init()
7757 * enable future "sub-section" extensions of the memory map. in free_area_init()
7761 pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid, in free_area_init()
7763 ((u64)end_pfn << PAGE_SHIFT) - 1); in free_area_init()
7764 subsection_map_init(start_pfn, end_pfn - start_pfn); in free_area_init()
7775 if (pgdat->node_present_pages) in free_area_init()
7790 return -EINVAL; in cmdline_parse_core()
7841 atomic_long_add(count, &page_zone(page)->managed_pages); in adjust_managed_page_count()
7870 * Perform a kasan-unchecked memset() since this memory in free_reserved_area()
7882 s, pages << (PAGE_SHIFT - 10)); in free_reserved_area()
7892 atomic_long_inc(&page_zone(page)->managed_pages); in free_highmem_page()
7904 codesize = _etext - _stext; in mem_init_print_info()
7905 datasize = _edata - _sdata; in mem_init_print_info()
7906 rosize = __end_rodata - __start_rodata; in mem_init_print_info()
7907 bss_size = __bss_stop - __bss_start; in mem_init_print_info()
7908 init_data_size = __init_end - __init_begin; in mem_init_print_info()
7909 init_code_size = _einittext - _sinittext; in mem_init_print_info()
7921 size -= adj; \ in mem_init_print_info()
7933 …(%luK kernel code, %luK rwdata, %luK rodata, %luK init, %luK bss, %luK reserved, %luK cma-reserved" in mem_init_print_info()
7938 nr_free_pages() << (PAGE_SHIFT - 10), in mem_init_print_info()
7939 physpages << (PAGE_SHIFT - 10), in mem_init_print_info()
7942 (physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10), in mem_init_print_info()
7943 totalcma_pages << (PAGE_SHIFT - 10), in mem_init_print_info()
7945 totalhigh_pages() << (PAGE_SHIFT - 10), in mem_init_print_info()
7951 * set_dma_reserve - set the specified number of pages reserved in the first zone
7954 * The per-cpu batchsize and zone watermarks are determined by managed_pages.
7959 * smaller per-cpu batchsize.
8020 * calculate_totalreserve_pages - called when sysctl_lowmem_reserve_ratio
8031 pgdat->totalreserve_pages = 0; in calculate_totalreserve_pages()
8034 struct zone *zone = pgdat->node_zones + i; in calculate_totalreserve_pages()
8040 if (zone->lowmem_reserve[j] > max) in calculate_totalreserve_pages()
8041 max = zone->lowmem_reserve[j]; in calculate_totalreserve_pages()
8050 pgdat->totalreserve_pages += max; in calculate_totalreserve_pages()
8059 * setup_per_zone_lowmem_reserve - called whenever
8070 for (i = 0; i < MAX_NR_ZONES - 1; i++) { in setup_per_zone_lowmem_reserve()
8071 struct zone *zone = &pgdat->node_zones[i]; in setup_per_zone_lowmem_reserve()
8077 struct zone *upper_zone = &pgdat->node_zones[j]; in setup_per_zone_lowmem_reserve()
8082 zone->lowmem_reserve[j] = 0; in setup_per_zone_lowmem_reserve()
8084 zone->lowmem_reserve[j] = managed_pages / ratio; in setup_per_zone_lowmem_reserve()
8095 unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); in __setup_per_zone_wmarks()
8096 unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10); in __setup_per_zone_wmarks()
8110 spin_lock_irqsave(&zone->lock, flags); in __setup_per_zone_wmarks()
8121 * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN) in __setup_per_zone_wmarks()
8129 zone->_watermark[WMARK_MIN] = min_pages; in __setup_per_zone_wmarks()
8135 zone->_watermark[WMARK_MIN] = tmp; in __setup_per_zone_wmarks()
8147 zone->watermark_boost = 0; in __setup_per_zone_wmarks()
8148 zone->_watermark[WMARK_LOW] = min_wmark_pages(zone) + low + tmp; in __setup_per_zone_wmarks()
8149 zone->_watermark[WMARK_HIGH] = min_wmark_pages(zone) + low + tmp * 2; in __setup_per_zone_wmarks()
8151 spin_unlock_irqrestore(&zone->lock, flags); in __setup_per_zone_wmarks()
8159 * setup_per_zone_wmarks - called when min_free_kbytes changes
8160 * or when memory is hot-{added|removed}
8232 * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so in postcore_initcall()
8274 pgdat->min_unmapped_pages = 0; in setup_min_unmapped_ratio()
8277 zone->zone_pgdat->min_unmapped_pages += (zone_managed_pages(zone) * in setup_min_unmapped_ratio()
8302 pgdat->min_slab_pages = 0; in setup_min_slab_ratio()
8305 zone->zone_pgdat->min_slab_pages += (zone_managed_pages(zone) * in setup_min_slab_ratio()
8325 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
8355 per_cpu_ptr(zone->pageset, cpu)); in __zone_pcp_update()
8359 * percpu_pagelist_fraction - changes the pcp->high for each zone on each
8381 ret = -EINVAL; in percpu_pagelist_fraction_sysctl_handler()
8413 * Because 32-bit systems cannot have large physical memory, where this scaling
8424 * - it is assumed that the hash table must contain an exact power-of-2
8426 * - limit is the number of hash buckets, not the total allocation size
8448 numentries -= arch_reserved_kernel_pages(); in alloc_large_system_hash()
8466 numentries >>= (scale - PAGE_SHIFT); in alloc_large_system_hash()
8468 numentries <<= (PAGE_SHIFT - scale); in alloc_large_system_hash()
8470 /* Make sure we've got at least a 0-order allocation. */ in alloc_large_system_hash()
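A userspace sketch of the sizing arithmetic around the fragments above: with 'scale' meaning one bucket per 2^scale bytes of low memory, the page count is shifted by its difference against PAGE_SHIFT and rounded up to a power of two, which is what makes the final hash mask a clean 2^n - 1. The memory size and scale are hypothetical:

#include <stdio.h>

#define PAGE_SHIFT 12   /* assumed 4 KiB pages */

static unsigned long roundup_pow_of_two(unsigned long n)
{
        unsigned long p = 1;

        while (p < n)
                p <<= 1;
        return p;
}

int main(void)
{
        unsigned long nr_kernel_pages = 8UL << (30 - PAGE_SHIFT);       /* assume 8 GiB of low memory */
        unsigned int scale = 17;                /* one bucket per 128 KiB, hypothetical */
        unsigned long numentries = nr_kernel_pages;
        unsigned long mask;

        if (scale > PAGE_SHIFT)
                numentries >>= (scale - PAGE_SHIFT);
        else
                numentries <<= (PAGE_SHIFT - scale);

        numentries = roundup_pow_of_two(numentries);
        mask = numentries - 1;
        printf("%lu buckets, hash mask 0x%lx\n", numentries, mask);
        return 0;
}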
8512 * If bucketsize is not a power-of-two, we may free in alloc_large_system_hash()
8519 } while (!table && size > PAGE_SIZE && --log2qty); in alloc_large_system_hash()
8525 tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size, in alloc_large_system_hash()
8531 *_hash_mask = (1 << log2qty) - 1; in alloc_large_system_hash()
8541 * check without lock_page also may miss some movable non-lru pages at
8568 for (; iter < pageblock_nr_pages - offset; iter++) { in has_unmovable_pages()
8608 skip_pages = compound_nr(head) - (page - head); in has_unmovable_pages()
8609 iter += skip_pages - 1; in has_unmovable_pages()
8617 * because their page->_refcount is zero at all times. in has_unmovable_pages()
8621 iter += (1 << buddy_order(page)) - 1; in has_unmovable_pages()
8662 pageblock_nr_pages) - 1); in pfn_max_align_down()
8673 /* Usage: See admin-guide/dynamic-debug-howto.rst */
8715 .nid = zone_to_nid(cc->zone), in __alloc_contig_migrate_range()
8719 if (cc->alloc_contig && cc->mode == MIGRATE_ASYNC) in __alloc_contig_migrate_range()
8724 while (pfn < end || !list_empty(&cc->migratepages)) { in __alloc_contig_migrate_range()
8726 ret = -EINTR; in __alloc_contig_migrate_range()
8730 if (list_empty(&cc->migratepages)) { in __alloc_contig_migrate_range()
8731 cc->nr_migratepages = 0; in __alloc_contig_migrate_range()
8734 ret = -EINTR; in __alloc_contig_migrate_range()
8739 ret = ret < 0 ? ret : -EBUSY; in __alloc_contig_migrate_range()
8743 nr_reclaimed = reclaim_clean_pages_from_list(cc->zone, in __alloc_contig_migrate_range()
8744 &cc->migratepages); in __alloc_contig_migrate_range()
8745 info->nr_reclaimed += nr_reclaimed; in __alloc_contig_migrate_range()
8746 cc->nr_migratepages -= nr_reclaimed; in __alloc_contig_migrate_range()
8748 list_for_each_entry(page, &cc->migratepages, lru) in __alloc_contig_migrate_range()
8749 info->nr_mapped += page_mapcount(page); in __alloc_contig_migrate_range()
8751 ret = migrate_pages(&cc->migratepages, alloc_migration_target, in __alloc_contig_migrate_range()
8752 NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE); in __alloc_contig_migrate_range()
8754 info->nr_migrated += cc->nr_migratepages; in __alloc_contig_migrate_range()
8759 if (ret == -EBUSY) { in __alloc_contig_migrate_range()
8760 alloc_contig_dump_pages(&cc->migratepages); in __alloc_contig_migrate_range()
8761 page_pinner_mark_migration_failed_pages(&cc->migratepages); in __alloc_contig_migrate_range()
8764 if (!list_empty(&cc->migratepages)) { in __alloc_contig_migrate_range()
8765 page = list_first_entry(&cc->migratepages, struct page, lru); in __alloc_contig_migrate_range()
8766 info->failed_pfn = page_to_pfn(page); in __alloc_contig_migrate_range()
8769 putback_movable_pages(&cc->migratepages); in __alloc_contig_migrate_range()
8770 info->err |= ACR_ERR_MIGRATE; in __alloc_contig_migrate_range()
8777 * alloc_contig_range() -- tries to allocate given range of pages
8779 * @end: one-past-the-last PFN to allocate
8808 .order = -1, in alloc_contig_range()
8844 &info->failed_pfn); in alloc_contig_range()
8846 info->err |= ACR_ERR_ISOLATE; in alloc_contig_range()
8856 * In case of -EBUSY, we'd like to know which page causes the problem. in alloc_contig_range()
8863 * -EBUSY is not accidentally used or returned to caller. in alloc_contig_range()
8866 if (ret && (ret != -EBUSY || (gfp_mask & __GFP_NORETRY))) in alloc_contig_range()
8883 * We don't have to hold zone->lock here because the pages are in alloc_contig_range()
8911 if (test_pages_isolated(outer_start, end, 0, &info->failed_pfn)) { in alloc_contig_range()
8914 ret = -EBUSY; in alloc_contig_range()
8915 info->err |= ACR_ERR_TEST; in alloc_contig_range()
8922 ret = -EBUSY; in alloc_contig_range()
8928 free_contig_range(outer_start, start - outer_start); in alloc_contig_range()
8930 free_contig_range(end, outer_end - end); in alloc_contig_range()
8978 unsigned long last_pfn = start_pfn + nr_pages - 1; in zone_spans_last_pfn()
8984 * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
9015 spin_lock_irqsave(&zone->lock, flags); in alloc_contig_pages()
9017 pfn = ALIGN(zone->zone_start_pfn, nr_pages); in alloc_contig_pages()
9027 spin_unlock_irqrestore(&zone->lock, flags); in alloc_contig_pages()
9032 spin_lock_irqsave(&zone->lock, flags); in alloc_contig_pages()
9036 spin_unlock_irqrestore(&zone->lock, flags); in alloc_contig_pages()
9046 for (; nr_pages--; pfn++) { in free_contig_range()
9075 if (zone->pageset != &boot_pageset) { in zone_pcp_reset()
9077 pset = per_cpu_ptr(zone->pageset, cpu); in zone_pcp_reset()
9080 free_percpu(zone->pageset); in zone_pcp_reset()
9081 zone->pageset = &boot_pageset; in zone_pcp_reset()
9101 spin_lock_irqsave(&zone->lock, flags); in __offline_isolated_pages()
9129 spin_unlock_irqrestore(&zone->lock, flags); in __offline_isolated_pages()
9140 spin_lock_irqsave(&zone->lock, flags); in is_free_buddy_page()
9142 struct page *page_head = page - (pfn & ((1 << order) - 1)); in is_free_buddy_page()
9147 spin_unlock_irqrestore(&zone->lock, flags); in is_free_buddy_page()
9154 * Break down a higher-order page into sub-pages, and keep our target out of
9165 high--; in break_down_buddy_pages()
9198 spin_lock_irqsave(&zone->lock, flags); in take_page_off_buddy()
9200 struct page *page_head = page - (pfn & ((1 << order) - 1)); in take_page_off_buddy()
9212 __mod_zone_freepage_state(zone, -1, migratetype); in take_page_off_buddy()
9219 spin_unlock_irqrestore(&zone->lock, flags); in take_page_off_buddy()
9230 struct zone *zone = &pgdat->node_zones[ZONE_DMA]; in has_managed_dma()