Lines matching +full:processor +full:- +full:intensive (full-text search hits in the kernel's mm/vmscan.c; each hit shows the source line number, the matching line, and its enclosing function)

1 // SPDX-License-Identifier: GPL-2.0
34 #include <linux/backing-dev.h>
167 if ((_page)->lru.prev != _base) { \
170 prev = lru_to_page(&(_page->lru)); \
171 prefetchw(&prev->_field); \
204 WARN_ON_ONCE(rs && task->reclaim_state); in set_task_reclaim_state()
206 /* Check for the nulling of an already-nulled member */ in set_task_reclaim_state()
207 WARN_ON_ONCE(!rs && !task->reclaim_state); in set_task_reclaim_state()
209 task->reclaim_state = rs; in set_task_reclaim_state()
217 * We allow subsystems to populate their shrinker-related
234 int id, ret = -ENOMEM; in prealloc_memcg_shrinker()
250 shrinker->id = id; in prealloc_memcg_shrinker()
259 int id = shrinker->id; in unregister_memcg_shrinker()
270 return sc->target_mem_cgroup; in cgroup_reclaim()
274 * writeback_throttling_sane - is the usual dirty throttling mechanism available?
336 * lruvec_lru_size - Returns the number of pages on the given LRU list.
347 struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; in lruvec_lru_size()
365 unsigned int size = sizeof(*shrinker->nr_deferred); in prealloc_shrinker()
367 if (shrinker->flags & SHRINKER_NUMA_AWARE) in prealloc_shrinker()
370 shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); in prealloc_shrinker()
371 if (!shrinker->nr_deferred) in prealloc_shrinker()
372 return -ENOMEM; in prealloc_shrinker()
374 if (shrinker->flags & SHRINKER_MEMCG_AWARE) { in prealloc_shrinker()
382 kfree(shrinker->nr_deferred); in prealloc_shrinker()
383 shrinker->nr_deferred = NULL; in prealloc_shrinker()
384 return -ENOMEM; in prealloc_shrinker()
389 if (!shrinker->nr_deferred) in free_prealloced_shrinker()
392 if (shrinker->flags & SHRINKER_MEMCG_AWARE) in free_prealloced_shrinker()
395 kfree(shrinker->nr_deferred); in free_prealloced_shrinker()
396 shrinker->nr_deferred = NULL; in free_prealloced_shrinker()
402 list_add_tail(&shrinker->list, &shrinker_list); in register_shrinker_prepared()
404 if (shrinker->flags & SHRINKER_MEMCG_AWARE) in register_shrinker_prepared()
405 idr_replace(&shrinker_idr, shrinker, shrinker->id); in register_shrinker_prepared()
426 if (!shrinker->nr_deferred) in unregister_shrinker()
428 if (shrinker->flags & SHRINKER_MEMCG_AWARE) in unregister_shrinker()
431 list_del(&shrinker->list); in unregister_shrinker()
433 kfree(shrinker->nr_deferred); in unregister_shrinker()
434 shrinker->nr_deferred = NULL; in unregister_shrinker()
449 int nid = shrinkctl->nid; in do_shrink_slab()
450 long batch_size = shrinker->batch ? shrinker->batch in do_shrink_slab()
454 if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) in do_shrink_slab()
457 freeable = shrinker->count_objects(shrinker, shrinkctl); in do_shrink_slab()
466 nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0); in do_shrink_slab()
469 if (shrinker->seeks) { in do_shrink_slab()
472 do_div(delta, shrinker->seeks); in do_shrink_slab()
485 shrinker->scan_objects, total_scan); in do_shrink_slab()
494 * shrinkers to return -1 all the time. This results in a large in do_shrink_slab()
537 shrinkctl->nr_to_scan = nr_to_scan; in do_shrink_slab()
538 shrinkctl->nr_scanned = nr_to_scan; in do_shrink_slab()
539 ret = shrinker->scan_objects(shrinker, shrinkctl); in do_shrink_slab()
544 count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned); in do_shrink_slab()
545 total_scan -= shrinkctl->nr_scanned; in do_shrink_slab()
546 scanned += shrinkctl->nr_scanned; in do_shrink_slab()
552 next_deferred -= scanned; in do_shrink_slab()
562 &shrinker->nr_deferred[nid]); in do_shrink_slab()
564 new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); in do_shrink_slab()
584 map = rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_map, in shrink_slab_memcg()
589 for_each_set_bit(i, map->map, shrinker_nr_max) { in shrink_slab_memcg()
600 clear_bit(i, map->map); in shrink_slab_memcg()
604 /* Call non-slab shrinkers even though kmem is disabled */ in shrink_slab_memcg()
606 !(shrinker->flags & SHRINKER_NONSLAB)) in shrink_slab_memcg()
611 clear_bit(i, map->map); in shrink_slab_memcg()
654 * shrink_slab - shrink slab caches
668 * @priority is sc->priority, we take the number of objects and >> by priority
758 * heads at page->private. in is_page_cache_freeable()
761 return page_count(page) - page_has_private(page) == 1 + page_cache_pins; in is_page_cache_freeable()
766 if (current->flags & PF_SWAPWRITE) in may_write_to_inode()
770 if (inode_to_bdi(inode) == current->backing_dev_info) in may_write_to_inode()
777 * -ENOSPC. We need to propagate that into the address_space for a subsequent
810 * Calls ->writepage().
816 * will be non-blocking. To prevent this allocation from being in pageout()
835 * page->mapping == NULL while being dirty with clean buffers. in pageout()
846 if (mapping->a_ops->writepage == NULL) in pageout()
848 if (!may_write_to_inode(mapping->host)) in pageout()
862 res = mapping->a_ops->writepage(page, &wbc); in pageout()
896 xa_lock_irqsave(&mapping->i_pages, flags); in __remove_mapping()
916 * escape unnoticed. The smp_rmb is needed to ensure the page->flags in __remove_mapping()
917 * load is not satisfied before that of page->_refcount. in __remove_mapping()
937 xa_unlock_irqrestore(&mapping->i_pages, flags); in __remove_mapping()
942 freepage = mapping->a_ops->freepage; in __remove_mapping()
963 xa_unlock_irqrestore(&mapping->i_pages, flags); in __remove_mapping()
972 xa_unlock_irqrestore(&mapping->i_pages, flags); in __remove_mapping()
977 * Attempt to detach a locked page from its ->mapping. If it is dirty or if
997 * putback_lru_page - put previously isolated page onto appropriate LRU list
1035 referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup, in page_check_references()
1049 if (referenced_ptes == -1) in page_check_references()
1073 * Activate file-backed executable pages after first usage. in page_check_references()
1114 if (mapping && mapping->a_ops->is_dirty_writeback) in page_check_dirty_writeback()
1115 mapping->a_ops->is_dirty_writeback(page, dirty, writeback); in page_check_dirty_writeback()
1145 list_del(&page->lru); in shrink_page_list()
1155 sc->nr_scanned += nr_pages; in shrink_page_list()
1160 if (!sc->may_unmap && page_mapped(page)) in shrink_page_list()
1163 may_enter_fs = (sc->gfp_mask & __GFP_FS) || in shrink_page_list()
1164 (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); in shrink_page_list()
1174 stat->nr_dirty++; in shrink_page_list()
1177 stat->nr_unqueued_dirty++; in shrink_page_list()
1187 inode_write_congested(mapping->host)) || in shrink_page_list()
1189 stat->nr_congested++; in shrink_page_list()
1237 test_bit(PGDAT_WRITEBACK, &pgdat->flags)) { in shrink_page_list()
1238 stat->nr_immediate++; in shrink_page_list()
1245 * This is slightly racy - end_page_writeback() in shrink_page_list()
1248 * as PageReadahead - but that does not matter in shrink_page_list()
1256 stat->nr_writeback++; in shrink_page_list()
1264 list_add_tail(&page->lru, page_list); in shrink_page_list()
1276 stat->nr_ref_keep += nr_pages; in shrink_page_list()
1290 if (!(sc->gfp_mask & __GFP_IO)) in shrink_page_list()
1341 sc->nr_scanned -= (nr_pages - 1); in shrink_page_list()
1358 stat->nr_unmap_fail += nr_pages; in shrink_page_list()
1360 stat->nr_lazyfree_fail += nr_pages; in shrink_page_list()
1369 * injecting inefficient single-page IO into in shrink_page_list()
1378 !test_bit(PGDAT_DIRTY, &pgdat->flags))) { in shrink_page_list()
1395 if (!sc->may_writepage) in shrink_page_list()
1410 stat->nr_pageout += thp_nr_pages(page); in shrink_page_list()
1418 * A synchronous write - probably a ramdisk. Go in shrink_page_list()
1443 * drop the buffers and mark the page clean - it can be freed. in shrink_page_list()
1445 * Rarely, pages can have buffers and no ->mapping. These are in shrink_page_list()
1453 if (!try_to_release_page(page, sc->gfp_mask)) in shrink_page_list()
1486 sc->target_mem_cgroup)) in shrink_page_list()
1505 list_add(&page->lru, &free_pages); in shrink_page_list()
1514 sc->nr_scanned -= (nr_pages - 1); in shrink_page_list()
1526 stat->nr_activate[type] += nr_pages; in shrink_page_list()
1532 list_add(&page->lru, &ret_pages); in shrink_page_list()
1536 pgactivate = stat->nr_activate[0] + stat->nr_activate[1]; in shrink_page_list()
1565 list_move(&page->lru, &clean_pages); in reclaim_clean_pages_from_list()
1569 nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc, in reclaim_clean_pages_from_list()
1572 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, in reclaim_clean_pages_from_list()
1573 -(long)nr_reclaimed); in reclaim_clean_pages_from_list()
1580 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, in reclaim_clean_pages_from_list()
1582 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, in reclaim_clean_pages_from_list()
1583 -(long)stat.nr_lazyfree_fail); in reclaim_clean_pages_from_list()
1595 * returns 0 on success, -ve errno on failure.
1599 int ret = -EINVAL; in __isolate_lru_page()
1609 ret = -EBUSY; in __isolate_lru_page()
1614 * blocking - clean pages for the most part. in __isolate_lru_page()
1630 * ->migratepage callback are possible to migrate in __isolate_lru_page()
1641 migrate_dirty = !mapping || mapping->a_ops->migratepage; in __isolate_lru_page()
1654 * sure the page is not being freed elsewhere -- the in __isolate_lru_page()
1678 update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]); in update_lru_sizes()
1684 * pgdat->lru_lock is heavily contended. Some of the functions that
1688 * For pagecache intensive workloads, this function is the hottest
1707 struct list_head *src = &lruvec->lists[lru]; in isolate_lru_pages()
1714 isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED); in isolate_lru_pages()
1729 if (page_zonenum(page) > sc->reclaim_idx) { in isolate_lru_pages()
1730 list_move(&page->lru, &pages_skipped); in isolate_lru_pages()
1742 * premature OOM since __isolate_lru_page() returns -EBUSY in isolate_lru_pages()
1751 list_move(&page->lru, dst); in isolate_lru_pages()
1754 case -EBUSY: in isolate_lru_pages()
1756 list_move(&page->lru, src); in isolate_lru_pages()
1784 trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, in isolate_lru_pages()
1791 * isolate_lru_page - tries to isolate a page from its LRU list
1798 * Returns -EBUSY if the page was not on an LRU list.
1818 int ret = -EBUSY; in isolate_lru_page()
1827 spin_lock_irq(&pgdat->lru_lock); in isolate_lru_page()
1836 spin_unlock_irq(&pgdat->lru_lock); in isolate_lru_page()
1869 * won't get blocked by normal direct-reclaimers, forming a circular in too_many_isolated()
1872 if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) in too_many_isolated()
1889 * It is safe to rely on PG_active against the non-LRU pages in here because
1890 * nobody will play with that bit on a non-LRU page.
1892 * The downside is that we have to touch page->_refcount against each page.
1893 * But we had to alter page->flags anyway.
1911 list_del(&page->lru); in move_pages_to_lru()
1912 spin_unlock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1914 spin_lock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1924 list_move(&page->lru, &lruvec->lists[lru]); in move_pages_to_lru()
1933 spin_unlock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1935 spin_lock_irq(&pgdat->lru_lock); in move_pages_to_lru()
1937 list_add(&page->lru, &pages_to_free); in move_pages_to_lru()
1954 * If a kernel thread (such as nfsd for loop-back mounts) services
1961 return !(current->flags & PF_LOCAL_THROTTLE) || in current_may_throttle()
1962 current->backing_dev_info == NULL || in current_may_throttle()
1963 bdi_write_congested(current->backing_dev_info); in current_may_throttle()
1999 spin_lock_irq(&pgdat->lru_lock); in shrink_inactive_list()
2011 spin_unlock_irq(&pgdat->lru_lock); in shrink_inactive_list()
2019 spin_lock_irq(&pgdat->lru_lock); in shrink_inactive_list()
2023 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); in shrink_inactive_list()
2030 spin_unlock_irq(&pgdat->lru_lock); in shrink_inactive_list()
2049 sc->nr.dirty += stat.nr_dirty; in shrink_inactive_list()
2050 sc->nr.congested += stat.nr_congested; in shrink_inactive_list()
2051 sc->nr.unqueued_dirty += stat.nr_unqueued_dirty; in shrink_inactive_list()
2052 sc->nr.writeback += stat.nr_writeback; in shrink_inactive_list()
2053 sc->nr.immediate += stat.nr_immediate; in shrink_inactive_list()
2054 sc->nr.taken += nr_taken; in shrink_inactive_list()
2056 sc->nr.file_taken += nr_taken; in shrink_inactive_list()
2058 trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, in shrink_inactive_list()
2059 nr_scanned, nr_reclaimed, &stat, sc->priority, file); in shrink_inactive_list()
2084 spin_lock_irq(&pgdat->lru_lock); in shrink_active_list()
2095 spin_unlock_irq(&pgdat->lru_lock); in shrink_active_list()
2100 list_del(&page->lru); in shrink_active_list()
2118 list_add(&page->lru, &l_active); in shrink_active_list()
2127 if (page_referenced(page, 0, sc->target_mem_cgroup, in shrink_active_list()
2130 * Identify referenced, file-backed active pages and in shrink_active_list()
2134 * are not likely to be evicted by use-once streaming in shrink_active_list()
2141 list_add(&page->lru, &l_active); in shrink_active_list()
2147 ClearPageActive(page); /* we are de-activating */ in shrink_active_list()
2149 list_add(&page->lru, &l_inactive); in shrink_active_list()
2155 spin_lock_irq(&pgdat->lru_lock); in shrink_active_list()
2165 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); in shrink_active_list()
2166 spin_unlock_irq(&pgdat->lru_lock); in shrink_active_list()
2170 trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate, in shrink_active_list()
2171 nr_deactivate, nr_rotated, sc->priority, file); in shrink_active_list()
2198 list_move(&page->lru, &node_page_list); in reclaim_pages()
2207 list_del(&page->lru); in reclaim_pages()
2220 list_del(&page->lru); in reclaim_pages()
2233 if (sc->may_deactivate & (1 << is_file_lru(lru))) in shrink_list()
2236 sc->skipped_deactivate = 1; in shrink_list()
2248 * to the established workingset on the scan-resistant active list,
2262 * -------------------------------------
2282 gb = (inactive + active) >> (30 - PAGE_SHIFT); in inactive_is_low()
2328 if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) { in get_scan_count()
2351 if (!sc->priority && swappiness) { in get_scan_count()
2357 * If the system is almost out of file pages, force-scan anon. in get_scan_count()
2359 if (sc->file_is_tiny) { in get_scan_count()
2372 if (!balance_anon_file_reclaim && sc->cache_trim_mode) { in get_scan_count()
2393 total_cost = sc->anon_cost + sc->file_cost; in get_scan_count()
2394 anon_cost = total_cost + sc->anon_cost; in get_scan_count()
2395 file_cost = total_cost + sc->file_cost; in get_scan_count()
2401 fp = (200 - swappiness) * (total_cost + 1); in get_scan_count()
2415 lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); in get_scan_count()
2416 mem_cgroup_protection(sc->target_mem_cgroup, memcg, in get_scan_count()
2426 * becomes extremely binary -- from nothing as we in get_scan_count()
2441 * the best-effort low protection. However, we still in get_scan_count()
2442 * ideally want to honor how well-behaved groups are in in get_scan_count()
2453 if (!sc->memcg_low_reclaim && low > min) { in get_scan_count()
2455 sc->memcg_low_skipped = 1; in get_scan_count()
2463 scan = lruvec_size - lruvec_size * protection / in get_scan_count()
2469 * sc->priority further than desirable. in get_scan_count()
2476 scan >>= sc->priority; in get_scan_count()
2495 * round-off error. in get_scan_count()
2524 unsigned long nr_to_reclaim = sc->nr_to_reclaim; in shrink_lruvec()
2545 sc->priority == DEF_PRIORITY); in shrink_lruvec()
2556 nr[lru] -= nr_to_scan; in shrink_lruvec()
2608 nr_scanned = targets[lru] - nr[lru]; in shrink_lruvec()
2609 nr[lru] = targets[lru] * (100 - percentage) / 100; in shrink_lruvec()
2610 nr[lru] -= min(nr[lru], nr_scanned); in shrink_lruvec()
2613 nr_scanned = targets[lru] - nr[lru]; in shrink_lruvec()
2614 nr[lru] = targets[lru] * (100 - percentage) / 100; in shrink_lruvec()
2615 nr[lru] -= min(nr[lru], nr_scanned); in shrink_lruvec()
2618 sc->nr_reclaimed += nr_reclaimed; in shrink_lruvec()
2632 if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && in in_reclaim_compaction()
2633 (sc->order > PAGE_ALLOC_COSTLY_ORDER || in in_reclaim_compaction()
2634 sc->priority < DEF_PRIORITY - 2)) in in_reclaim_compaction()
2641 * Reclaim/compaction is used for high-order allocation requests. It reclaims
2642 * order-0 pages before compacting the zone. should_continue_reclaim() returns
2665 * first, by assuming that zero delta of sc->nr_scanned means full LRU in should_continue_reclaim()
2667 * where always a non-zero amount of pages were scanned. in should_continue_reclaim()
2673 for (z = 0; z <= sc->reclaim_idx; z++) { in should_continue_reclaim()
2674 struct zone *zone = &pgdat->node_zones[z]; in should_continue_reclaim()
2678 switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) { in should_continue_reclaim()
2692 pages_for_compaction = compact_gap(sc->order); in should_continue_reclaim()
2702 struct mem_cgroup *target_memcg = sc->target_mem_cgroup; in shrink_node_memcgs()
2713 * This loop can become CPU-bound when target memcgs in shrink_node_memcgs()
2714 * aren't eligible for reclaim - either because they in shrink_node_memcgs()
2739 if (!sc->memcg_low_reclaim) { in shrink_node_memcgs()
2740 sc->memcg_low_skipped = 1; in shrink_node_memcgs()
2746 reclaimed = sc->nr_reclaimed; in shrink_node_memcgs()
2747 scanned = sc->nr_scanned; in shrink_node_memcgs()
2751 shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, in shrink_node_memcgs()
2752 sc->priority); in shrink_node_memcgs()
2755 vmpressure(sc->gfp_mask, memcg, false, in shrink_node_memcgs()
2756 sc->nr_scanned - scanned, in shrink_node_memcgs()
2757 sc->nr_reclaimed - reclaimed); in shrink_node_memcgs()
2764 struct reclaim_state *reclaim_state = current->reclaim_state; in shrink_node()
2770 target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); in shrink_node()
2773 memset(&sc->nr, 0, sizeof(sc->nr)); in shrink_node()
2775 nr_reclaimed = sc->nr_reclaimed; in shrink_node()
2776 nr_scanned = sc->nr_scanned; in shrink_node()
2781 spin_lock_irq(&pgdat->lru_lock); in shrink_node()
2782 sc->anon_cost = target_lruvec->anon_cost; in shrink_node()
2783 sc->file_cost = target_lruvec->file_cost; in shrink_node()
2784 spin_unlock_irq(&pgdat->lru_lock); in shrink_node()
2790 if (!sc->force_deactivate) { in shrink_node()
2795 if (refaults != target_lruvec->refaults[0] || in shrink_node()
2797 sc->may_deactivate |= DEACTIVATE_ANON; in shrink_node()
2799 sc->may_deactivate &= ~DEACTIVATE_ANON; in shrink_node()
2808 if (refaults != target_lruvec->refaults[1] || in shrink_node()
2810 sc->may_deactivate |= DEACTIVATE_FILE; in shrink_node()
2812 sc->may_deactivate &= ~DEACTIVATE_FILE; in shrink_node()
2814 sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; in shrink_node()
2822 if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) in shrink_node()
2823 sc->cache_trim_mode = 1; in shrink_node()
2825 sc->cache_trim_mode = 0; in shrink_node()
2841 free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); in shrink_node()
2846 struct zone *zone = &pgdat->node_zones[z]; in shrink_node()
2860 sc->file_is_tiny = in shrink_node()
2862 !(sc->may_deactivate & DEACTIVATE_ANON) && in shrink_node()
2863 anon >> sc->priority; in shrink_node()
2869 sc->nr_reclaimed += reclaim_state->reclaimed_slab; in shrink_node()
2870 reclaim_state->reclaimed_slab = 0; in shrink_node()
2874 vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true, in shrink_node()
2875 sc->nr_scanned - nr_scanned, in shrink_node()
2876 sc->nr_reclaimed - nr_reclaimed); in shrink_node()
2878 if (sc->nr_reclaimed - nr_reclaimed) in shrink_node()
2884 * it implies that the long-lived page allocation rate in shrink_node()
2899 if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken) in shrink_node()
2900 set_bit(PGDAT_WRITEBACK, &pgdat->flags); in shrink_node()
2903 if (sc->nr.unqueued_dirty == sc->nr.file_taken) in shrink_node()
2904 set_bit(PGDAT_DIRTY, &pgdat->flags); in shrink_node()
2912 if (sc->nr.immediate) in shrink_node()
2926 sc->nr.dirty && sc->nr.dirty == sc->nr.congested) in shrink_node()
2927 set_bit(LRUVEC_CONGESTED, &target_lruvec->flags); in shrink_node()
2936 !sc->hibernation_mode && in shrink_node()
2937 test_bit(LRUVEC_CONGESTED, &target_lruvec->flags)) in shrink_node()
2940 if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, in shrink_node()
2951 pgdat->kswapd_failures = 0; in shrink_node()
2955 * Returns true if compaction should go ahead for a costly-order request, or
2964 suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx); in compaction_ready()
2981 watermark = high_wmark_pages(zone) + compact_gap(sc->order); in compaction_ready()
2983 return zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx); in compaction_ready()
2987 * This is the direct reclaim path, for page-allocating processes. We only
3008 orig_mask = sc->gfp_mask; in shrink_zones()
3010 sc->gfp_mask |= __GFP_HIGHMEM; in shrink_zones()
3011 sc->reclaim_idx = gfp_zone(sc->gfp_mask); in shrink_zones()
3015 sc->reclaim_idx, sc->nodemask) { in shrink_zones()
3029 * non-zero order, only frequent costly order in shrink_zones()
3035 sc->order > PAGE_ALLOC_COSTLY_ORDER && in shrink_zones()
3037 sc->compaction_ready = true; in shrink_zones()
3047 if (zone->zone_pgdat == last_pgdat) in shrink_zones()
3057 nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone->zone_pgdat, in shrink_zones()
3058 sc->order, sc->gfp_mask, in shrink_zones()
3060 sc->nr_reclaimed += nr_soft_reclaimed; in shrink_zones()
3061 sc->nr_scanned += nr_soft_scanned; in shrink_zones()
3066 if (zone->zone_pgdat == last_pgdat) in shrink_zones()
3068 last_pgdat = zone->zone_pgdat; in shrink_zones()
3069 shrink_node(zone->zone_pgdat, sc); in shrink_zones()
3076 sc->gfp_mask = orig_mask; in shrink_zones()
3086 target_lruvec->refaults[0] = refaults; in snapshot_refaults()
3088 target_lruvec->refaults[1] = refaults; in snapshot_refaults()
3099 * high - the zone may be full of dirty or under-writeback pages, which this
3111 int initial_priority = sc->priority; in do_try_to_free_pages()
3119 __count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1); in do_try_to_free_pages()
3122 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup, in do_try_to_free_pages()
3123 sc->priority); in do_try_to_free_pages()
3124 sc->nr_scanned = 0; in do_try_to_free_pages()
3127 if (sc->nr_reclaimed >= sc->nr_to_reclaim) in do_try_to_free_pages()
3130 if (sc->compaction_ready) in do_try_to_free_pages()
3137 if (sc->priority < DEF_PRIORITY - 2) in do_try_to_free_pages()
3138 sc->may_writepage = 1; in do_try_to_free_pages()
3139 } while (--sc->priority >= 0); in do_try_to_free_pages()
3142 for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx, in do_try_to_free_pages()
3143 sc->nodemask) { in do_try_to_free_pages()
3144 if (zone->zone_pgdat == last_pgdat) in do_try_to_free_pages()
3146 last_pgdat = zone->zone_pgdat; in do_try_to_free_pages()
3148 snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat); in do_try_to_free_pages()
3153 lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, in do_try_to_free_pages()
3154 zone->zone_pgdat); in do_try_to_free_pages()
3155 clear_bit(LRUVEC_CONGESTED, &lruvec->flags); in do_try_to_free_pages()
3161 if (sc->nr_reclaimed) in do_try_to_free_pages()
3162 return sc->nr_reclaimed; in do_try_to_free_pages()
3165 if (sc->compaction_ready) in do_try_to_free_pages()
3177 if (sc->skipped_deactivate) { in do_try_to_free_pages()
3178 sc->priority = initial_priority; in do_try_to_free_pages()
3179 sc->force_deactivate = 1; in do_try_to_free_pages()
3180 sc->skipped_deactivate = 0; in do_try_to_free_pages()
3185 if (sc->memcg_low_skipped) { in do_try_to_free_pages()
3186 sc->priority = initial_priority; in do_try_to_free_pages()
3187 sc->force_deactivate = 0; in do_try_to_free_pages()
3188 sc->memcg_low_reclaim = 1; in do_try_to_free_pages()
3189 sc->memcg_low_skipped = 0; in do_try_to_free_pages()
3204 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) in allow_direct_reclaim()
3208 zone = &pgdat->node_zones[i]; in allow_direct_reclaim()
3226 if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) { in allow_direct_reclaim()
3227 if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL) in allow_direct_reclaim()
3228 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, ZONE_NORMAL); in allow_direct_reclaim()
3230 wake_up_interruptible(&pgdat->kswapd_wait); in allow_direct_reclaim()
3259 if (current->flags & PF_KTHREAD) in throttle_direct_reclaim()
3289 pgdat = zone->zone_pgdat; in throttle_direct_reclaim()
3311 wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, in throttle_direct_reclaim()
3318 wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, in throttle_direct_reclaim()
3386 .reclaim_idx = MAX_NR_ZONES - 1, in mem_cgroup_shrink_node()
3390 WARN_ON_ONCE(!current->reclaim_state); in mem_cgroup_shrink_node()
3425 .reclaim_idx = MAX_NR_ZONES - 1, in try_to_free_mem_cgroup_pages()
3482 * Check for watermark boosts top-down as the higher zones in pgdat_watermark_boosted()
3488 for (i = highest_zoneidx; i >= 0; i--) { in pgdat_watermark_boosted()
3489 zone = pgdat->node_zones + i; in pgdat_watermark_boosted()
3493 if (zone->watermark_boost) in pgdat_watermark_boosted()
3507 unsigned long mark = -1; in pgdat_balanced()
3511 * Check watermarks bottom-up as lower zones are more likely to in pgdat_balanced()
3515 zone = pgdat->node_zones + i; in pgdat_balanced()
3527 * need balancing by definition. This can happen if a zone-restricted in pgdat_balanced()
3530 if (mark == -1) in pgdat_balanced()
3541 clear_bit(LRUVEC_CONGESTED, &lruvec->flags); in clear_pgdat_congested()
3542 clear_bit(PGDAT_DIRTY, &pgdat->flags); in clear_pgdat_congested()
3543 clear_bit(PGDAT_WRITEBACK, &pgdat->flags); in clear_pgdat_congested()
3568 if (waitqueue_active(&pgdat->pfmemalloc_wait)) in prepare_kswapd_sleep()
3569 wake_up_all(&pgdat->pfmemalloc_wait); in prepare_kswapd_sleep()
3572 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) in prepare_kswapd_sleep()
3598 sc->nr_to_reclaim = 0; in kswapd_shrink_node()
3599 for (z = 0; z <= sc->reclaim_idx; z++) { in kswapd_shrink_node()
3600 zone = pgdat->node_zones + z; in kswapd_shrink_node()
3604 sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX); in kswapd_shrink_node()
3615 * high-order allocations. If twice the allocation size has been in kswapd_shrink_node()
3616 * reclaimed then recheck watermarks only at order-0 to prevent in kswapd_shrink_node()
3617 * excessive reclaim. Assume that a process requested a high-order in kswapd_shrink_node()
3620 if (sc->order && sc->nr_reclaimed >= compact_gap(sc->order)) in kswapd_shrink_node()
3621 sc->order = 0; in kswapd_shrink_node()
3623 return sc->nr_scanned >= sc->nr_to_reclaim; in kswapd_shrink_node()
3633 * kswapd scans the zones in the highmem->normal->dma direction. It skips
3668 zone = pgdat->node_zones + i; in balance_pgdat()
3672 nr_boost_reclaim += zone->watermark_boost; in balance_pgdat()
3673 zone_boosts[i] = zone->watermark_boost; in balance_pgdat()
3690 * purpose -- on 64-bit systems it is expected that in balance_pgdat()
3691 * buffer_heads are stripped during active rotation. On 32-bit in balance_pgdat()
3698 for (i = MAX_NR_ZONES - 1; i >= 0; i--) { in balance_pgdat()
3699 zone = pgdat->node_zones + i; in balance_pgdat()
3713 * re-evaluate if boosting is required when kswapd next wakes. in balance_pgdat()
3730 if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2) in balance_pgdat()
3735 * intent is to relieve pressure not issue sub-optimal IO in balance_pgdat()
3754 if (sc.priority < DEF_PRIORITY - 2) in balance_pgdat()
3777 if (waitqueue_active(&pgdat->pfmemalloc_wait) && in balance_pgdat()
3779 wake_up_all(&pgdat->pfmemalloc_wait); in balance_pgdat()
3792 nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; in balance_pgdat()
3793 nr_boost_reclaim -= min(nr_boost_reclaim, nr_reclaimed); in balance_pgdat()
3804 sc.priority--; in balance_pgdat()
3808 pgdat->kswapd_failures++; in balance_pgdat()
3820 zone = pgdat->node_zones + i; in balance_pgdat()
3821 spin_lock_irqsave(&zone->lock, flags); in balance_pgdat()
3822 zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]); in balance_pgdat()
3823 spin_unlock_irqrestore(&zone->lock, flags); in balance_pgdat()
3848 * The pgdat->kswapd_highest_zoneidx is used to pass the highest zone index to
3857 enum zone_type curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx); in kswapd_highest_zoneidx()
3871 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); in kswapd_try_to_sleep()
3903 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, in kswapd_try_to_sleep()
3907 if (READ_ONCE(pgdat->kswapd_order) < reclaim_order) in kswapd_try_to_sleep()
3908 WRITE_ONCE(pgdat->kswapd_order, reclaim_order); in kswapd_try_to_sleep()
3911 finish_wait(&pgdat->kswapd_wait, &wait); in kswapd_try_to_sleep()
3912 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); in kswapd_try_to_sleep()
3921 trace_mm_vmscan_kswapd_sleep(pgdat->node_id); in kswapd_try_to_sleep()
3928 * per-cpu vmstat threshold while kswapd is awake and restore in kswapd_try_to_sleep()
3943 finish_wait(&pgdat->kswapd_wait, &wait); in kswapd_try_to_sleep()
3956 * If there are applications that are active memory-allocators
3962 unsigned int highest_zoneidx = MAX_NR_ZONES - 1; in kswapd()
3965 const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); in kswapd()
3982 tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; in kswapd()
3985 WRITE_ONCE(pgdat->kswapd_order, 0); in kswapd()
3986 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); in kswapd()
3990 alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order); in kswapd()
3999 alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order); in kswapd()
4002 WRITE_ONCE(pgdat->kswapd_order, 0); in kswapd()
4003 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); in kswapd()
4017 * Reclaim begins at the requested order but if a high-order in kswapd()
4019 * order-0. If that happens, kswapd will consider sleeping in kswapd()
4024 trace_mm_vmscan_kswapd_wake(pgdat->node_id, highest_zoneidx, in kswapd()
4032 tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); in kswapd()
4044 pgdat->mkswapd[hid] = kthread_run(kswapd, pgdat, "kswapd%d:%d", in kswapd_per_node_run()
4046 if (IS_ERR(pgdat->mkswapd[hid])) { in kswapd_per_node_run()
4051 ret = PTR_ERR(pgdat->mkswapd[hid]); in kswapd_per_node_run()
4052 pgdat->mkswapd[hid] = NULL; in kswapd_per_node_run()
4055 if (!pgdat->kswapd) in kswapd_per_node_run()
4056 pgdat->kswapd = pgdat->mkswapd[hid]; in kswapd_per_node_run()
4068 kswapd = NODE_DATA(nid)->mkswapd[hid]; in kswapd_per_node_stop()
4071 NODE_DATA(nid)->mkswapd[hid] = NULL; in kswapd_per_node_stop()
4074 NODE_DATA(nid)->kswapd = NULL; in kswapd_per_node_stop()
4078 * A zone is low on free memory or too fragmented for high-order memory. If
4096 pgdat = zone->zone_pgdat; in wakeup_kswapd()
4097 curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx); in wakeup_kswapd()
4100 WRITE_ONCE(pgdat->kswapd_highest_zoneidx, highest_zoneidx); in wakeup_kswapd()
4102 if (READ_ONCE(pgdat->kswapd_order) < order) in wakeup_kswapd()
4103 WRITE_ONCE(pgdat->kswapd_order, order); in wakeup_kswapd()
4105 if (!waitqueue_active(&pgdat->kswapd_wait)) in wakeup_kswapd()
4109 if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES || in wakeup_kswapd()
4114 * fragmented for high-order allocations. Wake up kcompactd in wakeup_kswapd()
4124 trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, highest_zoneidx, order, in wakeup_kswapd()
4126 wake_up_interruptible(&pgdat->kswapd_wait); in wakeup_kswapd()
4131 * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
4143 .reclaim_idx = MAX_NR_ZONES - 1, in shrink_all_memory()
4169 * This kswapd start function will be called by init and node-hot-add.
4170 * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
4177 if (pgdat->kswapd) in kswapd_run()
4183 pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid); in kswapd_run()
4184 if (IS_ERR(pgdat->kswapd)) { in kswapd_run()
4188 ret = PTR_ERR(pgdat->kswapd); in kswapd_run()
4189 pgdat->kswapd = NULL; in kswapd_run()
4200 struct task_struct *kswapd = NODE_DATA(nid)->kswapd; in kswapd_stop()
4209 NODE_DATA(nid)->kswapd = NULL; in kswapd_stop()
4229 * If non-zero call node_reclaim when the number of free pages falls below
4272 return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0; in node_unmapped_file_pages()
4300 return nr_pagecache_reclaimable - delta; in node_pagecache_reclaimable()
4323 trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, in __node_reclaim()
4334 p->flags |= PF_SWAPWRITE; in __node_reclaim()
4337 if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) { in __node_reclaim()
4344 } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); in __node_reclaim()
4348 current->flags &= ~PF_SWAPWRITE; in __node_reclaim()
4371 if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages && in node_reclaim()
4373 pgdat->min_slab_pages) in node_reclaim()
4379 if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC)) in node_reclaim()
4384 * have associated processors. This will favor the local processor in node_reclaim()
4388 if (node_state(pgdat->node_id, N_CPU) && pgdat->node_id != numa_node_id()) in node_reclaim()
4391 if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags)) in node_reclaim()
4395 clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags); in node_reclaim()
4405 * check_move_unevictable_pages - check pages for evictability and move to
4421 for (i = 0; i < pvec->nr; i++) { in check_move_unevictable_pages()
4422 struct page *page = pvec->pages[i]; in check_move_unevictable_pages()
4434 spin_unlock_irq(&pgdat->lru_lock); in check_move_unevictable_pages()
4436 spin_lock_irq(&pgdat->lru_lock); in check_move_unevictable_pages()
4457 spin_unlock_irq(&pgdat->lru_lock); in check_move_unevictable_pages()