Lines matching "current" and "mode" in mm/mempolicy.c
1 // SPDX-License-Identifier: GPL-2.0-only
120 * run-time system-wide default policy => local allocation
124 .mode = MPOL_PREFERRED,
131 * numa_map_to_online_node - Find closest online node
158 struct mempolicy *pol = p->mempolicy; in get_task_policy()
168 if (pol->mode) in get_task_policy()
182 return pol->flags & MPOL_MODE_FLAGS; in mpol_store_user_nodemask()
196 return -EINVAL; in mpol_new_interleave()
197 pol->v.nodes = *nodes; in mpol_new_interleave()
204 pol->flags |= MPOL_F_LOCAL; /* local allocation */ in mpol_new_preferred()
206 return -EINVAL; /* no allowed nodes */ in mpol_new_preferred()
208 pol->v.preferred_node = first_node(*nodes); in mpol_new_preferred()
215 return -EINVAL; in mpol_new_bind()
216 pol->v.nodes = *nodes; in mpol_new_bind()
223 * parameter with respect to the policy mode and flags. But, we need to
234 /* if mode is MPOL_DEFAULT, pol is NULL. This is right. */ in mpol_set_nodemask()
238 nodes_and(nsc->mask1, in mpol_set_nodemask()
242 if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes)) in mpol_set_nodemask()
245 if (pol->flags & MPOL_F_RELATIVE_NODES) in mpol_set_nodemask()
246 mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1); in mpol_set_nodemask()
248 nodes_and(nsc->mask2, *nodes, nsc->mask1); in mpol_set_nodemask()
251 pol->w.user_nodemask = *nodes; in mpol_set_nodemask()
253 pol->w.cpuset_mems_allowed = in mpol_set_nodemask()
258 ret = mpol_ops[pol->mode].create(pol, &nsc->mask2); in mpol_set_nodemask()
260 ret = mpol_ops[pol->mode].create(pol, NULL); in mpol_set_nodemask()
268 static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, in mpol_new() argument
273 pr_debug("setting mode %d flags %d nodes[0] %lx\n", in mpol_new()
274 mode, flags, nodes ? nodes_addr(*nodes)[0] : NUMA_NO_NODE); in mpol_new()
276 if (mode == MPOL_DEFAULT) { in mpol_new()
278 return ERR_PTR(-EINVAL); in mpol_new()
286 * All other modes require a valid pointer to a non-empty nodemask. in mpol_new()
288 if (mode == MPOL_PREFERRED) { in mpol_new()
292 return ERR_PTR(-EINVAL); in mpol_new()
294 } else if (mode == MPOL_LOCAL) { in mpol_new()
298 return ERR_PTR(-EINVAL); in mpol_new()
299 mode = MPOL_PREFERRED; in mpol_new()
301 return ERR_PTR(-EINVAL); in mpol_new()
304 return ERR_PTR(-ENOMEM); in mpol_new()
305 atomic_set(&policy->refcnt, 1); in mpol_new()
306 policy->mode = mode; in mpol_new()
307 policy->flags = flags; in mpol_new()
315 if (!atomic_dec_and_test(&p->refcnt)) in __mpol_put()
328 if (pol->flags & MPOL_F_STATIC_NODES) in mpol_rebind_nodemask()
329 nodes_and(tmp, pol->w.user_nodemask, *nodes); in mpol_rebind_nodemask()
330 else if (pol->flags & MPOL_F_RELATIVE_NODES) in mpol_rebind_nodemask()
331 mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); in mpol_rebind_nodemask()
333 nodes_remap(tmp, pol->v.nodes,pol->w.cpuset_mems_allowed, in mpol_rebind_nodemask()
335 pol->w.cpuset_mems_allowed = *nodes; in mpol_rebind_nodemask()
341 pol->v.nodes = tmp; in mpol_rebind_nodemask()
349 if (pol->flags & MPOL_F_STATIC_NODES) { in mpol_rebind_preferred()
350 int node = first_node(pol->w.user_nodemask); in mpol_rebind_preferred()
353 pol->v.preferred_node = node; in mpol_rebind_preferred()
354 pol->flags &= ~MPOL_F_LOCAL; in mpol_rebind_preferred()
356 pol->flags |= MPOL_F_LOCAL; in mpol_rebind_preferred()
357 } else if (pol->flags & MPOL_F_RELATIVE_NODES) { in mpol_rebind_preferred()
358 mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); in mpol_rebind_preferred()
359 pol->v.preferred_node = first_node(tmp); in mpol_rebind_preferred()
360 } else if (!(pol->flags & MPOL_F_LOCAL)) { in mpol_rebind_preferred()
361 pol->v.preferred_node = node_remap(pol->v.preferred_node, in mpol_rebind_preferred()
362 pol->w.cpuset_mems_allowed, in mpol_rebind_preferred()
364 pol->w.cpuset_mems_allowed = *nodes; in mpol_rebind_preferred()
369 * mpol_rebind_policy - Migrate a policy to a different set of nodes
371 * Per-vma policies are protected by mmap_lock. Allocations using per-task
372 * policies are protected by task->mems_allowed_seq to prevent a premature
377 if (!pol || pol->mode == MPOL_LOCAL) in mpol_rebind_policy()
379 if (!mpol_store_user_nodemask(pol) && !(pol->flags & MPOL_F_LOCAL) && in mpol_rebind_policy()
380 nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) in mpol_rebind_policy()
383 mpol_ops[pol->mode].rebind(pol, newmask); in mpol_rebind_policy()
395 mpol_rebind_policy(tsk->mempolicy, new); in mpol_rebind_task()
401 * Call holding a reference to mm. Takes mm->mmap_lock during call.
409 for (vma = mm->mmap; vma; vma = vma->vm_next) { in mpol_rebind_mm()
411 mpol_rebind_policy(vma->vm_policy, new); in mpol_rebind_mm()
448 * Check if the page's nid is in qp->nmask.
450 * If MPOL_MF_INVERT is set in qp->flags, check if the nid is
451 * in the invert of qp->nmask.
457 unsigned long flags = qp->flags; in queue_pages_required()
459 return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); in queue_pages_required()
464 * 0 - pages are placed on the right node or queued successfully.
465 * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
467 * 2 - THP was split.
468 * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
478 struct queue_pages *qp = walk->private; in queue_pages_pmd()
482 ret = -EIO; in queue_pages_pmd()
488 __split_huge_pmd(walk->vma, pmd, addr, false, NULL); in queue_pages_pmd()
495 flags = qp->flags; in queue_pages_pmd()
498 if (!vma_migratable(walk->vma) || in queue_pages_pmd()
499 migrate_page_add(page, qp->pagelist, flags)) { in queue_pages_pmd()
504 ret = -EIO; in queue_pages_pmd()
516 * 0 - pages are placed on the right node or queued successfully.
517 * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
519 * -EIO - only MPOL_MF_STRICT was specified and an existing page was already
525 struct vm_area_struct *vma = walk->vma; in queue_pages_pte_range()
527 struct queue_pages *qp = walk->private; in queue_pages_pte_range()
528 unsigned long flags = qp->flags; in queue_pages_pte_range()
545 mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); in queue_pages_pte_range()
572 if (migrate_page_add(page, qp->pagelist, flags)) in queue_pages_pte_range()
583 return addr != end ? -EIO : 0; in queue_pages_pte_range()
592 struct queue_pages *qp = walk->private; in queue_pages_hugetlb()
593 unsigned long flags = (qp->flags & MPOL_MF_VALID); in queue_pages_hugetlb()
598 ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); in queue_pages_hugetlb()
611 ret = -EIO; in queue_pages_hugetlb()
615 if (!vma_migratable(walk->vma)) { in queue_pages_hugetlb()
618 * stopped walking current vma. in queue_pages_hugetlb()
629 if (!isolate_huge_page(page, qp->pagelist) && in queue_pages_hugetlb()
677 struct vm_area_struct *vma = walk->vma; in queue_pages_test_walk()
678 struct queue_pages *qp = walk->private; in queue_pages_test_walk()
679 unsigned long endvma = vma->vm_end; in queue_pages_test_walk()
680 unsigned long flags = qp->flags; in queue_pages_test_walk()
683 VM_BUG_ON_VMA((vma->vm_start > start) || (vma->vm_end < end), vma); in queue_pages_test_walk()
685 if (!qp->first) { in queue_pages_test_walk()
686 qp->first = vma; in queue_pages_test_walk()
688 (qp->start < vma->vm_start)) in queue_pages_test_walk()
690 return -EFAULT; in queue_pages_test_walk()
693 ((vma->vm_end < qp->end) && in queue_pages_test_walk()
694 (!vma->vm_next || vma->vm_end < vma->vm_next->vm_start))) in queue_pages_test_walk()
696 return -EFAULT; in queue_pages_test_walk()
699 * Need to check MPOL_MF_STRICT to return -EIO if possible in queue_pages_test_walk()
712 !(vma->vm_flags & VM_MIXEDMAP)) in queue_pages_test_walk()
717 /* queue pages from current vma */ in queue_pages_test_walk()
737 * 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were
739 * 0 - queue pages successfully or no misplaced page.
740 * errno - i.e. misplaced pages with MPOL_MF_STRICT specified (-EIO) or
742 * your accessible address space (-EFAULT)
763 err = -EFAULT; in queue_pages_range()
779 pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", in vma_replace_policy()
780 vma->vm_start, vma->vm_end, vma->vm_pgoff, in vma_replace_policy()
781 vma->vm_ops, vma->vm_file, in vma_replace_policy()
782 vma->vm_ops ? vma->vm_ops->set_policy : NULL); in vma_replace_policy()
789 if (vma->vm_ops && vma->vm_ops->set_policy) { in vma_replace_policy()
790 err = vma->vm_ops->set_policy(vma, new); in vma_replace_policy()
795 old = vma->vm_policy; in vma_replace_policy()
800 WRITE_ONCE(vma->vm_policy, new); in vma_replace_policy()
825 prev = vma->vm_prev; in mbind_range()
826 if (start > vma->vm_start) in mbind_range()
829 for (; vma && vma->vm_start < end; prev = vma, vma = vma->vm_next) { in mbind_range()
830 vmstart = max(start, vma->vm_start); in mbind_range()
831 vmend = min(end, vma->vm_end); in mbind_range()
836 pgoff = vma->vm_pgoff + in mbind_range()
837 ((vmstart - vma->vm_start) >> PAGE_SHIFT); in mbind_range()
838 prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, in mbind_range()
839 vma->anon_vma, vma->vm_file, pgoff, in mbind_range()
840 new_pol, vma->vm_userfaultfd_ctx, in mbind_range()
846 if (vma->vm_start != vmstart) { in mbind_range()
847 err = split_vma(vma->vm_mm, vma, vmstart, 1); in mbind_range()
851 if (vma->vm_end != vmend) { in mbind_range()
852 err = split_vma(vma->vm_mm, vma, vmend, 0); in mbind_range()
867 static long do_set_mempolicy(unsigned short mode, unsigned short flags, in do_set_mempolicy() argument
875 return -ENOMEM; in do_set_mempolicy()
877 new = mpol_new(mode, flags, nodes); in do_set_mempolicy()
888 task_lock(current); in do_set_mempolicy()
889 old = current->mempolicy; in do_set_mempolicy()
890 current->mempolicy = new; in do_set_mempolicy()
891 if (new && new->mode == MPOL_INTERLEAVE) in do_set_mempolicy()
892 current->il_prev = MAX_NUMNODES-1; in do_set_mempolicy()
893 task_unlock(current); in do_set_mempolicy()
912 switch (p->mode) { in get_policy_nodemask()
915 *nodes = p->v.nodes; in get_policy_nodemask()
918 if (!(p->flags & MPOL_F_LOCAL)) in get_policy_nodemask()
919 node_set(p->v.preferred_node, *nodes); in get_policy_nodemask()
948 struct mm_struct *mm = current->mm; in do_get_mempolicy()
950 struct mempolicy *pol = current->mempolicy, *pol_refcount = NULL; in do_get_mempolicy()
954 return -EINVAL; in do_get_mempolicy()
958 return -EINVAL; in do_get_mempolicy()
960 task_lock(current); in do_get_mempolicy()
962 task_unlock(current); in do_get_mempolicy()
976 return -EFAULT; in do_get_mempolicy()
978 if (vma->vm_ops && vma->vm_ops->get_policy) in do_get_mempolicy()
979 pol = vma->vm_ops->get_policy(vma, addr); in do_get_mempolicy()
981 pol = vma->vm_policy; in do_get_mempolicy()
983 return -EINVAL; in do_get_mempolicy()
1003 } else if (pol == current->mempolicy && in do_get_mempolicy()
1004 pol->mode == MPOL_INTERLEAVE) { in do_get_mempolicy()
1005 *policy = next_node_in(current->il_prev, pol->v.nodes); in do_get_mempolicy()
1007 err = -EINVAL; in do_get_mempolicy()
1012 pol->mode; in do_get_mempolicy()
1017 *policy |= (pol->flags & MPOL_MODE_FLAGS); in do_get_mempolicy()
1023 *nmask = pol->w.user_nodemask; in do_get_mempolicy()
1025 task_lock(current); in do_get_mempolicy()
1027 task_unlock(current); in do_get_mempolicy()
1053 list_add_tail(&head->lru, pagelist); in migrate_page_add()
1059 * Non-movable page may reach here. And, there may be in migrate_page_add()
1060 * temporary off LRU pages or non-LRU movable pages. in migrate_page_add()
1063 * should return -EIO for this case too. in migrate_page_add()
1065 return -EIO; in migrate_page_add()
1096 queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask, in migrate_to_node()
1176 * Example: [2,3,4] -> [3,4,5] moves everything. in do_migrate_pages()
1177 * [0-7] -> [3,4,5] moves only 0,1,2,6,7. in do_migrate_pages()
1218 * list of pages handed to migrate_pages()--which is how we get here--
1226 vma = find_vma(current->mm, start); in new_page()
1229 if (address != -EFAULT) in new_page()
1231 vma = vma->vm_next; in new_page()
1258 return -EIO; in migrate_page_add()
1264 return -ENOSYS; in do_migrate_pages()
1274 unsigned short mode, unsigned short mode_flags, in do_mbind() argument
1277 struct mm_struct *mm = current->mm; in do_mbind()
1285 return -EINVAL; in do_mbind()
1287 return -EPERM; in do_mbind()
1290 return -EINVAL; in do_mbind()
1292 if (mode == MPOL_DEFAULT) in do_mbind()
1295 len = (len + PAGE_SIZE - 1) & PAGE_MASK; in do_mbind()
1299 return -EINVAL; in do_mbind()
1303 new = mpol_new(mode, mode_flags, nmask); in do_mbind()
1308 new->flags |= MPOL_F_MOF; in do_mbind()
1317 pr_debug("mbind %lx-%lx mode:%d flags:%d nodes:%lx\n", in do_mbind()
1318 start, start + len, mode, mode_flags, in do_mbind()
1333 err = -ENOMEM; in do_mbind()
1361 err = -EIO; in do_mbind()
1389 --maxnode; in get_nodes()
1394 return -EINVAL; in get_nodes()
1400 endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1; in get_nodes()
1414 return -EFAULT; in get_nodes()
1415 if (k == nlongs - 1) { in get_nodes()
1417 return -EINVAL; in get_nodes()
1419 return -EINVAL; in get_nodes()
1428 valid_mask &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1); in get_nodes()
1429 if (get_user(t, nmask + nlongs - 1)) in get_nodes()
1430 return -EFAULT; in get_nodes()
1432 return -EINVAL; in get_nodes()
1436 return -EFAULT; in get_nodes()
1437 nodes_addr(*nodes)[nlongs-1] &= endmask; in get_nodes()
1445 unsigned long copy = ALIGN(maxnode-1, 64) / 8; in copy_nodes_to_user()
1450 return -EINVAL; in copy_nodes_to_user()
1451 if (clear_user((char __user *)mask + nbytes, copy - nbytes)) in copy_nodes_to_user()
1452 return -EFAULT; in copy_nodes_to_user()
1455 return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0; in copy_nodes_to_user()
1459 unsigned long mode, const unsigned long __user *nmask, in kernel_mbind() argument
1467 mode_flags = mode & MPOL_MODE_FLAGS; in kernel_mbind()
1468 mode &= ~MPOL_MODE_FLAGS; in kernel_mbind()
1469 if (mode >= MPOL_MAX) in kernel_mbind()
1470 return -EINVAL; in kernel_mbind()
1473 return -EINVAL; in kernel_mbind()
1477 return do_mbind(start, len, mode, mode_flags, &nodes, flags); in kernel_mbind()
1481 unsigned long, mode, const unsigned long __user *, nmask, in SYSCALL_DEFINE6() argument
1484 return kernel_mbind(start, len, mode, nmask, maxnode, flags); in SYSCALL_DEFINE6()
1488 static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask, in kernel_set_mempolicy() argument
1495 flags = mode & MPOL_MODE_FLAGS; in kernel_set_mempolicy()
1496 mode &= ~MPOL_MODE_FLAGS; in kernel_set_mempolicy()
1497 if ((unsigned int)mode >= MPOL_MAX) in kernel_set_mempolicy()
1498 return -EINVAL; in kernel_set_mempolicy()
1500 return -EINVAL; in kernel_set_mempolicy()
1504 return do_set_mempolicy(mode, flags, &nodes); in kernel_set_mempolicy()
1507 SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, in SYSCALL_DEFINE3() argument
1510 return kernel_set_mempolicy(mode, nmask, maxnode); in SYSCALL_DEFINE3()
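The two entry points above (mbind(2) and set_mempolicy(2)) are what userspace reaches through libnuma's <numaif.h> wrappers. A minimal sketch, assuming a machine with at least two NUMA nodes and libnuma installed (compile with -lnuma); the node numbers and sizes are illustrative:

#include <numaif.h>        /* set_mempolicy(), mbind(), MPOL_* */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	/* Interleave this task's future allocations across nodes 0 and 1. */
	unsigned long mask = (1UL << 0) | (1UL << 1);

	if (set_mempolicy(MPOL_INTERLEAVE, &mask, 8 * sizeof(mask)))
		perror("set_mempolicy");

	/* Bind one specific mapping to node 0, moving any pages already placed. */
	size_t len = 4UL << 20;                     /* 4 MiB, page aligned */
	void *buf = aligned_alloc(4096, len);
	unsigned long node0 = 1UL << 0;

	if (buf && mbind(buf, len, MPOL_BIND, &node0, 8 * sizeof(node0), MPOL_MF_MOVE))
		perror("mbind");

	if (buf)
		memset(buf, 0, len);                /* fault pages in under the new policy */
	return 0;
}

Both calls feed the nodemask through get_nodes() and mpol_new() shown earlier; set_mempolicy() then installs the result as current->mempolicy, while mbind() applies it to the VMAs in the range via mbind_range().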
1526 return -ENOMEM; in kernel_migrate_pages()
1528 old = &scratch->mask1; in kernel_migrate_pages()
1529 new = &scratch->mask2; in kernel_migrate_pages()
1541 task = pid ? find_task_by_vpid(pid) : current; in kernel_migrate_pages()
1544 err = -ESRCH; in kernel_migrate_pages()
1549 err = -EINVAL; in kernel_migrate_pages()
1557 err = -EPERM; in kernel_migrate_pages()
1565 err = -EPERM; in kernel_migrate_pages()
1569 task_nodes = cpuset_mems_allowed(current); in kernel_migrate_pages()
1582 err = -EINVAL; in kernel_migrate_pages()
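kernel_migrate_pages() above backs the migrate_pages(2) syscall. A hedged sketch using the libnuma wrapper to shift a task's pages from node 0 to node 1 (node numbers are illustrative; moving another task's pages needs CAP_SYS_NICE or a matching uid):

#include <numaif.h>        /* migrate_pages() wrapper */
#include <stdio.h>

int move_to_node1(int pid)
{
	unsigned long from = 1UL << 0;
	unsigned long to   = 1UL << 1;

	/* Returns the number of pages that could not be moved, or -1 on error. */
	long left = migrate_pages(pid, 8 * sizeof(from), &from, &to);
	if (left < 0) {
		perror("migrate_pages");
		return -1;
	}
	printf("%ld pages could not be migrated\n", left);
	return 0;
}

int main(void)
{
	return move_to_node1(0);    /* pid 0 means the calling task itself */
}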
1621 return -EINVAL; in kernel_get_mempolicy()
1631 return -EFAULT; in kernel_get_mempolicy()
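kernel_get_mempolicy() services get_mempolicy(2). A small sketch that queries the calling task's policy and then asks which node backs a freshly touched page (assumes the system's node count fits in a single unsigned long mask):

#include <numaif.h>        /* get_mempolicy(), MPOL_F_NODE, MPOL_F_ADDR */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int mode;
	unsigned long mask = 0;

	/* Task-wide policy mode plus its nodemask. */
	if (get_mempolicy(&mode, &mask, 8 * sizeof(mask), NULL, 0))
		perror("get_mempolicy(task)");
	else
		printf("task policy %d, nodemask 0x%lx\n", mode, mask);

	/* Which node holds the page behind this address? */
	char *p = malloc(4096);
	if (p) {
		int node;

		p[0] = 1;                           /* fault the page in first */
		if (get_mempolicy(&node, NULL, 0, p, MPOL_F_NODE | MPOL_F_ADDR))
			perror("get_mempolicy(addr)");
		else
			printf("page at %p lives on node %d\n", (void *)p, node);
	}
	return 0;
}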
1658 nr_bits = min_t(unsigned long, maxnode-1, nr_node_ids); in COMPAT_SYSCALL_DEFINE5()
1671 err |= clear_user(nmask, ALIGN(maxnode-1, 8) / 8); in COMPAT_SYSCALL_DEFINE5()
1678 COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask, in COMPAT_SYSCALL_DEFINE3() argument
1685 nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES); in COMPAT_SYSCALL_DEFINE3()
1690 return -EFAULT; in COMPAT_SYSCALL_DEFINE3()
1693 return -EFAULT; in COMPAT_SYSCALL_DEFINE3()
1696 return kernel_set_mempolicy(mode, nm, nr_bits+1); in COMPAT_SYSCALL_DEFINE3()
1700 compat_ulong_t, mode, compat_ulong_t __user *, nmask, in COMPAT_SYSCALL_DEFINE6() argument
1707 nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES); in COMPAT_SYSCALL_DEFINE6()
1712 return -EFAULT; in COMPAT_SYSCALL_DEFINE6()
1715 return -EFAULT; in COMPAT_SYSCALL_DEFINE6()
1718 return kernel_mbind(start, len, mode, nm, nr_bits+1, flags); in COMPAT_SYSCALL_DEFINE6()
1732 nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES); in COMPAT_SYSCALL_DEFINE4()
1736 return -EFAULT; in COMPAT_SYSCALL_DEFINE4()
1741 return -EFAULT; in COMPAT_SYSCALL_DEFINE4()
1745 return -EFAULT; in COMPAT_SYSCALL_DEFINE4()
1749 return -EFAULT; in COMPAT_SYSCALL_DEFINE4()
1758 if (vma->vm_flags & (VM_IO | VM_PFNMAP)) in vma_migratable()
1777 if (vma->vm_file && in vma_migratable()
1778 gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping)) in vma_migratable()
1792 if (vma->vm_ops && vma->vm_ops->get_policy) in __get_vma_policy()
1793 return vma->vm_ops->get_policy(vma, addr); in __get_vma_policy()
1799 pol = READ_ONCE(vma->vm_policy); in __get_vma_policy()
1803 * a pseudo vma whose vma->vm_ops=NULL. Take a reference in __get_vma_policy()
1820 * Falls back to current->mempolicy or system default policy, as necessary.
1822 * count--added by the get_policy() vm_op, as appropriate--to protect against
1832 pol = get_task_policy(current); in get_vma_policy()
1841 if (vma->vm_ops && vma->vm_ops->get_policy) { in vma_policy_mof()
1844 pol = vma->vm_ops->get_policy(vma, vma->vm_start); in vma_policy_mof()
1845 if (pol && (pol->flags & MPOL_F_MOF)) in vma_policy_mof()
1852 pol = vma->vm_policy; in vma_policy_mof()
1854 pol = get_task_policy(current); in vma_policy_mof()
1856 return pol->flags & MPOL_F_MOF; in vma_policy_mof()
1866 * if policy->v.nodes has movable memory only, in apply_policy_zone()
1869 * policy->v.nodes is intersect with node_states[N_MEMORY]. in apply_policy_zone()
1871 * policy->v.nodes has movable memory only. in apply_policy_zone()
1873 if (!nodes_intersects(policy->v.nodes, node_states[N_HIGH_MEMORY])) in apply_policy_zone()
1886 if (unlikely(policy->mode == MPOL_BIND) && in policy_nodemask()
1888 cpuset_nodemask_valid_mems_allowed(&policy->v.nodes)) in policy_nodemask()
1889 return &policy->v.nodes; in policy_nodemask()
1897 if (policy->mode == MPOL_PREFERRED && !(policy->flags & MPOL_F_LOCAL)) in policy_node()
1898 nd = policy->v.preferred_node; in policy_node()
1905 WARN_ON_ONCE(policy->mode == MPOL_BIND && (gfp & __GFP_THISNODE)); in policy_node()
1915 struct task_struct *me = current; in interleave_nodes()
1917 next = next_node_in(me->il_prev, policy->v.nodes); in interleave_nodes()
1919 me->il_prev = next; in interleave_nodes()
1935 policy = current->mempolicy; in mempolicy_slab_node()
1936 if (!policy || policy->flags & MPOL_F_LOCAL) in mempolicy_slab_node()
1939 switch (policy->mode) { in mempolicy_slab_node()
1944 return policy->v.preferred_node; in mempolicy_slab_node()
1958 zonelist = &NODE_DATA(node)->node_zonelists[ZONELIST_FALLBACK]; in mempolicy_slab_node()
1960 &policy->v.nodes); in mempolicy_slab_node()
1961 return z->zone ? zone_to_nid(z->zone) : node; in mempolicy_slab_node()
1971 * node in pol->v.nodes (starting from n=0), wrapping around if n exceeds the
1976 unsigned nnodes = nodes_weight(pol->v.nodes); in offset_il_node()
1984 nid = first_node(pol->v.nodes); in offset_il_node()
1986 nid = next_node(nid, pol->v.nodes); in offset_il_node()
1999 * shift and PAGE_SHIFT, so the bit-shift is safe. in interleave_nid()
2005 off = vma->vm_pgoff >> (shift - PAGE_SHIFT); in interleave_nid()
2006 off += (addr - vma->vm_start) >> shift; in interleave_nid()
2036 if (unlikely((*mpol)->mode == MPOL_INTERLEAVE)) { in huge_node()
2041 if ((*mpol)->mode == MPOL_BIND) in huge_node()
2042 *nodemask = &(*mpol)->v.nodes; in huge_node()
2050 * If the current task's mempolicy is "default" [NULL], return 'false'
2055 * of non-default mempolicy.
2058 * because the current task is examining its own mempolicy and a task's in init_nodemask_of_mempolicy()
2068 if (!(mask && current->mempolicy)) in init_nodemask_of_mempolicy()
2071 task_lock(current); in init_nodemask_of_mempolicy()
2072 mempolicy = current->mempolicy; in init_nodemask_of_mempolicy()
2073 switch (mempolicy->mode) { in init_nodemask_of_mempolicy()
2075 if (mempolicy->flags & MPOL_F_LOCAL) in init_nodemask_of_mempolicy()
2078 nid = mempolicy->v.preferred_node; in init_nodemask_of_mempolicy()
2084 *mask = mempolicy->v.nodes; in init_nodemask_of_mempolicy()
2090 task_unlock(current); in init_nodemask_of_mempolicy()
2115 mempolicy = tsk->mempolicy; in mempolicy_nodemask_intersects()
2119 switch (mempolicy->mode) { in mempolicy_nodemask_intersects()
2130 ret = nodes_intersects(mempolicy->v.nodes, *mask); in mempolicy_nodemask_intersects()
2160 * alloc_pages_vma - Allocate a page for a VMA.
2176 * a NUMA policy associated with the VMA or the current process.
2177 * When VMA is not NULL caller must read-lock the mmap_lock of the
2193 if (pol->mode == MPOL_INTERLEAVE) { in alloc_pages_vma()
2206 * For hugepage allocation and non-interleave policy which in alloc_pages_vma()
2207 * allows the current node (or other explicitly preferred in alloc_pages_vma()
2208 * node) we only try to allocate from the current/preferred in alloc_pages_vma()
2212 * If the policy is interleave, or does not allow the current in alloc_pages_vma()
2215 if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL)) in alloc_pages_vma()
2216 hpage_node = pol->v.preferred_node; in alloc_pages_vma()
2252 * alloc_pages_current - Allocate pages.
2263 * interrupt context and apply the current process NUMA policy.
2272 pol = get_task_policy(current); in alloc_pages_current()
2275 * No reference counting needed for current->mempolicy in alloc_pages_current()
2278 if (pol->mode == MPOL_INTERLEAVE) in alloc_pages_current()
2295 dst->vm_policy = pol; in vma_dup_policy()
2300 * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it
2306 * current's mempolicy may be rebinded by the other task(the task that changes
2307 * cpuset's mems), so we needn't do rebind work for current task.
2316 return ERR_PTR(-ENOMEM); in __mpol_dup()
2319 if (old == current->mempolicy) { in __mpol_dup()
2320 task_lock(current); in __mpol_dup()
2322 task_unlock(current); in __mpol_dup()
2327 nodemask_t mems = cpuset_mems_allowed(current); in __mpol_dup()
2330 atomic_set(&new->refcnt, 1); in __mpol_dup()
2339 if (a->mode != b->mode) in __mpol_equal()
2341 if (a->flags != b->flags) in __mpol_equal()
2344 if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask)) in __mpol_equal()
2347 switch (a->mode) { in __mpol_equal()
2350 return !!nodes_equal(a->v.nodes, b->v.nodes); in __mpol_equal()
2352 /* a's ->flags is the same as b's */ in __mpol_equal()
2353 if (a->flags & MPOL_F_LOCAL) in __mpol_equal()
2355 return a->v.preferred_node == b->v.preferred_node; in __mpol_equal()
2366 * The policies are kept in Red-Black tree linked from the inode.
2367 * They are protected by the sp->lock rwlock, which should be held
2372 * lookup first element intersecting start-end. Caller holds sp->lock for
2378 struct rb_node *n = sp->root.rb_node; in sp_lookup()
2383 if (start >= p->end) in sp_lookup()
2384 n = n->rb_right; in sp_lookup()
2385 else if (end <= p->start) in sp_lookup()
2386 n = n->rb_left; in sp_lookup()
2398 if (w->end <= start) in sp_lookup()
2406 * Insert a new shared policy into the list. Caller holds sp->lock for
2411 struct rb_node **p = &sp->root.rb_node; in sp_insert()
2418 if (new->start < nd->start) in sp_insert()
2419 p = &(*p)->rb_left; in sp_insert()
2420 else if (new->end > nd->end) in sp_insert()
2421 p = &(*p)->rb_right; in sp_insert()
2425 rb_link_node(&new->nd, parent, p); in sp_insert()
2426 rb_insert_color(&new->nd, &sp->root); in sp_insert()
2427 pr_debug("inserting %lx-%lx: %d\n", new->start, new->end, in sp_insert()
2428 new->policy ? new->policy->mode : 0); in sp_insert()
2438 if (!sp->root.rb_node) in mpol_shared_policy_lookup()
2440 read_lock(&sp->lock); in mpol_shared_policy_lookup()
2443 mpol_get(sn->policy); in mpol_shared_policy_lookup()
2444 pol = sn->policy; in mpol_shared_policy_lookup()
2446 read_unlock(&sp->lock); in mpol_shared_policy_lookup()
2452 mpol_put(n->policy); in sp_free()
2457 * mpol_misplaced - check whether current page node is valid in policy
2463 * Lookup current policy node id for vma,addr and "compare to" page's
2467 * -1 - not misplaced, page is in the right node
2468 * node - node id where the page should be
2482 int ret = -1; in mpol_misplaced()
2485 if (!(pol->flags & MPOL_F_MOF)) in mpol_misplaced()
2488 switch (pol->mode) { in mpol_misplaced()
2490 pgoff = vma->vm_pgoff; in mpol_misplaced()
2491 pgoff += (addr - vma->vm_start) >> PAGE_SHIFT; in mpol_misplaced()
2496 if (pol->flags & MPOL_F_LOCAL) in mpol_misplaced()
2499 polnid = pol->v.preferred_node; in mpol_misplaced()
2506 * use current page if in policy nodemask, in mpol_misplaced()
2508 * If no allowed nodes, use current [!misplaced]. in mpol_misplaced()
2510 if (node_isset(curnid, pol->v.nodes)) in mpol_misplaced()
2515 &pol->v.nodes); in mpol_misplaced()
2516 polnid = zone_to_nid(z->zone); in mpol_misplaced()
2524 if (pol->flags & MPOL_F_MORON) { in mpol_misplaced()
2527 if (!should_numa_migrate_memory(current, page, curnid, thiscpu)) in mpol_misplaced()
2540 * Drop the (possibly final) reference to task->mempolicy. It needs to be
2541 * dropped after task->mempolicy is set to NULL so that any allocation done as
2550 pol = task->mempolicy; in mpol_put_task_policy()
2551 task->mempolicy = NULL; in mpol_put_task_policy()
2558 pr_debug("deleting %lx-l%lx\n", n->start, n->end); in sp_delete()
2559 rb_erase(&n->nd, &sp->root); in sp_delete()
2566 node->start = start; in sp_node_init()
2567 node->end = end; in sp_node_init()
2568 node->policy = pol; in sp_node_init()
2586 newpol->flags |= MPOL_F_SHARED; in sp_alloc()
2602 write_lock(&sp->lock); in shared_policy_replace()
2605 while (n && n->start < end) { in shared_policy_replace()
2606 struct rb_node *next = rb_next(&n->nd); in shared_policy_replace()
2607 if (n->start >= start) { in shared_policy_replace()
2608 if (n->end <= end) in shared_policy_replace()
2611 n->start = end; in shared_policy_replace()
2614 if (n->end > end) { in shared_policy_replace()
2618 *mpol_new = *n->policy; in shared_policy_replace()
2619 atomic_set(&mpol_new->refcnt, 1); in shared_policy_replace()
2620 sp_node_init(n_new, end, n->end, mpol_new); in shared_policy_replace()
2621 n->end = start; in shared_policy_replace()
2627 n->end = start; in shared_policy_replace()
2635 write_unlock(&sp->lock); in shared_policy_replace()
2647 write_unlock(&sp->lock); in shared_policy_replace()
2648 ret = -ENOMEM; in shared_policy_replace()
2655 atomic_set(&mpol_new->refcnt, 1); in shared_policy_replace()
2660 * mpol_shared_policy_init - initialize shared policy for inode
2664 * Install non-NULL @mpol in inode's shared policy rb-tree.
2665 * On entry, the current task has a reference on a non-NULL @mpol.
2673 sp->root = RB_ROOT; /* empty tree == default mempolicy */ in mpol_shared_policy_init()
2674 rwlock_init(&sp->lock); in mpol_shared_policy_init()
2684 new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); in mpol_shared_policy_init()
2688 task_lock(current); in mpol_shared_policy_init()
2689 ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch); in mpol_shared_policy_init()
2690 task_unlock(current); in mpol_shared_policy_init()
2694 /* Create pseudo-vma that contains just the policy */ in mpol_shared_policy_init()
2716 vma->vm_pgoff, in mpol_set_shared_policy()
2717 sz, npol ? npol->mode : -1, in mpol_set_shared_policy()
2718 npol ? npol->flags : -1, in mpol_set_shared_policy()
2719 npol ? nodes_addr(npol->v.nodes)[0] : NUMA_NO_NODE); in mpol_set_shared_policy()
2722 new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, npol); in mpol_set_shared_policy()
2724 return -ENOMEM; in mpol_set_shared_policy()
2726 err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new); in mpol_set_shared_policy()
2738 if (!p->root.rb_node) in mpol_free_shared_policy()
2740 write_lock(&p->lock); in mpol_free_shared_policy()
2741 next = rb_first(&p->root); in mpol_free_shared_policy()
2744 next = rb_next(&n->nd); in mpol_free_shared_policy()
2747 write_unlock(&p->lock); in mpol_free_shared_policy()
2760 /* Parsed by setup_numabalancing. override == 1 enables, -1 disables */ in check_numabalancing_enable()
2781 numabalancing_override = -1; in setup_numabalancing()
2815 .mode = MPOL_PREFERRED, in numa_policy_init()
2851 /* Reset policy of current process to default */
2876 * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
2881 * <mode>[=<flags>][:<nodelist>]
2892 int err = 1, mode; in mpol_parse_str() local
2895 *flags++ = '\0'; /* terminate mode string */ in mpol_parse_str()
2898 /* NUL-terminate mode or flags string */ in mpol_parse_str()
2907 mode = match_string(policy_modes, MPOL_MAX, str); in mpol_parse_str()
2908 if (mode < 0) in mpol_parse_str()
2911 switch (mode) { in mpol_parse_str()
2941 mode = MPOL_PREFERRED; in mpol_parse_str()
2962 * mode flags. in mpol_parse_str()
2972 new = mpol_new(mode, mode_flags, &nodes); in mpol_parse_str()
2980 if (mode != MPOL_PREFERRED) in mpol_parse_str()
2981 new->v.nodes = nodes; in mpol_parse_str()
2983 new->v.preferred_node = first_node(nodes); in mpol_parse_str()
2985 new->flags |= MPOL_F_LOCAL; in mpol_parse_str()
2991 new->w.user_nodemask = nodes; in mpol_parse_str()
2998 *--nodelist = ':'; in mpol_parse_str()
3000 *--flags = '='; in mpol_parse_str()
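mpol_parse_str() is what digests the tmpfs "mpol" mount option in the <mode>[=<flags>][:<nodelist>] form documented above. A hedged sketch of handing it such a string from userspace (mount point, size and nodelist are illustrative; mounting needs CAP_SYS_ADMIN and the listed nodes must exist):

#include <sys/mount.h>
#include <stdio.h>

int main(void)
{
	/* "bind=static:0-3" => MPOL_BIND with MPOL_F_STATIC_NODES over nodes 0-3. */
	if (mount("tmpfs", "/mnt/numa-tmp", "tmpfs", 0,
		  "size=1G,mpol=bind=static:0-3"))
		perror("mount");
	return 0;
}

The equivalent shell form is "mount -t tmpfs -o size=1G,mpol=bind=static:0-3 tmpfs /mnt/numa-tmp"; either way the option string ends up being parsed by mpol_parse_str().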
3008 * mpol_to_str - format a mempolicy structure for printing
3014 * Recommend a @maxlen of at least 32 for the longest mode, "interleave", the
3021 unsigned short mode = MPOL_DEFAULT; in mpol_to_str() local
3024 if (pol && pol != &default_policy && !(pol->flags & MPOL_F_MORON)) { in mpol_to_str()
3025 mode = pol->mode; in mpol_to_str()
3026 flags = pol->flags; in mpol_to_str()
3029 switch (mode) { in mpol_to_str()
3034 mode = MPOL_LOCAL; in mpol_to_str()
3036 node_set(pol->v.preferred_node, nodes); in mpol_to_str()
3040 nodes = pol->v.nodes; in mpol_to_str()
3048 p += snprintf(p, maxlen, "%s", policy_modes[mode]); in mpol_to_str()
3051 p += snprintf(p, buffer + maxlen - p, "="); in mpol_to_str()
3057 p += snprintf(p, buffer + maxlen - p, "static"); in mpol_to_str()
3059 p += snprintf(p, buffer + maxlen - p, "relative"); in mpol_to_str()
3063 p += scnprintf(p, buffer + maxlen - p, ":%*pbl", in mpol_to_str()
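For reference, the strings mpol_to_str() emits mirror the parse format above: "default", "local", "prefer=static:1", "bind:0,2" and "interleave:0-3" are all plausible outputs, and the same text is what appears as the policy field of each /proc/<pid>/numa_maps line.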