Lines matching "charge-current-limit-mapping" (tokens: charge, current, limit, mapping) in kernel/fork.c

1 // SPDX-License-Identifier: GPL-2.0-only
9 * 'fork.c' contains the help-routines for the 'fork' system call
83 #include <linux/posix-timers.h>
84 #include <linux/user-return-notifier.h>
132 static int max_threads; /* tunable limit on nr_threads */
212 vfree(vm_stack->addr); in free_vm_stack_cache()
235 kasan_unpoison_range(s->addr, THREAD_SIZE); in alloc_thread_stack_node()
238 memset(s->addr, 0, THREAD_SIZE); in alloc_thread_stack_node()
240 tsk->stack_vm_area = s; in alloc_thread_stack_node()
241 tsk->stack = s->addr; in alloc_thread_stack_node()
242 return s->addr; in alloc_thread_stack_node()
262 tsk->stack_vm_area = find_vm_area(stack); in alloc_thread_stack_node()
263 tsk->stack = stack; in alloc_thread_stack_node()
271 tsk->stack = kasan_reset_tag(page_address(page)); in alloc_thread_stack_node()
272 return tsk->stack; in alloc_thread_stack_node()
287 memcg_kmem_uncharge_page(vm->pages[i], 0); in free_thread_stack()
291 NULL, tsk->stack_vm_area) != NULL) in free_thread_stack()
297 vfree_atomic(tsk->stack); in free_thread_stack()
302 __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); in free_thread_stack()
313 tsk->stack = stack; in alloc_thread_stack_node()
319 kmem_cache_free(thread_stack_cache, tsk->stack); in free_thread_stack()
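Note: the CONFIG_VMAP_STACK fragments above (alloc_thread_stack_node()/free_thread_stack()) park freed thread stacks in a small per-CPU cache and claim or return slots with xchg()/cmpxchg() instead of taking a lock. A minimal userspace analogue of that slot-claiming pattern, using C11 atomics and a global (not per-CPU) cache; all names here are illustrative, not kernel API:

	/* Sketch: lock-free fixed-size cache of stack buffers, modeled on
	 * the cached_stacks[] pattern above. Illustrative only. */
	#include <stdatomic.h>
	#include <stdlib.h>
	#include <string.h>

	#define CACHE_SLOTS 2
	#define STACK_SIZE  (16 * 1024)

	static _Atomic(void *) cached_stacks[CACHE_SLOTS];

	static void *alloc_stack(void)
	{
		for (int i = 0; i < CACHE_SLOTS; i++) {
			/* Atomically claim a cached stack, if any slot holds one. */
			void *s = atomic_exchange(&cached_stacks[i], NULL);
			if (s) {
				memset(s, 0, STACK_SIZE);  /* like the memset() above */
				return s;
			}
		}
		return malloc(STACK_SIZE);         /* slow path: fresh allocation */
	}

	static void free_stack(void *s)
	{
		for (int i = 0; i < CACHE_SLOTS; i++) {
			void *expected = NULL;
			/* Park the stack in an empty slot, mirroring
			 * cmpxchg(&cached_stacks[i], NULL, stack). */
			if (atomic_compare_exchange_strong(&cached_stacks[i],
							   &expected, s))
				return;
		}
		free(s);                           /* cache full: really free it */
	}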
332 /* SLAB cache for signal_struct structures (tsk->signal) */
335 /* SLAB cache for sighand_struct structures (tsk->sighand) */
338 /* SLAB cache for files_struct structures (tsk->files) */
341 /* SLAB cache for fs_struct structures (tsk->fs) */
347 /* SLAB cache for mm_struct structures (tsk->mm) */
365 ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); in vm_area_dup()
366 ASSERT_EXCLUSIVE_WRITER(orig->vm_file); in vm_area_dup()
368 * orig->shared.rb may be modified concurrently, but the clone in vm_area_dup()
373 new->vm_next = new->vm_prev = NULL; in vm_area_dup()
391 mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB, in account_kernel_stack()
409 BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); in memcg_charge_kernel_stack()
413 * If memcg_kmem_charge_page() fails, page->mem_cgroup in memcg_charge_kernel_stack()
417 ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, in memcg_charge_kernel_stack()
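Note: memcg_charge_kernel_stack() charges the stack's pages to the memory cgroup one page at a time; as the in-tree comment above notes, a mid-loop failure is tolerable because free_thread_stack() (the memcg_kmem_uncharge_page() loop earlier) uncharges every page unconditionally. A hedged, kernel-style sketch of that shape, not verbatim source:

	/* Sketch: best-effort per-page charging; a partial failure is left
	 * for the common free path to unwind. Names are illustrative. */
	static int charge_stack_pages(struct page **pages, int nr_pages)
	{
		for (int i = 0; i < nr_pages; i++) {
			int ret = memcg_kmem_charge_page(pages[i], GFP_KERNEL, 0);
			if (ret)
				return ret;	/* pages [0, i) stay charged; the
						 * free path uncharges them all */
		}
		return 0;
	}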
429 if (WARN_ON(tsk->state != TASK_DEAD)) in release_task_stack()
432 account_kernel_stack(tsk, -1); in release_task_stack()
434 tsk->stack = NULL; in release_task_stack()
436 tsk->stack_vm_area = NULL; in release_task_stack()
443 if (refcount_dec_and_test(&tsk->stack_refcount)) in put_task_stack()
466 WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0); in free_task()
471 if (tsk->flags & PF_KTHREAD) in free_task()
484 unsigned long charge; in dup_mmap() local
489 retval = -EINTR; in dup_mmap()
495 * Not linked in yet - no deadlock potential: in dup_mmap()
500 RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); in dup_mmap()
502 mm->total_vm = oldmm->total_vm; in dup_mmap()
503 mm->data_vm = oldmm->data_vm; in dup_mmap()
504 mm->exec_vm = oldmm->exec_vm; in dup_mmap()
505 mm->stack_vm = oldmm->stack_vm; in dup_mmap()
507 rb_link = &mm->mm_rb.rb_node; in dup_mmap()
509 pprev = &mm->mmap; in dup_mmap()
518 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { in dup_mmap()
521 if (mpnt->vm_flags & VM_DONTCOPY) { in dup_mmap()
522 vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt)); in dup_mmap()
525 charge = 0; in dup_mmap()
527 * Don't duplicate many vmas if we've been oom-killed (for in dup_mmap()
530 if (fatal_signal_pending(current)) { in dup_mmap()
531 retval = -EINTR; in dup_mmap()
534 if (mpnt->vm_flags & VM_ACCOUNT) { in dup_mmap()
539 charge = len; in dup_mmap()
547 tmp->vm_mm = mm; in dup_mmap()
551 if (tmp->vm_flags & VM_WIPEONFORK) { in dup_mmap()
555 * copy page for current vma. in dup_mmap()
557 tmp->anon_vma = NULL; in dup_mmap()
560 tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); in dup_mmap()
561 file = tmp->vm_file; in dup_mmap()
564 struct address_space *mapping = file->f_mapping; in dup_mmap() local
567 if (tmp->vm_flags & VM_DENYWRITE) in dup_mmap()
569 i_mmap_lock_write(mapping); in dup_mmap()
570 if (tmp->vm_flags & VM_SHARED) in dup_mmap()
571 mapping_allow_writable(mapping); in dup_mmap()
572 flush_dcache_mmap_lock(mapping); in dup_mmap()
575 &mapping->i_mmap); in dup_mmap()
576 flush_dcache_mmap_unlock(mapping); in dup_mmap()
577 i_mmap_unlock_write(mapping); in dup_mmap()
581 * Clear hugetlb-related page reserves for children. This only in dup_mmap()
583 * are not guaranteed to succeed, even if read-only in dup_mmap()
592 pprev = &tmp->vm_next; in dup_mmap()
593 tmp->vm_prev = prev; in dup_mmap()
597 rb_link = &tmp->vm_rb.rb_right; in dup_mmap()
598 rb_parent = &tmp->vm_rb; in dup_mmap()
600 mm->map_count++; in dup_mmap()
601 if (!(tmp->vm_flags & VM_WIPEONFORK)) { in dup_mmap()
614 if (tmp->vm_ops && tmp->vm_ops->open) in dup_mmap()
615 tmp->vm_ops->open(tmp); in dup_mmap()
633 for (; last; last = last->vm_prev) { in dup_mmap()
634 if (last->vm_flags & VM_DONTCOPY) in dup_mmap()
636 if (!(last->vm_flags & VM_WIPEONFORK)) in dup_mmap()
651 retval = -ENOMEM; in dup_mmap()
652 vm_unacct_memory(charge); in dup_mmap()
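Note: read together, the dup_mmap() fragments form one loop: skip VM_DONTCOPY VMAs (subtracting their stat accounting), charge VM_ACCOUNT regions against the commit limit, duplicate each VMA, link it into the child's list and rbtree, and copy page tables unless VM_WIPEONFORK; on failure the pending charge is released with vm_unacct_memory(). A condensed kernel-style skeleton of that control flow (not verbatim source; copy_page_range()'s signature varies by kernel version):

	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
		if (mpnt->vm_flags & VM_DONTCOPY) {
			vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
			continue;
		}
		charge = 0;
		if (fatal_signal_pending(current)) {
			retval = -EINTR;	/* oom-killed: stop copying */
			goto loop_out;
		}
		if (mpnt->vm_flags & VM_ACCOUNT) {
			unsigned long len = vma_pages(mpnt);

			if (security_vm_enough_memory_mm(oldmm, len))
				goto fail_nomem;
			charge = len;		/* undone on failure below */
		}
		tmp = vm_area_dup(mpnt);	/* copy of the VMA itself */
		if (!tmp)
			goto fail_nomem;
		tmp->vm_mm = mm;
		/* file-mapping i_mmap insertion and list/rbtree linkage elided */
		mm->map_count++;
		if (!(tmp->vm_flags & VM_WIPEONFORK))
			retval = copy_page_range(tmp, mpnt);
		if (retval)
			goto loop_out;
	}
	/* error unwind: */
	fail_nomem:
		retval = -ENOMEM;
		vm_unacct_memory(charge);	/* undo this VMA's charge */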
658 mm->pgd = pgd_alloc(mm); in mm_alloc_pgd()
659 if (unlikely(!mm->pgd)) in mm_alloc_pgd()
660 return -ENOMEM; in mm_alloc_pgd()
666 pgd_free(mm, mm->pgd); in mm_free_pgd()
672 RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); in dup_mmap()
688 long x = atomic_long_read(&mm->rss_stat.count[i]); in check_mm()
691 pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", in check_mm()
696 pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n", in check_mm()
700 VM_BUG_ON_MM(mm->pmd_huge_pte, mm); in check_mm()
715 WARN_ON_ONCE(mm == current->mm); in __mmdrop()
716 WARN_ON_ONCE(mm == current->active_mm); in __mmdrop()
721 put_user_ns(mm->user_ns); in __mmdrop()
736 if (unlikely(atomic_dec_and_test(&mm->mm_count))) { in mmdrop_async()
737 INIT_WORK(&mm->async_put_work, mmdrop_async_fn); in mmdrop_async()
738 schedule_work(&mm->async_put_work); in mmdrop_async()
750 if (sig->oom_mm) in free_signal_struct()
751 mmdrop_async(sig->oom_mm); in free_signal_struct()
757 if (refcount_dec_and_test(&sig->sigcnt)) in put_signal_struct()
763 WARN_ON(!tsk->exit_state); in __put_task_struct()
764 WARN_ON(refcount_read(&tsk->usage)); in __put_task_struct()
765 WARN_ON(tsk == current); in __put_task_struct()
773 put_signal_struct(tsk->signal); in __put_task_struct()
818 * Handle zero-sized whitelist or empty thread_struct, otherwise in task_struct_whitelist()
851 init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; in fork_init()
852 init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; in fork_init()
853 init_task.signal->rlim[RLIMIT_SIGPENDING] = in fork_init()
854 init_task.signal->rlim[RLIMIT_NPROC]; in fork_init()
911 * arch_dup_task_struct() clobbers the stack-related fields. Make in dup_task_struct()
912 * sure they're properly initialized before using any stack-related in dup_task_struct()
915 tsk->stack = stack; in dup_task_struct()
917 tsk->stack_vm_area = stack_vm_area; in dup_task_struct()
920 refcount_set(&tsk->stack_refcount, 1); in dup_task_struct()
937 tsk->seccomp.filter = NULL; in dup_task_struct()
946 tsk->stack_canary = get_random_canary(); in dup_task_struct()
948 if (orig->cpus_ptr == &orig->cpus_mask) in dup_task_struct()
949 tsk->cpus_ptr = &tsk->cpus_mask; in dup_task_struct()
955 refcount_set(&tsk->rcu_users, 2); in dup_task_struct()
957 refcount_set(&tsk->usage, 1); in dup_task_struct()
959 tsk->btrace_seq = 0; in dup_task_struct()
961 tsk->splice_pipe = NULL; in dup_task_struct()
962 tsk->task_frag.page = NULL; in dup_task_struct()
963 tsk->wake_q.next = NULL; in dup_task_struct()
964 tsk->pf_io_worker = NULL; in dup_task_struct()
971 tsk->fail_nth = 0; in dup_task_struct()
975 tsk->throttle_queue = NULL; in dup_task_struct()
976 tsk->use_memdelay = 0; in dup_task_struct()
980 tsk->active_memcg = NULL; in dup_task_struct()
1015 spin_lock_init(&mm->ioctx_lock); in mm_init_aio()
1016 mm->ioctx_table = NULL; in mm_init_aio()
1024 if (mm->owner == p) in mm_clear_owner()
1025 WRITE_ONCE(mm->owner, NULL); in mm_clear_owner()
1032 mm->owner = p; in mm_init_owner()
1039 mm->pasid = INIT_PASID; in mm_init_pasid()
1046 mm->uprobes_state.xol_area = NULL; in mm_init_uprobes_state()
1053 mm->mmap = NULL; in mm_init()
1054 mm->mm_rb = RB_ROOT; in mm_init()
1055 mm->vmacache_seqnum = 0; in mm_init()
1057 rwlock_init(&mm->mm_rb_lock); in mm_init()
1059 atomic_set(&mm->mm_users, 1); in mm_init()
1060 atomic_set(&mm->mm_count, 1); in mm_init()
1061 seqcount_init(&mm->write_protect_seq); in mm_init()
1063 INIT_LIST_HEAD(&mm->mmlist); in mm_init()
1064 mm->core_state = NULL; in mm_init()
1066 mm->map_count = 0; in mm_init()
1067 mm->locked_vm = 0; in mm_init()
1068 atomic_set(&mm->has_pinned, 0); in mm_init()
1069 atomic64_set(&mm->pinned_vm, 0); in mm_init()
1070 memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); in mm_init()
1071 spin_lock_init(&mm->page_table_lock); in mm_init()
1072 spin_lock_init(&mm->arg_lock); in mm_init()
1077 RCU_INIT_POINTER(mm->exe_file, NULL); in mm_init()
1082 mm->pmd_huge_pte = NULL; in mm_init()
1087 if (current->mm) { in mm_init()
1088 mm->flags = current->mm->flags & MMF_INIT_MASK; in mm_init()
1089 mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; in mm_init()
1091 mm->flags = default_dump_filter; in mm_init()
1092 mm->def_flags = 0; in mm_init()
1101 mm->user_ns = get_user_ns(user_ns); in mm_init()
1123 return mm_init(mm, current, current_user_ns()); in mm_alloc()
1128 VM_BUG_ON(atomic_read(&mm->mm_users)); in __mmput()
1137 if (!list_empty(&mm->mmlist)) { in __mmput()
1139 list_del(&mm->mmlist); in __mmput()
1142 if (mm->binfmt) in __mmput()
1143 module_put(mm->binfmt->module); in __mmput()
1154 if (atomic_dec_and_test(&mm->mm_users)) { in mmput()
1172 if (atomic_dec_and_test(&mm->mm_users)) { in mmput_async()
1173 INIT_WORK(&mm->async_put_work, mmput_async_fn); in mmput_async()
1174 schedule_work(&mm->async_put_work); in mmput_async()
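Note: the mmput()/mmdrop() fragments above reflect the mm's two-level reference counting: mm_users pins the address-space contents (VMAs, page tables), mm_count pins struct mm_struct itself, and a final drop from atomic context is punted to a workqueue. A compact, compilable userspace analogue of the two-counter idea (names invented for illustration):

	#include <stdatomic.h>
	#include <stdlib.h>

	struct obj {
		atomic_int users;	/* like mm_users: users of the payload   */
		atomic_int count;	/* like mm_count: lifetime of the struct */
		void *payload;		/* like the address-space contents       */
	};

	static void obj_drop(struct obj *o)		/* mmdrop() analogue */
	{
		if (atomic_fetch_sub(&o->count, 1) == 1)
			free(o);			/* last pin: free the struct */
	}

	static void obj_put(struct obj *o)		/* mmput() analogue */
	{
		if (atomic_fetch_sub(&o->users, 1) == 1) {
			free(o->payload);		/* __mmput(): tear down payload */
			o->payload = NULL;
			obj_drop(o);			/* drop the users' pin */
		}
	}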
1181 * set_mm_exe_file - change a reference to the mm's executable file
1188 * mm->exe_file, but does so without using set_mm_exe_file() in order
1198 * this mm -- see comment above for justification. in set_mm_exe_file()
1200 old_exe_file = rcu_dereference_raw(mm->exe_file); in set_mm_exe_file()
1204 rcu_assign_pointer(mm->exe_file, new_exe_file); in set_mm_exe_file()
1210 * get_mm_exe_file - acquire a reference to the mm's executable file
1220 exe_file = rcu_dereference(mm->exe_file); in get_mm_exe_file()
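Note: only one line of get_mm_exe_file() is matched above; in kernels of this vintage the full function likely follows the usual RCU lookup pattern: dereference under rcu_read_lock() and take a reference only if the file's refcount is not already zero. A sketch consistent with the fragment (get_file_rcu() is the inc-not-zero helper):

	struct file *get_mm_exe_file(struct mm_struct *mm)
	{
		struct file *exe_file;

		rcu_read_lock();
		exe_file = rcu_dereference(mm->exe_file);
		/* Take a reference only if f_count was not already zero. */
		if (exe_file && !get_file_rcu(exe_file))
			exe_file = NULL;
		rcu_read_unlock();
		return exe_file;
	}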
1229 * get_task_exe_file - acquire a reference to the task's executable file
1241 mm = task->mm; in get_task_exe_file()
1243 if (!(task->flags & PF_KTHREAD)) in get_task_exe_file()
1252 * get_task_mm - acquire a reference to the task's mm
1265 mm = task->mm; in get_task_mm()
1267 if (task->flags & PF_KTHREAD) in get_task_mm()
1282 err = down_read_killable(&task->signal->exec_update_lock); in mm_access()
1287 if (mm && mm != current->mm && in mm_access()
1290 mm = ERR_PTR(-EACCES); in mm_access()
1292 up_read(&task->signal->exec_update_lock); in mm_access()
1302 vfork = tsk->vfork_done; in complete_vfork_done()
1304 tsk->vfork_done = NULL; in complete_vfork_done()
1323 child->vfork_done = NULL; in wait_for_vfork_done()
1336 * from the current process.
1356 if (tsk->clear_child_tid) { in mm_release()
1357 if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) && in mm_release()
1358 atomic_read(&mm->mm_users) > 1) { in mm_release()
1360 * We don't check the error code - if userspace has in mm_release()
1363 put_user(0, tsk->clear_child_tid); in mm_release()
1364 do_futex(tsk->clear_child_tid, FUTEX_WAKE, in mm_release()
1367 tsk->clear_child_tid = NULL; in mm_release()
1374 if (tsk->vfork_done) in mm_release()
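Note: the clear_child_tid fragments in mm_release() are the kernel half of CLONE_CHILD_CLEARTID: at exit, put_user(0, ...) zeroes a word in the shared address space and do_futex(..., FUTEX_WAKE, ...) wakes anyone waiting on it. A runnable Linux demo of the userspace half, using a thread-style clone (assumes the glibc clone() wrapper):

	#define _GNU_SOURCE
	#include <linux/futex.h>
	#include <sched.h>
	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/syscall.h>
	#include <sys/wait.h>
	#include <unistd.h>

	#define STACK_SZ (64 * 1024)

	static volatile pid_t ctid = 1;	/* nonzero until the kernel clears it */

	static int child_fn(void *arg)
	{
		(void)arg;
		usleep(100 * 1000);
		return 0;	/* on exit, mm_release() zeroes ctid, wakes the futex */
	}

	int main(void)
	{
		char *stack = malloc(STACK_SZ);
		pid_t pid = clone(child_fn, stack + STACK_SZ,
				  CLONE_VM | CLONE_CHILD_CLEARTID | SIGCHLD,
				  NULL, NULL, NULL, (pid_t *)&ctid);
		if (pid < 0) {
			perror("clone");
			return 1;
		}
		while (ctid != 0)	/* wait for the kernel's exit-time wake */
			syscall(SYS_futex, &ctid, FUTEX_WAIT, ctid, NULL, NULL, 0);
		printf("child %d exited; kernel cleared ctid for us\n", pid);
		waitpid(pid, NULL, 0);
		free(stack);
		return 0;
	}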
1391 * dup_mm() - duplicates an existing mm structure
1412 if (!mm_init(mm, tsk, mm->user_ns)) in dup_mm()
1419 mm->hiwater_rss = get_mm_rss(mm); in dup_mm()
1420 mm->hiwater_vm = mm->total_vm; in dup_mm()
1422 if (mm->binfmt && !try_module_get(mm->binfmt->module)) in dup_mm()
1429 mm->binfmt = NULL; in dup_mm()
1442 tsk->min_flt = tsk->maj_flt = 0; in copy_mm()
1443 tsk->nvcsw = tsk->nivcsw = 0; in copy_mm()
1445 tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; in copy_mm()
1446 tsk->last_switch_time = 0; in copy_mm()
1449 tsk->mm = NULL; in copy_mm()
1450 tsk->active_mm = NULL; in copy_mm()
1457 oldmm = current->mm; in copy_mm()
1470 retval = -ENOMEM; in copy_mm()
1471 mm = dup_mm(tsk, current->mm); in copy_mm()
1476 tsk->mm = mm; in copy_mm()
1477 tsk->active_mm = mm; in copy_mm()
1486 struct fs_struct *fs = current->fs; in copy_fs()
1488 /* tsk->fs is already what we want */ in copy_fs()
1489 spin_lock(&fs->lock); in copy_fs()
1490 if (fs->in_exec) { in copy_fs()
1491 spin_unlock(&fs->lock); in copy_fs()
1492 return -EAGAIN; in copy_fs()
1494 fs->users++; in copy_fs()
1495 spin_unlock(&fs->lock); in copy_fs()
1498 tsk->fs = copy_fs_struct(fs); in copy_fs()
1499 if (!tsk->fs) in copy_fs()
1500 return -ENOMEM; in copy_fs()
1512 oldf = current->files; in copy_files()
1517 atomic_inc(&oldf->count); in copy_files()
1525 tsk->files = newf; in copy_files()
1534 struct io_context *ioc = current->io_context; in copy_io()
1544 tsk->io_context = ioc; in copy_io()
1545 } else if (ioprio_valid(ioc->ioprio)) { in copy_io()
1548 return -ENOMEM; in copy_io()
1550 new_ioc->ioprio = ioc->ioprio; in copy_io()
1562 refcount_inc(&current->sighand->count); in copy_sighand()
1566 RCU_INIT_POINTER(tsk->sighand, sig); in copy_sighand()
1568 return -ENOMEM; in copy_sighand()
1570 refcount_set(&sig->count, 1); in copy_sighand()
1571 spin_lock_irq(&current->sighand->siglock); in copy_sighand()
1572 memcpy(sig->action, current->sighand->action, sizeof(sig->action)); in copy_sighand()
1573 spin_unlock_irq(&current->sighand->siglock); in copy_sighand()
1584 if (refcount_dec_and_test(&sighand->count)) { in __cleanup_sighand()
1599 struct posix_cputimers *pct = &sig->posix_cputimers; in posix_cpu_timers_init_group()
1602 cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); in posix_cpu_timers_init_group()
1614 tsk->signal = sig; in copy_signal()
1616 return -ENOMEM; in copy_signal()
1618 sig->nr_threads = 1; in copy_signal()
1619 atomic_set(&sig->live, 1); in copy_signal()
1620 refcount_set(&sig->sigcnt, 1); in copy_signal()
1623 sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); in copy_signal()
1624 tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head); in copy_signal()
1626 init_waitqueue_head(&sig->wait_chldexit); in copy_signal()
1627 sig->curr_target = tsk; in copy_signal()
1628 init_sigpending(&sig->shared_pending); in copy_signal()
1629 INIT_HLIST_HEAD(&sig->multiprocess); in copy_signal()
1630 seqlock_init(&sig->stats_lock); in copy_signal()
1631 prev_cputime_init(&sig->prev_cputime); in copy_signal()
1634 INIT_LIST_HEAD(&sig->posix_timers); in copy_signal()
1635 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); in copy_signal()
1636 sig->real_timer.function = it_real_fn; in copy_signal()
1639 task_lock(current->group_leader); in copy_signal()
1640 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); in copy_signal()
1641 task_unlock(current->group_leader); in copy_signal()
1648 sig->oom_score_adj = current->signal->oom_score_adj; in copy_signal()
1649 sig->oom_score_adj_min = current->signal->oom_score_adj_min; in copy_signal()
1651 mutex_init(&sig->cred_guard_mutex); in copy_signal()
1652 init_rwsem(&sig->exec_update_lock); in copy_signal()
1661 * Must be called with sighand->lock held, which is common to in copy_seccomp()
1666 assert_spin_locked(&current->sighand->siglock); in copy_seccomp()
1668 /* Ref-count the new filter user, and assign it. */ in copy_seccomp()
1669 get_seccomp_filter(current); in copy_seccomp()
1670 p->seccomp = current->seccomp; in copy_seccomp()
1677 if (task_no_new_privs(current)) in copy_seccomp()
1685 if (p->seccomp.mode != SECCOMP_MODE_DISABLED) in copy_seccomp()
1692 current->clear_child_tid = tidptr; in SYSCALL_DEFINE1()
1694 return task_pid_vnr(current); in SYSCALL_DEFINE1()
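Note: set_tid_address() (the SYSCALL_DEFINE1 fragments above) simply records the pointer in current->clear_child_tid and returns the caller's TID. A minimal runnable demo:

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		static int tid_word;
		long tid = syscall(SYS_set_tid_address, &tid_word);
		printf("my tid: %ld (kernel will clear and futex-wake &tid_word on exit)\n",
		       tid);
		return 0;
	}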
1699 raw_spin_lock_init(&p->pi_lock); in rt_mutex_init_task()
1701 p->pi_waiters = RB_ROOT_CACHED; in rt_mutex_init_task()
1702 p->pi_top_task = NULL; in rt_mutex_init_task()
1703 p->pi_blocked_on = NULL; in rt_mutex_init_task()
1712 INIT_HLIST_NODE(&task->pid_links[type]); in init_task_pid_links()
1720 task->thread_pid = pid; in init_task_pid()
1722 task->signal->pids[type] = pid; in init_task_pid()
1728 p->rcu_read_lock_nesting = 0; in rcu_copy_process()
1729 p->rcu_read_unlock_special.s = 0; in rcu_copy_process()
1730 p->rcu_blocked_node = NULL; in rcu_copy_process()
1731 INIT_LIST_HEAD(&p->rcu_node_entry); in rcu_copy_process()
1734 p->rcu_tasks_holdout = false; in rcu_copy_process()
1735 INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); in rcu_copy_process()
1736 p->rcu_tasks_idle_cpu = -1; in rcu_copy_process()
1739 p->trc_reader_nesting = 0; in rcu_copy_process()
1740 p->trc_reader_special.s = 0; in rcu_copy_process()
1741 INIT_LIST_HEAD(&p->trc_holdout_list); in rcu_copy_process()
1747 if (file->f_op == &pidfd_fops) in pidfd_pid()
1748 return file->private_data; in pidfd_pid()
1750 return ERR_PTR(-EBADF); in pidfd_pid()
1755 struct pid *pid = file->private_data; in pidfd_release()
1757 file->private_data = NULL; in pidfd_release()
1764 * pidfd_show_fdinfo - print information about a pidfd
1779 * starting from the current pid namespace of the instance, i.e. the
1790 * - create two new pid namespaces ns1 and ns2 in the initial pid
1793 * - create a process with a pidfd in ns1
1794 * - send pidfd from ns1 to ns2
1795 * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid
1800 struct pid *pid = f->private_data; in pidfd_show_fdinfo()
1802 pid_t nr = -1; in pidfd_show_fdinfo()
1805 ns = proc_pid_ns(file_inode(m->file)->i_sb); in pidfd_show_fdinfo()
1816 /* If nr is non-zero it means that 'pid' is valid and that in pidfd_show_fdinfo()
1821 for (i = ns->level + 1; i <= pid->level; i++) in pidfd_show_fdinfo()
1822 seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); in pidfd_show_fdinfo()
1834 struct pid *pid = file->private_data; in pidfd_poll()
1837 poll_wait(file, &pid->wait_pidfd, pts); in pidfd_poll()
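Note: pidfd_poll() is what makes a pidfd readable once the process exits. A runnable demo using pidfd_open() (Linux 5.3+) plus poll():

	#define _GNU_SOURCE
	#include <poll.h>
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		pid_t pid = fork();
		if (pid == 0) {
			sleep(1);
			_exit(0);
		}
		int pidfd = (int)syscall(SYS_pidfd_open, pid, 0);
		if (pidfd < 0) {
			perror("pidfd_open");
			return 1;
		}
		struct pollfd pfd = { .fd = pidfd, .events = POLLIN };
		poll(&pfd, 1, -1);	/* blocks until the child exits */
		printf("pidfd readable: child %d has exited\n", pid);
		waitpid(pid, NULL, 0);
		return 0;
	}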
1868 call_rcu(&tsk->rcu, __delayed_free_task); in delayed_free_task()
1876 if (!tsk->mm) in copy_oom_score_adj()
1885 set_bit(MMF_MULTIPROCESS, &tsk->mm->flags); in copy_oom_score_adj()
1887 tsk->signal->oom_score_adj = current->signal->oom_score_adj; in copy_oom_score_adj()
1888 tsk->signal->oom_score_adj_min = current->signal->oom_score_adj_min; in copy_oom_score_adj()
1898 * flags). The actual kick-off is left to the caller.
1906 int pidfd = -1, retval; in copy_process()
1910 u64 clone_flags = args->flags; in copy_process()
1911 struct nsproxy *nsp = current->nsproxy; in copy_process()
1918 return ERR_PTR(-EINVAL); in copy_process()
1921 return ERR_PTR(-EINVAL); in copy_process()
1928 return ERR_PTR(-EINVAL); in copy_process()
1936 return ERR_PTR(-EINVAL); in copy_process()
1941 * multi-rooted process trees, prevent global and container-inits in copy_process()
1945 current->signal->flags & SIGNAL_UNKILLABLE) in copy_process()
1946 return ERR_PTR(-EINVAL); in copy_process()
1954 (task_active_pid_ns(current) != nsp->pid_ns_for_children)) in copy_process()
1955 return ERR_PTR(-EINVAL); in copy_process()
1963 if (nsp->time_ns != nsp->time_ns_for_children) in copy_process()
1964 return ERR_PTR(-EINVAL); in copy_process()
1969 * - CLONE_DETACHED is blocked so that we can potentially in copy_process()
1971 * - CLONE_THREAD is blocked until someone really needs it. in copy_process()
1974 return ERR_PTR(-EINVAL); in copy_process()
1986 spin_lock_irq(&current->sighand->siglock); in copy_process()
1988 hlist_add_head(&delayed.node, &current->signal->multiprocess); in copy_process()
1990 spin_unlock_irq(&current->sighand->siglock); in copy_process()
1991 retval = -ERESTARTNOINTR; in copy_process()
1992 if (task_sigpending(current)) in copy_process()
1995 retval = -ENOMEM; in copy_process()
1996 p = dup_task_struct(current, node); in copy_process()
1999 if (args->io_thread) { in copy_process()
2004 p->flags |= PF_IO_WORKER; in copy_process()
2005 siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP)); in copy_process()
2013 * p->set_child_tid which is (ab)used as a kthread's data pointer for in copy_process()
2016 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL; in copy_process()
2020 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL; in copy_process()
2028 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); in copy_process()
2030 retval = -EAGAIN; in copy_process()
2031 if (atomic_read(&p->real_cred->user->processes) >= in copy_process()
2033 if (p->real_cred->user != INIT_USER && in copy_process()
2037 current->flags &= ~PF_NPROC_EXCEEDED; in copy_process()
2048 retval = -EAGAIN; in copy_process()
2053 p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE); in copy_process()
2054 p->flags |= PF_FORKNOEXEC; in copy_process()
2055 INIT_LIST_HEAD(&p->children); in copy_process()
2056 INIT_LIST_HEAD(&p->sibling); in copy_process()
2058 p->vfork_done = NULL; in copy_process()
2059 spin_lock_init(&p->alloc_lock); in copy_process()
2061 init_sigpending(&p->pending); in copy_process()
2063 p->utime = p->stime = p->gtime = 0; in copy_process()
2065 p->utimescaled = p->stimescaled = 0; in copy_process()
2067 prev_cputime_init(&p->prev_cputime); in copy_process()
2070 seqcount_init(&p->vtime.seqcount); in copy_process()
2071 p->vtime.starttime = 0; in copy_process()
2072 p->vtime.state = VTIME_INACTIVE; in copy_process()
2076 p->io_uring = NULL; in copy_process()
2080 memset(&p->rss_stat, 0, sizeof(p->rss_stat)); in copy_process()
2083 p->default_timer_slack_ns = current->timer_slack_ns; in copy_process()
2086 p->psi_flags = 0; in copy_process()
2089 task_io_accounting_init(&p->ioac); in copy_process()
2092 posix_cputimers_init(&p->posix_cputimers); in copy_process()
2094 p->io_context = NULL; in copy_process()
2098 p->mempolicy = mpol_dup(p->mempolicy); in copy_process()
2099 if (IS_ERR(p->mempolicy)) { in copy_process()
2100 retval = PTR_ERR(p->mempolicy); in copy_process()
2101 p->mempolicy = NULL; in copy_process()
2106 p->cpuset_mem_spread_rotor = NUMA_NO_NODE; in copy_process()
2107 p->cpuset_slab_spread_rotor = NUMA_NO_NODE; in copy_process()
2108 seqcount_spinlock_init(&p->mems_allowed_seq, &p->alloc_lock); in copy_process()
2111 memset(&p->irqtrace, 0, sizeof(p->irqtrace)); in copy_process()
2112 p->irqtrace.hardirq_disable_ip = _THIS_IP_; in copy_process()
2113 p->irqtrace.softirq_enable_ip = _THIS_IP_; in copy_process()
2114 p->softirqs_enabled = 1; in copy_process()
2115 p->softirq_context = 0; in copy_process()
2118 p->pagefault_disabled = 0; in copy_process()
2125 p->blocked_on = NULL; /* not blocked yet */ in copy_process()
2128 p->sequential_io = 0; in copy_process()
2129 p->sequential_io_avg = 0; in copy_process()
2172 retval = copy_thread(clone_flags, args->stack, args->stack_size, p, args->tls); in copy_process()
2179 pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid, in copy_process()
2180 args->set_tid_size); in copy_process()
2208 retval = put_user(pidfd, args->pidfd); in copy_process()
2214 p->plug = NULL; in copy_process()
2236 p->pid = pid_nr(pid); in copy_process()
2238 p->group_leader = current->group_leader; in copy_process()
2239 p->tgid = current->tgid; in copy_process()
2241 p->group_leader = p; in copy_process()
2242 p->tgid = p->pid; in copy_process()
2245 p->nr_dirtied = 0; in copy_process()
2246 p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); in copy_process()
2247 p->dirty_paused_when = 0; in copy_process()
2249 p->pdeath_signal = 0; in copy_process()
2250 INIT_LIST_HEAD(&p->thread_group); in copy_process()
2251 p->task_works = NULL; in copy_process()
2265 * Now that the cgroups are pinned, re-clone the parent cgroup and put in copy_process()
2269 * This isn't part of ->can_fork() because while the re-cloning is in copy_process()
2276 * From this point on we must avoid any synchronous user-space in copy_process()
2277 * communication until we take the tasklist-lock. In particular, we do in copy_process()
2278 * not want user-space to be able to predict the process start-time by in copy_process()
2283 p->start_time = ktime_get_ns(); in copy_process()
2284 p->start_boottime = ktime_get_boottime_ns(); in copy_process()
2292 /* CLONE_PARENT re-uses the old parent */ in copy_process()
2294 p->real_parent = current->real_parent; in copy_process()
2295 p->parent_exec_id = current->parent_exec_id; in copy_process()
2297 p->exit_signal = -1; in copy_process()
2299 p->exit_signal = current->group_leader->exit_signal; in copy_process()
2301 p->real_parent = current; in copy_process()
2302 p->parent_exec_id = current->self_exec_id; in copy_process()
2303 p->exit_signal = args->exit_signal; in copy_process()
2308 spin_lock(&current->sighand->siglock); in copy_process()
2319 if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) { in copy_process()
2320 retval = -ENOMEM; in copy_process()
2325 if (fatal_signal_pending(current)) { in copy_process()
2326 retval = -EINTR; in copy_process()
2331 if (likely(p->pid)) { in copy_process()
2337 init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); in copy_process()
2338 init_task_pid(p, PIDTYPE_SID, task_session(current)); in copy_process()
2341 ns_of_pid(pid)->child_reaper = p; in copy_process()
2342 p->signal->flags |= SIGNAL_UNKILLABLE; in copy_process()
2344 p->signal->shared_pending.signal = delayed.signal; in copy_process()
2345 p->signal->tty = tty_kref_get(current->signal->tty); in copy_process()
2351 p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || in copy_process()
2352 p->real_parent->signal->is_child_subreaper; in copy_process()
2353 list_add_tail(&p->sibling, &p->real_parent->children); in copy_process()
2354 list_add_tail_rcu(&p->tasks, &init_task.tasks); in copy_process()
2360 current->signal->nr_threads++; in copy_process()
2361 atomic_inc(&current->signal->live); in copy_process()
2362 refcount_inc(&current->signal->sigcnt); in copy_process()
2364 list_add_tail_rcu(&p->thread_group, in copy_process()
2365 &p->group_leader->thread_group); in copy_process()
2366 list_add_tail_rcu(&p->thread_node, in copy_process()
2367 &p->signal->thread_head); in copy_process()
2374 spin_unlock(&current->sighand->siglock); in copy_process()
2394 spin_unlock(&current->sighand->siglock); in copy_process()
2408 if (p->io_context) in copy_process()
2413 if (p->mm) { in copy_process()
2414 mm_clear_owner(p->mm, p); in copy_process()
2415 mmput(p->mm); in copy_process()
2419 free_signal_struct(p->signal); in copy_process()
2421 __cleanup_sighand(p->sighand); in copy_process()
2437 mpol_put(p->mempolicy); in copy_process()
2442 atomic_dec(&p->cred->user->processes); in copy_process()
2445 p->state = TASK_DEAD; in copy_process()
2449 spin_lock_irq(&current->sighand->siglock); in copy_process()
2451 spin_unlock_irq(&current->sighand->siglock); in copy_process()
2460 INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */ in init_idle_pids()
2509 * Ok, this is the main fork-routine.
2511 * It copies the process, and if successful kick-starts
2514 * args->exit_signal is expected to be checked for sanity by the caller.
2518 u64 clone_flags = args->flags; in kernel_clone()
2534 if ((args->flags & CLONE_PIDFD) && in kernel_clone()
2535 (args->flags & CLONE_PARENT_SETTID) && in kernel_clone()
2536 (args->pidfd == args->parent_tid)) in kernel_clone()
2537 return -EINVAL; in kernel_clone()
2548 else if (args->exit_signal != SIGCHLD) in kernel_clone()
2553 if (likely(!ptrace_event_enabled(current, trace))) in kernel_clone()
2566 * Do this prior waking up the new thread - the thread pointer in kernel_clone()
2569 trace_sched_process_fork(current, p); in kernel_clone()
2575 put_user(nr, args->parent_tid); in kernel_clone()
2578 p->vfork_done = &vfork; in kernel_clone()
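Note: in kernel_clone(), p->vfork_done = &vfork together with wait_for_vfork_done() is what makes CLONE_VFORK block the parent until the child exits or execs (complete_vfork_done()). A runnable demo with a thread-style clone; the ~1 s delay is the parent sitting in wait_for_vfork_done():

	#define _GNU_SOURCE
	#include <sched.h>
	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/wait.h>
	#include <time.h>
	#include <unistd.h>

	#define STACK_SZ (64 * 1024)

	static int child_fn(void *arg)
	{
		(void)arg;
		sleep(1);	/* parent is blocked in wait_for_vfork_done() */
		return 0;
	}

	int main(void)
	{
		char *stack = malloc(STACK_SZ);
		time_t t0 = time(NULL);
		pid_t pid = clone(child_fn, stack + STACK_SZ,
				  CLONE_VFORK | CLONE_VM | SIGCHLD, NULL);
		printf("parent resumed after ~%ld s (child %d already exited)\n",
		       (long)(time(NULL) - t0), pid);
		waitpid(pid, NULL, 0);
		free(stack);
		return 0;
	}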
2625 return -EINVAL; in SYSCALL_DEFINE0()
2688 pid_t *kset_tid = kargs->set_tid; in copy_clone_args_from_user()
2699 return -E2BIG; in copy_clone_args_from_user()
2701 return -EINVAL; in copy_clone_args_from_user()
2708 return -EINVAL; in copy_clone_args_from_user()
2711 return -EINVAL; in copy_clone_args_from_user()
2714 return -EINVAL; in copy_clone_args_from_user()
2722 return -EINVAL; in copy_clone_args_from_user()
2726 return -EINVAL; in copy_clone_args_from_user()
2743 (kargs->set_tid_size * sizeof(pid_t)))) in copy_clone_args_from_user()
2744 return -EFAULT; in copy_clone_args_from_user()
2746 kargs->set_tid = kset_tid; in copy_clone_args_from_user()
2752 * clone3_stack_valid - check and prepare stack
2761 if (kargs->stack == 0) { in clone3_stack_valid()
2762 if (kargs->stack_size > 0) in clone3_stack_valid()
2765 if (kargs->stack_size == 0) in clone3_stack_valid()
2768 if (!access_ok((void __user *)kargs->stack, kargs->stack_size)) in clone3_stack_valid()
2772 kargs->stack += kargs->stack_size; in clone3_stack_valid()
2782 if (kargs->flags & in clone3_args_valid()
2787 * - make the CLONE_DETACHED bit reuseable for clone3 in clone3_args_valid()
2788 * - make the CSIGNAL bits reuseable for clone3 in clone3_args_valid()
2790 if (kargs->flags & (CLONE_DETACHED | CSIGNAL)) in clone3_args_valid()
2793 if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) == in clone3_args_valid()
2797 if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) && in clone3_args_valid()
2798 kargs->exit_signal) in clone3_args_valid()
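Note: clone3_stack_valid() accepts stack == 0 only with stack_size == 0 (the fork-style case) and otherwise requires both, plus an access_ok() check. A runnable clone3() demo that takes the fork-style path and also exercises CLONE_PIDFD (assumes Linux 5.3+ and headers recent enough to provide SYS_clone3 and struct clone_args):

	#define _GNU_SOURCE
	#include <linux/sched.h>	/* struct clone_args, CLONE_PIDFD */
	#include <signal.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		int pidfd = -1;
		struct clone_args args;

		memset(&args, 0, sizeof(args));	/* trailing zeroes keep the
						 * kernel's size check happy */
		args.flags = CLONE_PIDFD;
		args.pidfd = (uintptr_t)&pidfd;
		args.exit_signal = SIGCHLD;	/* stack/stack_size stay 0 */

		long pid = syscall(SYS_clone3, &args, sizeof(args));
		if (pid < 0) {
			perror("clone3");
			return 1;
		}
		if (pid == 0)
			_exit(0);		/* child */
		printf("child %ld, pidfd %d\n", pid, pidfd);
		waitpid((pid_t)pid, NULL, 0);
		return 0;
	}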
2808 * clone3 - create a new process with specific properties
2832 return -EINVAL; in SYSCALL_DEFINE2()
2844 leader = top = top->group_leader; in walk_process_tree()
2847 list_for_each_entry(child, &parent->children, sibling) { in walk_process_tree()
2862 parent = child->real_parent; in walk_process_tree()
2863 leader = parent->group_leader; in walk_process_tree()
2878 spin_lock_init(&sighand->siglock); in sighand_ctor()
2879 init_waitqueue_head(&sighand->signalfd_wqh); in sighand_ctor()
2931 return -EINVAL; in check_unshare_flags()
2939 if (!thread_group_empty(current)) in check_unshare_flags()
2940 return -EINVAL; in check_unshare_flags()
2943 if (refcount_read(&current->sighand->count) > 1) in check_unshare_flags()
2944 return -EINVAL; in check_unshare_flags()
2948 return -EINVAL; in check_unshare_flags()
2959 struct fs_struct *fs = current->fs; in unshare_fs()
2965 if (fs->users == 1) in unshare_fs()
2970 return -ENOMEM; in unshare_fs()
2981 struct files_struct *fd = current->files; in unshare_fd()
2985 (fd && atomic_read(&fd->count) > 1)) { in unshare_fd()
2999 * constructed. Here we are modifying the current, active,
3062 exit_sem(current); in ksys_unshare()
3066 exit_shm(current); in ksys_unshare()
3067 shm_init_task(current); in ksys_unshare()
3071 switch_task_namespaces(current, new_nsproxy); in ksys_unshare()
3073 task_lock(current); in ksys_unshare()
3076 fs = current->fs; in ksys_unshare()
3077 spin_lock(&fs->lock); in ksys_unshare()
3078 current->fs = new_fs; in ksys_unshare()
3079 if (--fs->users) in ksys_unshare()
3083 spin_unlock(&fs->lock); in ksys_unshare()
3087 fd = current->files; in ksys_unshare()
3088 current->files = new_fd; in ksys_unshare()
3092 task_unlock(current); in ksys_unshare()
3101 perf_event_namespaces(current); in ksys_unshare()
3124 * Helper to unshare the files of the current task.
3131 struct task_struct *task = current; in unshare_files()
3140 *displaced = task->files; in unshare_files()
3142 task->files = copy; in unshare_files()
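Note: ksys_unshare() above swaps in new namespaces via switch_task_namespaces() and replaces fs/files under task_lock(). A runnable demo driving that path from userspace; it assumes unprivileged user namespaces are enabled on the running kernel:

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		char host[64];

		if (unshare(CLONE_NEWUSER | CLONE_NEWUTS)) {
			perror("unshare");
			return 1;
		}
		/* We hold full capabilities in the new user namespace, which
		 * owns the new UTS namespace, so sethostname() is permitted. */
		sethostname("fork-demo", strlen("fork-demo"));
		gethostname(host, sizeof(host));
		printf("hostname inside the new UTS namespace: %s\n", host);
		return 0;
	}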