// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/namespace.c
 * Copyright (C) 2006 Pavel Emelyanov <xemul@openvz.org> OpenVZ, SWsoft Inc.
 */
6*4882a593Smuzhiyun
7*4882a593Smuzhiyun #include <linux/ipc.h>
8*4882a593Smuzhiyun #include <linux/msg.h>
9*4882a593Smuzhiyun #include <linux/ipc_namespace.h>
10*4882a593Smuzhiyun #include <linux/rcupdate.h>
11*4882a593Smuzhiyun #include <linux/nsproxy.h>
12*4882a593Smuzhiyun #include <linux/slab.h>
13*4882a593Smuzhiyun #include <linux/cred.h>
14*4882a593Smuzhiyun #include <linux/fs.h>
15*4882a593Smuzhiyun #include <linux/mount.h>
16*4882a593Smuzhiyun #include <linux/user_namespace.h>
17*4882a593Smuzhiyun #include <linux/proc_ns.h>
18*4882a593Smuzhiyun #include <linux/sched/task.h>
19*4882a593Smuzhiyun
20*4882a593Smuzhiyun #include "util.h"
21*4882a593Smuzhiyun
inc_ipc_namespaces(struct user_namespace * ns)22*4882a593Smuzhiyun static struct ucounts *inc_ipc_namespaces(struct user_namespace *ns)
23*4882a593Smuzhiyun {
24*4882a593Smuzhiyun return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES);
25*4882a593Smuzhiyun }
26*4882a593Smuzhiyun
dec_ipc_namespaces(struct ucounts * ucounts)27*4882a593Smuzhiyun static void dec_ipc_namespaces(struct ucounts *ucounts)
28*4882a593Smuzhiyun {
29*4882a593Smuzhiyun dec_ucount(ucounts, UCOUNT_IPC_NAMESPACES);
30*4882a593Smuzhiyun }
31*4882a593Smuzhiyun
create_ipc_ns(struct user_namespace * user_ns,struct ipc_namespace * old_ns)32*4882a593Smuzhiyun static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
33*4882a593Smuzhiyun struct ipc_namespace *old_ns)
34*4882a593Smuzhiyun {
35*4882a593Smuzhiyun struct ipc_namespace *ns;
36*4882a593Smuzhiyun struct ucounts *ucounts;
37*4882a593Smuzhiyun int err;
38*4882a593Smuzhiyun
39*4882a593Smuzhiyun err = -ENOSPC;
40*4882a593Smuzhiyun ucounts = inc_ipc_namespaces(user_ns);
41*4882a593Smuzhiyun if (!ucounts)
42*4882a593Smuzhiyun goto fail;
43*4882a593Smuzhiyun
44*4882a593Smuzhiyun err = -ENOMEM;
45*4882a593Smuzhiyun ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
46*4882a593Smuzhiyun if (ns == NULL)
47*4882a593Smuzhiyun goto fail_dec;
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun err = ns_alloc_inum(&ns->ns);
50*4882a593Smuzhiyun if (err)
51*4882a593Smuzhiyun goto fail_free;
52*4882a593Smuzhiyun ns->ns.ops = &ipcns_operations;
53*4882a593Smuzhiyun
54*4882a593Smuzhiyun refcount_set(&ns->count, 1);
55*4882a593Smuzhiyun ns->user_ns = get_user_ns(user_ns);
56*4882a593Smuzhiyun ns->ucounts = ucounts;
57*4882a593Smuzhiyun
58*4882a593Smuzhiyun err = mq_init_ns(ns);
59*4882a593Smuzhiyun if (err)
60*4882a593Smuzhiyun goto fail_put;
61*4882a593Smuzhiyun
62*4882a593Smuzhiyun sem_init_ns(ns);
63*4882a593Smuzhiyun msg_init_ns(ns);
64*4882a593Smuzhiyun shm_init_ns(ns);
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun return ns;
67*4882a593Smuzhiyun
68*4882a593Smuzhiyun fail_put:
69*4882a593Smuzhiyun put_user_ns(ns->user_ns);
70*4882a593Smuzhiyun ns_free_inum(&ns->ns);
71*4882a593Smuzhiyun fail_free:
72*4882a593Smuzhiyun kfree(ns);
73*4882a593Smuzhiyun fail_dec:
74*4882a593Smuzhiyun dec_ipc_namespaces(ucounts);
75*4882a593Smuzhiyun fail:
76*4882a593Smuzhiyun return ERR_PTR(err);
77*4882a593Smuzhiyun }
78*4882a593Smuzhiyun
copy_ipcs(unsigned long flags,struct user_namespace * user_ns,struct ipc_namespace * ns)79*4882a593Smuzhiyun struct ipc_namespace *copy_ipcs(unsigned long flags,
80*4882a593Smuzhiyun struct user_namespace *user_ns, struct ipc_namespace *ns)
81*4882a593Smuzhiyun {
82*4882a593Smuzhiyun if (!(flags & CLONE_NEWIPC))
83*4882a593Smuzhiyun return get_ipc_ns(ns);
84*4882a593Smuzhiyun return create_ipc_ns(user_ns, ns);
85*4882a593Smuzhiyun }
86*4882a593Smuzhiyun
87*4882a593Smuzhiyun /*
88*4882a593Smuzhiyun * free_ipcs - free all ipcs of one type
89*4882a593Smuzhiyun * @ns: the namespace to remove the ipcs from
90*4882a593Smuzhiyun * @ids: the table of ipcs to free
91*4882a593Smuzhiyun * @free: the function called to free each individual ipc
92*4882a593Smuzhiyun *
93*4882a593Smuzhiyun * Called for each kind of ipc when an ipc_namespace exits.
94*4882a593Smuzhiyun */
free_ipcs(struct ipc_namespace * ns,struct ipc_ids * ids,void (* free)(struct ipc_namespace *,struct kern_ipc_perm *))95*4882a593Smuzhiyun void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
96*4882a593Smuzhiyun void (*free)(struct ipc_namespace *, struct kern_ipc_perm *))
97*4882a593Smuzhiyun {
98*4882a593Smuzhiyun struct kern_ipc_perm *perm;
99*4882a593Smuzhiyun int next_id;
100*4882a593Smuzhiyun int total, in_use;
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun down_write(&ids->rwsem);
103*4882a593Smuzhiyun
104*4882a593Smuzhiyun in_use = ids->in_use;
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun for (total = 0, next_id = 0; total < in_use; next_id++) {
107*4882a593Smuzhiyun perm = idr_find(&ids->ipcs_idr, next_id);
108*4882a593Smuzhiyun if (perm == NULL)
109*4882a593Smuzhiyun continue;
110*4882a593Smuzhiyun rcu_read_lock();
111*4882a593Smuzhiyun ipc_lock_object(perm);
112*4882a593Smuzhiyun free(ns, perm);
113*4882a593Smuzhiyun total++;
114*4882a593Smuzhiyun }
115*4882a593Smuzhiyun up_write(&ids->rwsem);
116*4882a593Smuzhiyun }
117*4882a593Smuzhiyun
free_ipc_ns(struct ipc_namespace * ns)118*4882a593Smuzhiyun static void free_ipc_ns(struct ipc_namespace *ns)
119*4882a593Smuzhiyun {
120*4882a593Smuzhiyun /* mq_put_mnt() waits for a grace period as kern_unmount()
121*4882a593Smuzhiyun * uses synchronize_rcu().
122*4882a593Smuzhiyun */
123*4882a593Smuzhiyun mq_put_mnt(ns);
124*4882a593Smuzhiyun sem_exit_ns(ns);
125*4882a593Smuzhiyun msg_exit_ns(ns);
126*4882a593Smuzhiyun shm_exit_ns(ns);
127*4882a593Smuzhiyun
128*4882a593Smuzhiyun dec_ipc_namespaces(ns->ucounts);
129*4882a593Smuzhiyun put_user_ns(ns->user_ns);
130*4882a593Smuzhiyun ns_free_inum(&ns->ns);
131*4882a593Smuzhiyun kfree(ns);
132*4882a593Smuzhiyun }
133*4882a593Smuzhiyun
134*4882a593Smuzhiyun static LLIST_HEAD(free_ipc_list);
free_ipc(struct work_struct * unused)135*4882a593Smuzhiyun static void free_ipc(struct work_struct *unused)
136*4882a593Smuzhiyun {
137*4882a593Smuzhiyun struct llist_node *node = llist_del_all(&free_ipc_list);
138*4882a593Smuzhiyun struct ipc_namespace *n, *t;
139*4882a593Smuzhiyun
140*4882a593Smuzhiyun llist_for_each_entry_safe(n, t, node, mnt_llist)
141*4882a593Smuzhiyun free_ipc_ns(n);
142*4882a593Smuzhiyun }
143*4882a593Smuzhiyun
/*
 * Freeing is deferred to a work item so that callers do not pay the cost
 * of synchronize_rcu() in kern_unmount().
 */
static DECLARE_WORK(free_ipc_work, free_ipc);
148*4882a593Smuzhiyun
149*4882a593Smuzhiyun /*
150*4882a593Smuzhiyun * put_ipc_ns - drop a reference to an ipc namespace.
151*4882a593Smuzhiyun * @ns: the namespace to put
152*4882a593Smuzhiyun *
153*4882a593Smuzhiyun * If this is the last task in the namespace exiting, and
154*4882a593Smuzhiyun * it is dropping the refcount to 0, then it can race with
155*4882a593Smuzhiyun * a task in another ipc namespace but in a mounts namespace
156*4882a593Smuzhiyun * which has this ipcns's mqueuefs mounted, doing some action
157*4882a593Smuzhiyun * with one of the mqueuefs files. That can raise the refcount.
158*4882a593Smuzhiyun * So dropping the refcount, and raising the refcount when
159*4882a593Smuzhiyun * accessing it through the VFS, are protected with mq_lock.
160*4882a593Smuzhiyun *
161*4882a593Smuzhiyun * (Clearly, a task raising the refcount on its own ipc_ns
162*4882a593Smuzhiyun * needn't take mq_lock since it can't race with the last task
163*4882a593Smuzhiyun * in the ipcns exiting).
164*4882a593Smuzhiyun */
put_ipc_ns(struct ipc_namespace * ns)165*4882a593Smuzhiyun void put_ipc_ns(struct ipc_namespace *ns)
166*4882a593Smuzhiyun {
167*4882a593Smuzhiyun if (refcount_dec_and_lock(&ns->count, &mq_lock)) {
168*4882a593Smuzhiyun mq_clear_sbinfo(ns);
169*4882a593Smuzhiyun spin_unlock(&mq_lock);
170*4882a593Smuzhiyun
171*4882a593Smuzhiyun if (llist_add(&ns->mnt_llist, &free_ipc_list))
172*4882a593Smuzhiyun schedule_work(&free_ipc_work);
173*4882a593Smuzhiyun }
174*4882a593Smuzhiyun }
175*4882a593Smuzhiyun
to_ipc_ns(struct ns_common * ns)176*4882a593Smuzhiyun static inline struct ipc_namespace *to_ipc_ns(struct ns_common *ns)
177*4882a593Smuzhiyun {
178*4882a593Smuzhiyun return container_of(ns, struct ipc_namespace, ns);
179*4882a593Smuzhiyun }
180*4882a593Smuzhiyun
ipcns_get(struct task_struct * task)181*4882a593Smuzhiyun static struct ns_common *ipcns_get(struct task_struct *task)
182*4882a593Smuzhiyun {
183*4882a593Smuzhiyun struct ipc_namespace *ns = NULL;
184*4882a593Smuzhiyun struct nsproxy *nsproxy;
185*4882a593Smuzhiyun
186*4882a593Smuzhiyun task_lock(task);
187*4882a593Smuzhiyun nsproxy = task->nsproxy;
188*4882a593Smuzhiyun if (nsproxy)
189*4882a593Smuzhiyun ns = get_ipc_ns(nsproxy->ipc_ns);
190*4882a593Smuzhiyun task_unlock(task);
191*4882a593Smuzhiyun
192*4882a593Smuzhiyun return ns ? &ns->ns : NULL;
193*4882a593Smuzhiyun }
194*4882a593Smuzhiyun
/*
 * ipcns_put - drop the reference on the ipc namespace embedding @ns
 *
 * Thin adapter from the ns_common based interface to put_ipc_ns().
 * Called as a plain statement: a "return" carrying an expression is not
 * permitted by ISO C in a function returning void.
 */
static void ipcns_put(struct ns_common *ns)
{
	put_ipc_ns(to_ipc_ns(ns));
}
199*4882a593Smuzhiyun
ipcns_install(struct nsset * nsset,struct ns_common * new)200*4882a593Smuzhiyun static int ipcns_install(struct nsset *nsset, struct ns_common *new)
201*4882a593Smuzhiyun {
202*4882a593Smuzhiyun struct nsproxy *nsproxy = nsset->nsproxy;
203*4882a593Smuzhiyun struct ipc_namespace *ns = to_ipc_ns(new);
204*4882a593Smuzhiyun if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
205*4882a593Smuzhiyun !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
206*4882a593Smuzhiyun return -EPERM;
207*4882a593Smuzhiyun
208*4882a593Smuzhiyun put_ipc_ns(nsproxy->ipc_ns);
209*4882a593Smuzhiyun nsproxy->ipc_ns = get_ipc_ns(ns);
210*4882a593Smuzhiyun return 0;
211*4882a593Smuzhiyun }
212*4882a593Smuzhiyun
ipcns_owner(struct ns_common * ns)213*4882a593Smuzhiyun static struct user_namespace *ipcns_owner(struct ns_common *ns)
214*4882a593Smuzhiyun {
215*4882a593Smuzhiyun return to_ipc_ns(ns)->user_ns;
216*4882a593Smuzhiyun }
217*4882a593Smuzhiyun
218*4882a593Smuzhiyun const struct proc_ns_operations ipcns_operations = {
219*4882a593Smuzhiyun .name = "ipc",
220*4882a593Smuzhiyun .type = CLONE_NEWIPC,
221*4882a593Smuzhiyun .get = ipcns_get,
222*4882a593Smuzhiyun .put = ipcns_put,
223*4882a593Smuzhiyun .install = ipcns_install,
224*4882a593Smuzhiyun .owner = ipcns_owner,
225*4882a593Smuzhiyun };
226