1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * linux/fs/namei.c
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright (C) 1991, 1992 Linus Torvalds
6*4882a593Smuzhiyun */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun /*
9*4882a593Smuzhiyun * Some corrections by tytso.
10*4882a593Smuzhiyun */
11*4882a593Smuzhiyun
12*4882a593Smuzhiyun /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
13*4882a593Smuzhiyun * lookup logic.
14*4882a593Smuzhiyun */
15*4882a593Smuzhiyun /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
16*4882a593Smuzhiyun */
17*4882a593Smuzhiyun
18*4882a593Smuzhiyun #include <linux/init.h>
19*4882a593Smuzhiyun #include <linux/export.h>
20*4882a593Smuzhiyun #include <linux/kernel.h>
21*4882a593Smuzhiyun #include <linux/slab.h>
22*4882a593Smuzhiyun #include <linux/fs.h>
23*4882a593Smuzhiyun #include <linux/namei.h>
24*4882a593Smuzhiyun #include <linux/pagemap.h>
25*4882a593Smuzhiyun #include <linux/fsnotify.h>
26*4882a593Smuzhiyun #include <linux/personality.h>
27*4882a593Smuzhiyun #include <linux/security.h>
28*4882a593Smuzhiyun #include <linux/ima.h>
29*4882a593Smuzhiyun #include <linux/syscalls.h>
30*4882a593Smuzhiyun #include <linux/mount.h>
31*4882a593Smuzhiyun #include <linux/audit.h>
32*4882a593Smuzhiyun #include <linux/capability.h>
33*4882a593Smuzhiyun #include <linux/file.h>
34*4882a593Smuzhiyun #include <linux/fcntl.h>
35*4882a593Smuzhiyun #include <linux/device_cgroup.h>
36*4882a593Smuzhiyun #include <linux/fs_struct.h>
37*4882a593Smuzhiyun #include <linux/posix_acl.h>
38*4882a593Smuzhiyun #include <linux/hash.h>
39*4882a593Smuzhiyun #include <linux/bitops.h>
40*4882a593Smuzhiyun #include <linux/init_task.h>
41*4882a593Smuzhiyun #include <linux/uaccess.h>
42*4882a593Smuzhiyun
43*4882a593Smuzhiyun #include "internal.h"
44*4882a593Smuzhiyun #include "mount.h"
45*4882a593Smuzhiyun
46*4882a593Smuzhiyun #define CREATE_TRACE_POINTS
47*4882a593Smuzhiyun #include <trace/events/namei.h>
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun /* [Feb-1997 T. Schoebel-Theuer]
50*4882a593Smuzhiyun * Fundamental changes in the pathname lookup mechanisms (namei)
51*4882a593Smuzhiyun * were necessary because of omirr. The reason is that omirr needs
52*4882a593Smuzhiyun * to know the _real_ pathname, not the user-supplied one, in case
53*4882a593Smuzhiyun * of symlinks (and also when transname replacements occur).
54*4882a593Smuzhiyun *
55*4882a593Smuzhiyun * The new code replaces the old recursive symlink resolution with
56*4882a593Smuzhiyun * an iterative one (in case of non-nested symlink chains). It does
57*4882a593Smuzhiyun * this with calls to <fs>_follow_link().
58*4882a593Smuzhiyun * As a side effect, dir_namei(), _namei() and follow_link() are now
59*4882a593Smuzhiyun * replaced with a single function lookup_dentry() that can handle all
60*4882a593Smuzhiyun * the special cases of the former code.
61*4882a593Smuzhiyun *
62*4882a593Smuzhiyun * With the new dcache, the pathname is stored at each inode, at least as
63*4882a593Smuzhiyun * long as the refcount of the inode is positive. As a side effect, the
64*4882a593Smuzhiyun * size of the dcache depends on the inode cache and thus is dynamic.
65*4882a593Smuzhiyun *
66*4882a593Smuzhiyun * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
67*4882a593Smuzhiyun * resolution to correspond with current state of the code.
68*4882a593Smuzhiyun *
69*4882a593Smuzhiyun * Note that the symlink resolution is not *completely* iterative.
70*4882a593Smuzhiyun * There is still a significant amount of tail- and mid- recursion in
71*4882a593Smuzhiyun * the algorithm. Also, note that <fs>_readlink() is not used in
72*4882a593Smuzhiyun * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
73*4882a593Smuzhiyun * may return different results than <fs>_follow_link(). Many virtual
74*4882a593Smuzhiyun * filesystems (including /proc) exhibit this behavior.
75*4882a593Smuzhiyun */
76*4882a593Smuzhiyun
/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in the form of a symlink, try to create the
 * new name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent. The new semantics also affects mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
 * "old" one. Personally, I think the new semantics is much more logical.
 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
 * file does succeed in both HP-UX and SunOS, but not in Solaris
 * and in the old Linux semantics.
 */
93*4882a593Smuzhiyun
94*4882a593Smuzhiyun /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
95*4882a593Smuzhiyun * semantics. See the comments in "open_namei" and "do_link" below.
96*4882a593Smuzhiyun *
97*4882a593Smuzhiyun * [10-Sep-98 Alan Modra] Another symlink change.
98*4882a593Smuzhiyun */
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
101*4882a593Smuzhiyun * inside the path - always follow.
102*4882a593Smuzhiyun * in the last component in creation/removal/renaming - never follow.
103*4882a593Smuzhiyun * if LOOKUP_FOLLOW passed - follow.
104*4882a593Smuzhiyun * if the pathname has trailing slashes - follow.
105*4882a593Smuzhiyun * otherwise - don't follow.
106*4882a593Smuzhiyun * (applied in that order).
107*4882a593Smuzhiyun *
108*4882a593Smuzhiyun * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
109*4882a593Smuzhiyun * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
110*4882a593Smuzhiyun * During the 2.4 we need to fix the userland stuff depending on it -
111*4882a593Smuzhiyun * hopefully we will be able to get rid of that wart in 2.5. So far only
112*4882a593Smuzhiyun * XEmacs seems to be relying on it...
113*4882a593Smuzhiyun */
114*4882a593Smuzhiyun /*
115*4882a593Smuzhiyun * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
116*4882a593Smuzhiyun * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives
117*4882a593Smuzhiyun * any extra contention...
118*4882a593Smuzhiyun */
119*4882a593Smuzhiyun
/* In order to reduce some races, while at the same time doing additional
 * checking and hopefully speeding things up, we copy filenames to the
 * kernel data space before using them.
 *
 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
 * PATH_MAX includes the nul terminator --RR.
 */
127*4882a593Smuzhiyun
128*4882a593Smuzhiyun #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
129*4882a593Smuzhiyun
130*4882a593Smuzhiyun struct filename *
getname_flags(const char __user * filename,int flags,int * empty)131*4882a593Smuzhiyun getname_flags(const char __user *filename, int flags, int *empty)
132*4882a593Smuzhiyun {
133*4882a593Smuzhiyun struct filename *result;
134*4882a593Smuzhiyun char *kname;
135*4882a593Smuzhiyun int len;
136*4882a593Smuzhiyun
137*4882a593Smuzhiyun result = audit_reusename(filename);
138*4882a593Smuzhiyun if (result)
139*4882a593Smuzhiyun return result;
140*4882a593Smuzhiyun
141*4882a593Smuzhiyun result = __getname();
142*4882a593Smuzhiyun if (unlikely(!result))
143*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
144*4882a593Smuzhiyun
145*4882a593Smuzhiyun /*
146*4882a593Smuzhiyun * First, try to embed the struct filename inside the names_cache
147*4882a593Smuzhiyun * allocation
148*4882a593Smuzhiyun */
149*4882a593Smuzhiyun kname = (char *)result->iname;
150*4882a593Smuzhiyun result->name = kname;
151*4882a593Smuzhiyun
152*4882a593Smuzhiyun len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
153*4882a593Smuzhiyun if (unlikely(len < 0)) {
154*4882a593Smuzhiyun __putname(result);
155*4882a593Smuzhiyun return ERR_PTR(len);
156*4882a593Smuzhiyun }
157*4882a593Smuzhiyun
158*4882a593Smuzhiyun /*
159*4882a593Smuzhiyun * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
160*4882a593Smuzhiyun * separate struct filename so we can dedicate the entire
161*4882a593Smuzhiyun * names_cache allocation for the pathname, and re-do the copy from
162*4882a593Smuzhiyun * userland.
163*4882a593Smuzhiyun */
164*4882a593Smuzhiyun if (unlikely(len == EMBEDDED_NAME_MAX)) {
165*4882a593Smuzhiyun const size_t size = offsetof(struct filename, iname[1]);
166*4882a593Smuzhiyun kname = (char *)result;
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun /*
169*4882a593Smuzhiyun * size is chosen that way we to guarantee that
170*4882a593Smuzhiyun * result->iname[0] is within the same object and that
171*4882a593Smuzhiyun * kname can't be equal to result->iname, no matter what.
172*4882a593Smuzhiyun */
173*4882a593Smuzhiyun result = kzalloc(size, GFP_KERNEL);
174*4882a593Smuzhiyun if (unlikely(!result)) {
175*4882a593Smuzhiyun __putname(kname);
176*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
177*4882a593Smuzhiyun }
178*4882a593Smuzhiyun result->name = kname;
179*4882a593Smuzhiyun len = strncpy_from_user(kname, filename, PATH_MAX);
180*4882a593Smuzhiyun if (unlikely(len < 0)) {
181*4882a593Smuzhiyun __putname(kname);
182*4882a593Smuzhiyun kfree(result);
183*4882a593Smuzhiyun return ERR_PTR(len);
184*4882a593Smuzhiyun }
185*4882a593Smuzhiyun if (unlikely(len == PATH_MAX)) {
186*4882a593Smuzhiyun __putname(kname);
187*4882a593Smuzhiyun kfree(result);
188*4882a593Smuzhiyun return ERR_PTR(-ENAMETOOLONG);
189*4882a593Smuzhiyun }
190*4882a593Smuzhiyun }
191*4882a593Smuzhiyun
192*4882a593Smuzhiyun result->refcnt = 1;
193*4882a593Smuzhiyun /* The empty path is special. */
194*4882a593Smuzhiyun if (unlikely(!len)) {
195*4882a593Smuzhiyun if (empty)
196*4882a593Smuzhiyun *empty = 1;
197*4882a593Smuzhiyun if (!(flags & LOOKUP_EMPTY)) {
198*4882a593Smuzhiyun putname(result);
199*4882a593Smuzhiyun return ERR_PTR(-ENOENT);
200*4882a593Smuzhiyun }
201*4882a593Smuzhiyun }
202*4882a593Smuzhiyun
203*4882a593Smuzhiyun result->uptr = filename;
204*4882a593Smuzhiyun result->aname = NULL;
205*4882a593Smuzhiyun audit_getname(result);
206*4882a593Smuzhiyun return result;
207*4882a593Smuzhiyun }
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun struct filename *
getname(const char __user * filename)210*4882a593Smuzhiyun getname(const char __user * filename)
211*4882a593Smuzhiyun {
212*4882a593Smuzhiyun return getname_flags(filename, 0, NULL);
213*4882a593Smuzhiyun }
214*4882a593Smuzhiyun
215*4882a593Smuzhiyun struct filename *
getname_kernel(const char * filename)216*4882a593Smuzhiyun getname_kernel(const char * filename)
217*4882a593Smuzhiyun {
218*4882a593Smuzhiyun struct filename *result;
219*4882a593Smuzhiyun int len = strlen(filename) + 1;
220*4882a593Smuzhiyun
221*4882a593Smuzhiyun result = __getname();
222*4882a593Smuzhiyun if (unlikely(!result))
223*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
224*4882a593Smuzhiyun
225*4882a593Smuzhiyun if (len <= EMBEDDED_NAME_MAX) {
226*4882a593Smuzhiyun result->name = (char *)result->iname;
227*4882a593Smuzhiyun } else if (len <= PATH_MAX) {
228*4882a593Smuzhiyun const size_t size = offsetof(struct filename, iname[1]);
229*4882a593Smuzhiyun struct filename *tmp;
230*4882a593Smuzhiyun
231*4882a593Smuzhiyun tmp = kmalloc(size, GFP_KERNEL);
232*4882a593Smuzhiyun if (unlikely(!tmp)) {
233*4882a593Smuzhiyun __putname(result);
234*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
235*4882a593Smuzhiyun }
236*4882a593Smuzhiyun tmp->name = (char *)result;
237*4882a593Smuzhiyun result = tmp;
238*4882a593Smuzhiyun } else {
239*4882a593Smuzhiyun __putname(result);
240*4882a593Smuzhiyun return ERR_PTR(-ENAMETOOLONG);
241*4882a593Smuzhiyun }
242*4882a593Smuzhiyun memcpy((char *)result->name, filename, len);
243*4882a593Smuzhiyun result->uptr = NULL;
244*4882a593Smuzhiyun result->aname = NULL;
245*4882a593Smuzhiyun result->refcnt = 1;
246*4882a593Smuzhiyun audit_getname(result);
247*4882a593Smuzhiyun
248*4882a593Smuzhiyun return result;
249*4882a593Smuzhiyun }
250*4882a593Smuzhiyun
putname(struct filename * name)251*4882a593Smuzhiyun void putname(struct filename *name)
252*4882a593Smuzhiyun {
253*4882a593Smuzhiyun BUG_ON(name->refcnt <= 0);
254*4882a593Smuzhiyun
255*4882a593Smuzhiyun if (--name->refcnt > 0)
256*4882a593Smuzhiyun return;
257*4882a593Smuzhiyun
258*4882a593Smuzhiyun if (name->name != name->iname) {
259*4882a593Smuzhiyun __putname(name->name);
260*4882a593Smuzhiyun kfree(name);
261*4882a593Smuzhiyun } else
262*4882a593Smuzhiyun __putname(name);
263*4882a593Smuzhiyun }
264*4882a593Smuzhiyun
check_acl(struct inode * inode,int mask)265*4882a593Smuzhiyun static int check_acl(struct inode *inode, int mask)
266*4882a593Smuzhiyun {
267*4882a593Smuzhiyun #ifdef CONFIG_FS_POSIX_ACL
268*4882a593Smuzhiyun struct posix_acl *acl;
269*4882a593Smuzhiyun
270*4882a593Smuzhiyun if (mask & MAY_NOT_BLOCK) {
271*4882a593Smuzhiyun acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
272*4882a593Smuzhiyun if (!acl)
273*4882a593Smuzhiyun return -EAGAIN;
274*4882a593Smuzhiyun /* no ->get_acl() calls in RCU mode... */
275*4882a593Smuzhiyun if (is_uncached_acl(acl))
276*4882a593Smuzhiyun return -ECHILD;
277*4882a593Smuzhiyun return posix_acl_permission(inode, acl, mask);
278*4882a593Smuzhiyun }
279*4882a593Smuzhiyun
280*4882a593Smuzhiyun acl = get_acl(inode, ACL_TYPE_ACCESS);
281*4882a593Smuzhiyun if (IS_ERR(acl))
282*4882a593Smuzhiyun return PTR_ERR(acl);
283*4882a593Smuzhiyun if (acl) {
284*4882a593Smuzhiyun int error = posix_acl_permission(inode, acl, mask);
285*4882a593Smuzhiyun posix_acl_release(acl);
286*4882a593Smuzhiyun return error;
287*4882a593Smuzhiyun }
288*4882a593Smuzhiyun #endif
289*4882a593Smuzhiyun
290*4882a593Smuzhiyun return -EAGAIN;
291*4882a593Smuzhiyun }
292*4882a593Smuzhiyun
293*4882a593Smuzhiyun /*
294*4882a593Smuzhiyun * This does the basic UNIX permission checking.
295*4882a593Smuzhiyun *
296*4882a593Smuzhiyun * Note that the POSIX ACL check cares about the MAY_NOT_BLOCK bit,
297*4882a593Smuzhiyun * for RCU walking.
298*4882a593Smuzhiyun */
acl_permission_check(struct inode * inode,int mask)299*4882a593Smuzhiyun static int acl_permission_check(struct inode *inode, int mask)
300*4882a593Smuzhiyun {
301*4882a593Smuzhiyun unsigned int mode = inode->i_mode;
302*4882a593Smuzhiyun
303*4882a593Smuzhiyun /* Are we the owner? If so, ACL's don't matter */
304*4882a593Smuzhiyun if (likely(uid_eq(current_fsuid(), inode->i_uid))) {
305*4882a593Smuzhiyun mask &= 7;
306*4882a593Smuzhiyun mode >>= 6;
307*4882a593Smuzhiyun return (mask & ~mode) ? -EACCES : 0;
308*4882a593Smuzhiyun }
309*4882a593Smuzhiyun
310*4882a593Smuzhiyun /* Do we have ACL's? */
311*4882a593Smuzhiyun if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
312*4882a593Smuzhiyun int error = check_acl(inode, mask);
313*4882a593Smuzhiyun if (error != -EAGAIN)
314*4882a593Smuzhiyun return error;
315*4882a593Smuzhiyun }
316*4882a593Smuzhiyun
317*4882a593Smuzhiyun /* Only RWX matters for group/other mode bits */
318*4882a593Smuzhiyun mask &= 7;
319*4882a593Smuzhiyun
320*4882a593Smuzhiyun /*
321*4882a593Smuzhiyun * Are the group permissions different from
322*4882a593Smuzhiyun * the other permissions in the bits we care
323*4882a593Smuzhiyun * about? Need to check group ownership if so.
324*4882a593Smuzhiyun */
325*4882a593Smuzhiyun if (mask & (mode ^ (mode >> 3))) {
326*4882a593Smuzhiyun if (in_group_p(inode->i_gid))
327*4882a593Smuzhiyun mode >>= 3;
328*4882a593Smuzhiyun }
329*4882a593Smuzhiyun
330*4882a593Smuzhiyun /* Bits in 'mode' clear that we require? */
331*4882a593Smuzhiyun return (mask & ~mode) ? -EACCES : 0;
332*4882a593Smuzhiyun }
333*4882a593Smuzhiyun
334*4882a593Smuzhiyun /**
335*4882a593Smuzhiyun * generic_permission - check for access rights on a Posix-like filesystem
336*4882a593Smuzhiyun * @inode: inode to check access rights for
337*4882a593Smuzhiyun * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC,
338*4882a593Smuzhiyun * %MAY_NOT_BLOCK ...)
339*4882a593Smuzhiyun *
340*4882a593Smuzhiyun * Used to check for read/write/execute permissions on a file.
341*4882a593Smuzhiyun * We use "fsuid" for this, letting us set arbitrary permissions
342*4882a593Smuzhiyun * for filesystem access without changing the "normal" uids which
343*4882a593Smuzhiyun * are used for other things.
344*4882a593Smuzhiyun *
345*4882a593Smuzhiyun * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
346*4882a593Smuzhiyun * request cannot be satisfied (eg. requires blocking or too much complexity).
347*4882a593Smuzhiyun * It would then be called again in ref-walk mode.
348*4882a593Smuzhiyun */
generic_permission(struct inode * inode,int mask)349*4882a593Smuzhiyun int generic_permission(struct inode *inode, int mask)
350*4882a593Smuzhiyun {
351*4882a593Smuzhiyun int ret;
352*4882a593Smuzhiyun
353*4882a593Smuzhiyun /*
354*4882a593Smuzhiyun * Do the basic permission checks.
355*4882a593Smuzhiyun */
356*4882a593Smuzhiyun ret = acl_permission_check(inode, mask);
357*4882a593Smuzhiyun if (ret != -EACCES)
358*4882a593Smuzhiyun return ret;
359*4882a593Smuzhiyun
360*4882a593Smuzhiyun if (S_ISDIR(inode->i_mode)) {
361*4882a593Smuzhiyun /* DACs are overridable for directories */
362*4882a593Smuzhiyun if (!(mask & MAY_WRITE))
363*4882a593Smuzhiyun if (capable_wrt_inode_uidgid(inode,
364*4882a593Smuzhiyun CAP_DAC_READ_SEARCH))
365*4882a593Smuzhiyun return 0;
366*4882a593Smuzhiyun if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
367*4882a593Smuzhiyun return 0;
368*4882a593Smuzhiyun return -EACCES;
369*4882a593Smuzhiyun }
370*4882a593Smuzhiyun
371*4882a593Smuzhiyun /*
372*4882a593Smuzhiyun * Searching includes executable on directories, else just read.
373*4882a593Smuzhiyun */
374*4882a593Smuzhiyun mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
375*4882a593Smuzhiyun if (mask == MAY_READ)
376*4882a593Smuzhiyun if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH))
377*4882a593Smuzhiyun return 0;
378*4882a593Smuzhiyun /*
379*4882a593Smuzhiyun * Read/write DACs are always overridable.
380*4882a593Smuzhiyun * Executable DACs are overridable when there is
381*4882a593Smuzhiyun * at least one exec bit set.
382*4882a593Smuzhiyun */
383*4882a593Smuzhiyun if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
384*4882a593Smuzhiyun if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
385*4882a593Smuzhiyun return 0;
386*4882a593Smuzhiyun
387*4882a593Smuzhiyun return -EACCES;
388*4882a593Smuzhiyun }
389*4882a593Smuzhiyun EXPORT_SYMBOL(generic_permission);
390*4882a593Smuzhiyun
391*4882a593Smuzhiyun /*
392*4882a593Smuzhiyun * We _really_ want to just do "generic_permission()" without
393*4882a593Smuzhiyun * even looking at the inode->i_op values. So we keep a cache
394*4882a593Smuzhiyun * flag in inode->i_opflags, that says "this has not special
395*4882a593Smuzhiyun * permission function, use the fast case".
396*4882a593Smuzhiyun */
do_inode_permission(struct inode * inode,int mask)397*4882a593Smuzhiyun static inline int do_inode_permission(struct inode *inode, int mask)
398*4882a593Smuzhiyun {
399*4882a593Smuzhiyun if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
400*4882a593Smuzhiyun if (likely(inode->i_op->permission))
401*4882a593Smuzhiyun return inode->i_op->permission(inode, mask);
402*4882a593Smuzhiyun
403*4882a593Smuzhiyun /* This gets set once for the inode lifetime */
404*4882a593Smuzhiyun spin_lock(&inode->i_lock);
405*4882a593Smuzhiyun inode->i_opflags |= IOP_FASTPERM;
406*4882a593Smuzhiyun spin_unlock(&inode->i_lock);
407*4882a593Smuzhiyun }
408*4882a593Smuzhiyun return generic_permission(inode, mask);
409*4882a593Smuzhiyun }
410*4882a593Smuzhiyun
411*4882a593Smuzhiyun /**
412*4882a593Smuzhiyun * sb_permission - Check superblock-level permissions
413*4882a593Smuzhiyun * @sb: Superblock of inode to check permission on
414*4882a593Smuzhiyun * @inode: Inode to check permission on
415*4882a593Smuzhiyun * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
416*4882a593Smuzhiyun *
417*4882a593Smuzhiyun * Separate out file-system wide checks from inode-specific permission checks.
418*4882a593Smuzhiyun */
sb_permission(struct super_block * sb,struct inode * inode,int mask)419*4882a593Smuzhiyun static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
420*4882a593Smuzhiyun {
421*4882a593Smuzhiyun if (unlikely(mask & MAY_WRITE)) {
422*4882a593Smuzhiyun umode_t mode = inode->i_mode;
423*4882a593Smuzhiyun
424*4882a593Smuzhiyun /* Nobody gets write access to a read-only fs. */
425*4882a593Smuzhiyun if (sb_rdonly(sb) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
426*4882a593Smuzhiyun return -EROFS;
427*4882a593Smuzhiyun }
428*4882a593Smuzhiyun return 0;
429*4882a593Smuzhiyun }
430*4882a593Smuzhiyun
431*4882a593Smuzhiyun /**
432*4882a593Smuzhiyun * inode_permission - Check for access rights to a given inode
433*4882a593Smuzhiyun * @inode: Inode to check permission on
434*4882a593Smuzhiyun * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
435*4882a593Smuzhiyun *
436*4882a593Smuzhiyun * Check for read/write/execute permissions on an inode. We use fs[ug]id for
437*4882a593Smuzhiyun * this, letting us set arbitrary permissions for filesystem access without
438*4882a593Smuzhiyun * changing the "normal" UIDs which are used for other things.
439*4882a593Smuzhiyun *
440*4882a593Smuzhiyun * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
441*4882a593Smuzhiyun */
inode_permission(struct inode * inode,int mask)442*4882a593Smuzhiyun int inode_permission(struct inode *inode, int mask)
443*4882a593Smuzhiyun {
444*4882a593Smuzhiyun int retval;
445*4882a593Smuzhiyun
446*4882a593Smuzhiyun retval = sb_permission(inode->i_sb, inode, mask);
447*4882a593Smuzhiyun if (retval)
448*4882a593Smuzhiyun return retval;
449*4882a593Smuzhiyun
450*4882a593Smuzhiyun if (unlikely(mask & MAY_WRITE)) {
451*4882a593Smuzhiyun /*
452*4882a593Smuzhiyun * Nobody gets write access to an immutable file.
453*4882a593Smuzhiyun */
454*4882a593Smuzhiyun if (IS_IMMUTABLE(inode))
455*4882a593Smuzhiyun return -EPERM;
456*4882a593Smuzhiyun
457*4882a593Smuzhiyun /*
458*4882a593Smuzhiyun * Updating mtime will likely cause i_uid and i_gid to be
459*4882a593Smuzhiyun * written back improperly if their true value is unknown
460*4882a593Smuzhiyun * to the vfs.
461*4882a593Smuzhiyun */
462*4882a593Smuzhiyun if (HAS_UNMAPPED_ID(inode))
463*4882a593Smuzhiyun return -EACCES;
464*4882a593Smuzhiyun }
465*4882a593Smuzhiyun
466*4882a593Smuzhiyun retval = do_inode_permission(inode, mask);
467*4882a593Smuzhiyun if (retval)
468*4882a593Smuzhiyun return retval;
469*4882a593Smuzhiyun
470*4882a593Smuzhiyun retval = devcgroup_inode_permission(inode, mask);
471*4882a593Smuzhiyun if (retval)
472*4882a593Smuzhiyun return retval;
473*4882a593Smuzhiyun
474*4882a593Smuzhiyun return security_inode_permission(inode, mask);
475*4882a593Smuzhiyun }
476*4882a593Smuzhiyun EXPORT_SYMBOL(inode_permission);
477*4882a593Smuzhiyun
478*4882a593Smuzhiyun /**
479*4882a593Smuzhiyun * path_get - get a reference to a path
480*4882a593Smuzhiyun * @path: path to get the reference to
481*4882a593Smuzhiyun *
482*4882a593Smuzhiyun * Given a path increment the reference count to the dentry and the vfsmount.
483*4882a593Smuzhiyun */
path_get(const struct path * path)484*4882a593Smuzhiyun void path_get(const struct path *path)
485*4882a593Smuzhiyun {
486*4882a593Smuzhiyun mntget(path->mnt);
487*4882a593Smuzhiyun dget(path->dentry);
488*4882a593Smuzhiyun }
489*4882a593Smuzhiyun EXPORT_SYMBOL(path_get);
490*4882a593Smuzhiyun
491*4882a593Smuzhiyun /**
492*4882a593Smuzhiyun * path_put - put a reference to a path
493*4882a593Smuzhiyun * @path: path to put the reference to
494*4882a593Smuzhiyun *
495*4882a593Smuzhiyun * Given a path decrement the reference count to the dentry and the vfsmount.
496*4882a593Smuzhiyun */
path_put(const struct path * path)497*4882a593Smuzhiyun void path_put(const struct path *path)
498*4882a593Smuzhiyun {
499*4882a593Smuzhiyun dput(path->dentry);
500*4882a593Smuzhiyun mntput(path->mnt);
501*4882a593Smuzhiyun }
502*4882a593Smuzhiyun EXPORT_SYMBOL(path_put);
503*4882a593Smuzhiyun
504*4882a593Smuzhiyun #define EMBEDDED_LEVELS 2
505*4882a593Smuzhiyun struct nameidata {
506*4882a593Smuzhiyun struct path path;
507*4882a593Smuzhiyun struct qstr last;
508*4882a593Smuzhiyun struct path root;
509*4882a593Smuzhiyun struct inode *inode; /* path.dentry.d_inode */
510*4882a593Smuzhiyun unsigned int flags;
511*4882a593Smuzhiyun unsigned seq, m_seq, r_seq;
512*4882a593Smuzhiyun int last_type;
513*4882a593Smuzhiyun unsigned depth;
514*4882a593Smuzhiyun int total_link_count;
515*4882a593Smuzhiyun struct saved {
516*4882a593Smuzhiyun struct path link;
517*4882a593Smuzhiyun struct delayed_call done;
518*4882a593Smuzhiyun const char *name;
519*4882a593Smuzhiyun unsigned seq;
520*4882a593Smuzhiyun } *stack, internal[EMBEDDED_LEVELS];
521*4882a593Smuzhiyun struct filename *name;
522*4882a593Smuzhiyun struct nameidata *saved;
523*4882a593Smuzhiyun unsigned root_seq;
524*4882a593Smuzhiyun int dfd;
525*4882a593Smuzhiyun kuid_t dir_uid;
526*4882a593Smuzhiyun umode_t dir_mode;
527*4882a593Smuzhiyun } __randomize_layout;
528*4882a593Smuzhiyun
set_nameidata(struct nameidata * p,int dfd,struct filename * name)529*4882a593Smuzhiyun static void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
530*4882a593Smuzhiyun {
531*4882a593Smuzhiyun struct nameidata *old = current->nameidata;
532*4882a593Smuzhiyun p->stack = p->internal;
533*4882a593Smuzhiyun p->dfd = dfd;
534*4882a593Smuzhiyun p->name = name;
535*4882a593Smuzhiyun p->path.mnt = NULL;
536*4882a593Smuzhiyun p->path.dentry = NULL;
537*4882a593Smuzhiyun p->total_link_count = old ? old->total_link_count : 0;
538*4882a593Smuzhiyun p->saved = old;
539*4882a593Smuzhiyun current->nameidata = p;
540*4882a593Smuzhiyun }
541*4882a593Smuzhiyun
restore_nameidata(void)542*4882a593Smuzhiyun static void restore_nameidata(void)
543*4882a593Smuzhiyun {
544*4882a593Smuzhiyun struct nameidata *now = current->nameidata, *old = now->saved;
545*4882a593Smuzhiyun
546*4882a593Smuzhiyun current->nameidata = old;
547*4882a593Smuzhiyun if (old)
548*4882a593Smuzhiyun old->total_link_count = now->total_link_count;
549*4882a593Smuzhiyun if (now->stack != now->internal)
550*4882a593Smuzhiyun kfree(now->stack);
551*4882a593Smuzhiyun }
552*4882a593Smuzhiyun
nd_alloc_stack(struct nameidata * nd)553*4882a593Smuzhiyun static bool nd_alloc_stack(struct nameidata *nd)
554*4882a593Smuzhiyun {
555*4882a593Smuzhiyun struct saved *p;
556*4882a593Smuzhiyun
557*4882a593Smuzhiyun p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
558*4882a593Smuzhiyun nd->flags & LOOKUP_RCU ? GFP_ATOMIC : GFP_KERNEL);
559*4882a593Smuzhiyun if (unlikely(!p))
560*4882a593Smuzhiyun return false;
561*4882a593Smuzhiyun memcpy(p, nd->internal, sizeof(nd->internal));
562*4882a593Smuzhiyun nd->stack = p;
563*4882a593Smuzhiyun return true;
564*4882a593Smuzhiyun }
565*4882a593Smuzhiyun
566*4882a593Smuzhiyun /**
567*4882a593Smuzhiyun * path_connected - Verify that a dentry is below mnt.mnt_root
568*4882a593Smuzhiyun *
569*4882a593Smuzhiyun * Rename can sometimes move a file or directory outside of a bind
570*4882a593Smuzhiyun * mount, path_connected allows those cases to be detected.
571*4882a593Smuzhiyun */
path_connected(struct vfsmount * mnt,struct dentry * dentry)572*4882a593Smuzhiyun static bool path_connected(struct vfsmount *mnt, struct dentry *dentry)
573*4882a593Smuzhiyun {
574*4882a593Smuzhiyun struct super_block *sb = mnt->mnt_sb;
575*4882a593Smuzhiyun
576*4882a593Smuzhiyun /* Bind mounts can have disconnected paths */
577*4882a593Smuzhiyun if (mnt->mnt_root == sb->s_root)
578*4882a593Smuzhiyun return true;
579*4882a593Smuzhiyun
580*4882a593Smuzhiyun return is_subdir(dentry, mnt->mnt_root);
581*4882a593Smuzhiyun }
582*4882a593Smuzhiyun
drop_links(struct nameidata * nd)583*4882a593Smuzhiyun static void drop_links(struct nameidata *nd)
584*4882a593Smuzhiyun {
585*4882a593Smuzhiyun int i = nd->depth;
586*4882a593Smuzhiyun while (i--) {
587*4882a593Smuzhiyun struct saved *last = nd->stack + i;
588*4882a593Smuzhiyun do_delayed_call(&last->done);
589*4882a593Smuzhiyun clear_delayed_call(&last->done);
590*4882a593Smuzhiyun }
591*4882a593Smuzhiyun }
592*4882a593Smuzhiyun
terminate_walk(struct nameidata * nd)593*4882a593Smuzhiyun static void terminate_walk(struct nameidata *nd)
594*4882a593Smuzhiyun {
595*4882a593Smuzhiyun drop_links(nd);
596*4882a593Smuzhiyun if (!(nd->flags & LOOKUP_RCU)) {
597*4882a593Smuzhiyun int i;
598*4882a593Smuzhiyun path_put(&nd->path);
599*4882a593Smuzhiyun for (i = 0; i < nd->depth; i++)
600*4882a593Smuzhiyun path_put(&nd->stack[i].link);
601*4882a593Smuzhiyun if (nd->flags & LOOKUP_ROOT_GRABBED) {
602*4882a593Smuzhiyun path_put(&nd->root);
603*4882a593Smuzhiyun nd->flags &= ~LOOKUP_ROOT_GRABBED;
604*4882a593Smuzhiyun }
605*4882a593Smuzhiyun } else {
606*4882a593Smuzhiyun nd->flags &= ~LOOKUP_RCU;
607*4882a593Smuzhiyun rcu_read_unlock();
608*4882a593Smuzhiyun }
609*4882a593Smuzhiyun nd->depth = 0;
610*4882a593Smuzhiyun nd->path.mnt = NULL;
611*4882a593Smuzhiyun nd->path.dentry = NULL;
612*4882a593Smuzhiyun }
613*4882a593Smuzhiyun
614*4882a593Smuzhiyun /* path_put is needed afterwards regardless of success or failure */
__legitimize_path(struct path * path,unsigned seq,unsigned mseq)615*4882a593Smuzhiyun static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq)
616*4882a593Smuzhiyun {
617*4882a593Smuzhiyun int res = __legitimize_mnt(path->mnt, mseq);
618*4882a593Smuzhiyun if (unlikely(res)) {
619*4882a593Smuzhiyun if (res > 0)
620*4882a593Smuzhiyun path->mnt = NULL;
621*4882a593Smuzhiyun path->dentry = NULL;
622*4882a593Smuzhiyun return false;
623*4882a593Smuzhiyun }
624*4882a593Smuzhiyun if (unlikely(!lockref_get_not_dead(&path->dentry->d_lockref))) {
625*4882a593Smuzhiyun path->dentry = NULL;
626*4882a593Smuzhiyun return false;
627*4882a593Smuzhiyun }
628*4882a593Smuzhiyun return !read_seqcount_retry(&path->dentry->d_seq, seq);
629*4882a593Smuzhiyun }
630*4882a593Smuzhiyun
legitimize_path(struct nameidata * nd,struct path * path,unsigned seq)631*4882a593Smuzhiyun static inline bool legitimize_path(struct nameidata *nd,
632*4882a593Smuzhiyun struct path *path, unsigned seq)
633*4882a593Smuzhiyun {
634*4882a593Smuzhiyun return __legitimize_path(path, seq, nd->m_seq);
635*4882a593Smuzhiyun }
636*4882a593Smuzhiyun
legitimize_links(struct nameidata * nd)637*4882a593Smuzhiyun static bool legitimize_links(struct nameidata *nd)
638*4882a593Smuzhiyun {
639*4882a593Smuzhiyun int i;
640*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_CACHED)) {
641*4882a593Smuzhiyun drop_links(nd);
642*4882a593Smuzhiyun nd->depth = 0;
643*4882a593Smuzhiyun return false;
644*4882a593Smuzhiyun }
645*4882a593Smuzhiyun for (i = 0; i < nd->depth; i++) {
646*4882a593Smuzhiyun struct saved *last = nd->stack + i;
647*4882a593Smuzhiyun if (unlikely(!legitimize_path(nd, &last->link, last->seq))) {
648*4882a593Smuzhiyun drop_links(nd);
649*4882a593Smuzhiyun nd->depth = i + 1;
650*4882a593Smuzhiyun return false;
651*4882a593Smuzhiyun }
652*4882a593Smuzhiyun }
653*4882a593Smuzhiyun return true;
654*4882a593Smuzhiyun }
655*4882a593Smuzhiyun
legitimize_root(struct nameidata * nd)656*4882a593Smuzhiyun static bool legitimize_root(struct nameidata *nd)
657*4882a593Smuzhiyun {
658*4882a593Smuzhiyun /*
659*4882a593Smuzhiyun * For scoped-lookups (where nd->root has been zeroed), we need to
660*4882a593Smuzhiyun * restart the whole lookup from scratch -- because set_root() is wrong
661*4882a593Smuzhiyun * for these lookups (nd->dfd is the root, not the filesystem root).
662*4882a593Smuzhiyun */
663*4882a593Smuzhiyun if (!nd->root.mnt && (nd->flags & LOOKUP_IS_SCOPED))
664*4882a593Smuzhiyun return false;
665*4882a593Smuzhiyun /* Nothing to do if nd->root is zero or is managed by the VFS user. */
666*4882a593Smuzhiyun if (!nd->root.mnt || (nd->flags & LOOKUP_ROOT))
667*4882a593Smuzhiyun return true;
668*4882a593Smuzhiyun nd->flags |= LOOKUP_ROOT_GRABBED;
669*4882a593Smuzhiyun return legitimize_path(nd, &nd->root, nd->root_seq);
670*4882a593Smuzhiyun }
671*4882a593Smuzhiyun
672*4882a593Smuzhiyun /*
673*4882a593Smuzhiyun * Path walking has 2 modes, rcu-walk and ref-walk (see
674*4882a593Smuzhiyun * Documentation/filesystems/path-lookup.txt). In situations when we can't
675*4882a593Smuzhiyun * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
676*4882a593Smuzhiyun * normal reference counts on dentries and vfsmounts to transition to ref-walk
677*4882a593Smuzhiyun * mode. Refcounts are grabbed at the last known good point before rcu-walk
678*4882a593Smuzhiyun * got stuck, so ref-walk may continue from there. If this is not successful
679*4882a593Smuzhiyun * (eg. a seqcount has changed), then failure is returned and it's up to caller
680*4882a593Smuzhiyun * to restart the path walk from the beginning in ref-walk mode.
681*4882a593Smuzhiyun */
682*4882a593Smuzhiyun
683*4882a593Smuzhiyun /**
684*4882a593Smuzhiyun * try_to_unlazy - try to switch to ref-walk mode.
685*4882a593Smuzhiyun * @nd: nameidata pathwalk data
686*4882a593Smuzhiyun * Returns: true on success, false on failure
687*4882a593Smuzhiyun *
688*4882a593Smuzhiyun * try_to_unlazy attempts to legitimize the current nd->path and nd->root
689*4882a593Smuzhiyun * for ref-walk mode.
690*4882a593Smuzhiyun * Must be called from rcu-walk context.
691*4882a593Smuzhiyun * Nothing should touch nameidata between try_to_unlazy() failure and
692*4882a593Smuzhiyun * terminate_walk().
693*4882a593Smuzhiyun */
try_to_unlazy(struct nameidata * nd)694*4882a593Smuzhiyun static bool try_to_unlazy(struct nameidata *nd)
695*4882a593Smuzhiyun {
696*4882a593Smuzhiyun struct dentry *parent = nd->path.dentry;
697*4882a593Smuzhiyun
698*4882a593Smuzhiyun BUG_ON(!(nd->flags & LOOKUP_RCU));
699*4882a593Smuzhiyun
700*4882a593Smuzhiyun nd->flags &= ~LOOKUP_RCU;
701*4882a593Smuzhiyun if (unlikely(!legitimize_links(nd)))
702*4882a593Smuzhiyun goto out1;
703*4882a593Smuzhiyun if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
704*4882a593Smuzhiyun goto out;
705*4882a593Smuzhiyun if (unlikely(!legitimize_root(nd)))
706*4882a593Smuzhiyun goto out;
707*4882a593Smuzhiyun rcu_read_unlock();
708*4882a593Smuzhiyun BUG_ON(nd->inode != parent->d_inode);
709*4882a593Smuzhiyun return true;
710*4882a593Smuzhiyun
711*4882a593Smuzhiyun out1:
712*4882a593Smuzhiyun nd->path.mnt = NULL;
713*4882a593Smuzhiyun nd->path.dentry = NULL;
714*4882a593Smuzhiyun out:
715*4882a593Smuzhiyun rcu_read_unlock();
716*4882a593Smuzhiyun return false;
717*4882a593Smuzhiyun }
718*4882a593Smuzhiyun
719*4882a593Smuzhiyun /**
720*4882a593Smuzhiyun * try_to_unlazy_next - try to switch to ref-walk mode.
721*4882a593Smuzhiyun * @nd: nameidata pathwalk data
722*4882a593Smuzhiyun * @dentry: next dentry to step into
723*4882a593Smuzhiyun * @seq: seq number to check @dentry against
724*4882a593Smuzhiyun * Returns: true on success, false on failure
725*4882a593Smuzhiyun *
726*4882a593Smuzhiyun * Similar to to try_to_unlazy(), but here we have the next dentry already
727*4882a593Smuzhiyun * picked by rcu-walk and want to legitimize that in addition to the current
728*4882a593Smuzhiyun * nd->path and nd->root for ref-walk mode. Must be called from rcu-walk context.
729*4882a593Smuzhiyun * Nothing should touch nameidata between try_to_unlazy_next() failure and
730*4882a593Smuzhiyun * terminate_walk().
731*4882a593Smuzhiyun */
try_to_unlazy_next(struct nameidata * nd,struct dentry * dentry,unsigned seq)732*4882a593Smuzhiyun static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsigned seq)
733*4882a593Smuzhiyun {
734*4882a593Smuzhiyun BUG_ON(!(nd->flags & LOOKUP_RCU));
735*4882a593Smuzhiyun
736*4882a593Smuzhiyun nd->flags &= ~LOOKUP_RCU;
737*4882a593Smuzhiyun if (unlikely(!legitimize_links(nd)))
738*4882a593Smuzhiyun goto out2;
739*4882a593Smuzhiyun if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq)))
740*4882a593Smuzhiyun goto out2;
741*4882a593Smuzhiyun if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref)))
742*4882a593Smuzhiyun goto out1;
743*4882a593Smuzhiyun
744*4882a593Smuzhiyun /*
745*4882a593Smuzhiyun * We need to move both the parent and the dentry from the RCU domain
746*4882a593Smuzhiyun * to be properly refcounted. And the sequence number in the dentry
747*4882a593Smuzhiyun * validates *both* dentry counters, since we checked the sequence
748*4882a593Smuzhiyun * number of the parent after we got the child sequence number. So we
749*4882a593Smuzhiyun * know the parent must still be valid if the child sequence number is
750*4882a593Smuzhiyun */
751*4882a593Smuzhiyun if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
752*4882a593Smuzhiyun goto out;
753*4882a593Smuzhiyun if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
754*4882a593Smuzhiyun goto out_dput;
755*4882a593Smuzhiyun /*
756*4882a593Smuzhiyun * Sequence counts matched. Now make sure that the root is
757*4882a593Smuzhiyun * still valid and get it if required.
758*4882a593Smuzhiyun */
759*4882a593Smuzhiyun if (unlikely(!legitimize_root(nd)))
760*4882a593Smuzhiyun goto out_dput;
761*4882a593Smuzhiyun rcu_read_unlock();
762*4882a593Smuzhiyun return true;
763*4882a593Smuzhiyun
764*4882a593Smuzhiyun out2:
765*4882a593Smuzhiyun nd->path.mnt = NULL;
766*4882a593Smuzhiyun out1:
767*4882a593Smuzhiyun nd->path.dentry = NULL;
768*4882a593Smuzhiyun out:
769*4882a593Smuzhiyun rcu_read_unlock();
770*4882a593Smuzhiyun return false;
771*4882a593Smuzhiyun out_dput:
772*4882a593Smuzhiyun rcu_read_unlock();
773*4882a593Smuzhiyun dput(dentry);
774*4882a593Smuzhiyun return false;
775*4882a593Smuzhiyun }
776*4882a593Smuzhiyun
d_revalidate(struct dentry * dentry,unsigned int flags)777*4882a593Smuzhiyun static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
778*4882a593Smuzhiyun {
779*4882a593Smuzhiyun if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
780*4882a593Smuzhiyun return dentry->d_op->d_revalidate(dentry, flags);
781*4882a593Smuzhiyun else
782*4882a593Smuzhiyun return 1;
783*4882a593Smuzhiyun }
784*4882a593Smuzhiyun
785*4882a593Smuzhiyun #define INIT_PATH_SIZE 64
786*4882a593Smuzhiyun
success_walk_trace(struct nameidata * nd)787*4882a593Smuzhiyun static void success_walk_trace(struct nameidata *nd)
788*4882a593Smuzhiyun {
789*4882a593Smuzhiyun struct path *pt = &nd->path;
790*4882a593Smuzhiyun struct inode *i = nd->inode;
791*4882a593Smuzhiyun char buf[INIT_PATH_SIZE], *try_buf;
792*4882a593Smuzhiyun int cur_path_size;
793*4882a593Smuzhiyun char *p;
794*4882a593Smuzhiyun
795*4882a593Smuzhiyun /* When eBPF/ tracepoint is disabled, keep overhead low. */
796*4882a593Smuzhiyun if (!trace_inodepath_enabled())
797*4882a593Smuzhiyun return;
798*4882a593Smuzhiyun
799*4882a593Smuzhiyun /* First try stack allocated buffer. */
800*4882a593Smuzhiyun try_buf = buf;
801*4882a593Smuzhiyun cur_path_size = INIT_PATH_SIZE;
802*4882a593Smuzhiyun
803*4882a593Smuzhiyun while (cur_path_size <= PATH_MAX) {
804*4882a593Smuzhiyun /* Free previous heap allocation if we are now trying
805*4882a593Smuzhiyun * a second or later heap allocation.
806*4882a593Smuzhiyun */
807*4882a593Smuzhiyun if (try_buf != buf)
808*4882a593Smuzhiyun kfree(try_buf);
809*4882a593Smuzhiyun
810*4882a593Smuzhiyun /* All but the first alloc are on the heap. */
811*4882a593Smuzhiyun if (cur_path_size != INIT_PATH_SIZE) {
812*4882a593Smuzhiyun try_buf = kmalloc(cur_path_size, GFP_KERNEL);
813*4882a593Smuzhiyun if (!try_buf) {
814*4882a593Smuzhiyun try_buf = buf;
815*4882a593Smuzhiyun sprintf(try_buf, "error:buf_alloc_failed");
816*4882a593Smuzhiyun break;
817*4882a593Smuzhiyun }
818*4882a593Smuzhiyun }
819*4882a593Smuzhiyun
820*4882a593Smuzhiyun p = d_path(pt, try_buf, cur_path_size);
821*4882a593Smuzhiyun
822*4882a593Smuzhiyun if (!IS_ERR(p)) {
823*4882a593Smuzhiyun char *end = mangle_path(try_buf, p, "\n");
824*4882a593Smuzhiyun
825*4882a593Smuzhiyun if (end) {
826*4882a593Smuzhiyun try_buf[end - try_buf] = 0;
827*4882a593Smuzhiyun break;
828*4882a593Smuzhiyun } else {
829*4882a593Smuzhiyun /* On mangle errors, double path size
830*4882a593Smuzhiyun * till PATH_MAX.
831*4882a593Smuzhiyun */
832*4882a593Smuzhiyun cur_path_size = cur_path_size << 1;
833*4882a593Smuzhiyun continue;
834*4882a593Smuzhiyun }
835*4882a593Smuzhiyun }
836*4882a593Smuzhiyun
837*4882a593Smuzhiyun if (PTR_ERR(p) == -ENAMETOOLONG) {
838*4882a593Smuzhiyun /* If d_path complains that name is too long,
839*4882a593Smuzhiyun * then double path size till PATH_MAX.
840*4882a593Smuzhiyun */
841*4882a593Smuzhiyun cur_path_size = cur_path_size << 1;
842*4882a593Smuzhiyun continue;
843*4882a593Smuzhiyun }
844*4882a593Smuzhiyun
845*4882a593Smuzhiyun sprintf(try_buf, "error:d_path_failed_%lu",
846*4882a593Smuzhiyun -1 * PTR_ERR(p));
847*4882a593Smuzhiyun break;
848*4882a593Smuzhiyun }
849*4882a593Smuzhiyun
850*4882a593Smuzhiyun if (cur_path_size > PATH_MAX)
851*4882a593Smuzhiyun sprintf(try_buf, "error:d_path_name_too_long");
852*4882a593Smuzhiyun
853*4882a593Smuzhiyun trace_inodepath(i, try_buf);
854*4882a593Smuzhiyun
855*4882a593Smuzhiyun if (try_buf != buf)
856*4882a593Smuzhiyun kfree(try_buf);
857*4882a593Smuzhiyun return;
858*4882a593Smuzhiyun }
859*4882a593Smuzhiyun
860*4882a593Smuzhiyun /**
861*4882a593Smuzhiyun * complete_walk - successful completion of path walk
862*4882a593Smuzhiyun * @nd: pointer nameidata
863*4882a593Smuzhiyun *
864*4882a593Smuzhiyun * If we had been in RCU mode, drop out of it and legitimize nd->path.
865*4882a593Smuzhiyun * Revalidate the final result, unless we'd already done that during
866*4882a593Smuzhiyun * the path walk or the filesystem doesn't ask for it. Return 0 on
867*4882a593Smuzhiyun * success, -error on failure. In case of failure caller does not
868*4882a593Smuzhiyun * need to drop nd->path.
869*4882a593Smuzhiyun */
complete_walk(struct nameidata * nd)870*4882a593Smuzhiyun static int complete_walk(struct nameidata *nd)
871*4882a593Smuzhiyun {
872*4882a593Smuzhiyun struct dentry *dentry = nd->path.dentry;
873*4882a593Smuzhiyun int status;
874*4882a593Smuzhiyun
875*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU) {
876*4882a593Smuzhiyun /*
877*4882a593Smuzhiyun * We don't want to zero nd->root for scoped-lookups or
878*4882a593Smuzhiyun * externally-managed nd->root.
879*4882a593Smuzhiyun */
880*4882a593Smuzhiyun if (!(nd->flags & (LOOKUP_ROOT | LOOKUP_IS_SCOPED)))
881*4882a593Smuzhiyun nd->root.mnt = NULL;
882*4882a593Smuzhiyun nd->flags &= ~LOOKUP_CACHED;
883*4882a593Smuzhiyun if (!try_to_unlazy(nd))
884*4882a593Smuzhiyun return -ECHILD;
885*4882a593Smuzhiyun }
886*4882a593Smuzhiyun
887*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
888*4882a593Smuzhiyun /*
889*4882a593Smuzhiyun * While the guarantee of LOOKUP_IS_SCOPED is (roughly) "don't
890*4882a593Smuzhiyun * ever step outside the root during lookup" and should already
891*4882a593Smuzhiyun * be guaranteed by the rest of namei, we want to avoid a namei
892*4882a593Smuzhiyun * BUG resulting in userspace being given a path that was not
893*4882a593Smuzhiyun * scoped within the root at some point during the lookup.
894*4882a593Smuzhiyun *
895*4882a593Smuzhiyun * So, do a final sanity-check to make sure that in the
896*4882a593Smuzhiyun * worst-case scenario (a complete bypass of LOOKUP_IS_SCOPED)
897*4882a593Smuzhiyun * we won't silently return an fd completely outside of the
898*4882a593Smuzhiyun * requested root to userspace.
899*4882a593Smuzhiyun *
900*4882a593Smuzhiyun * Userspace could move the path outside the root after this
901*4882a593Smuzhiyun * check, but as discussed elsewhere this is not a concern (the
902*4882a593Smuzhiyun * resolved file was inside the root at some point).
903*4882a593Smuzhiyun */
904*4882a593Smuzhiyun if (!path_is_under(&nd->path, &nd->root))
905*4882a593Smuzhiyun return -EXDEV;
906*4882a593Smuzhiyun }
907*4882a593Smuzhiyun
908*4882a593Smuzhiyun if (likely(!(nd->flags & LOOKUP_JUMPED))) {
909*4882a593Smuzhiyun success_walk_trace(nd);
910*4882a593Smuzhiyun return 0;
911*4882a593Smuzhiyun }
912*4882a593Smuzhiyun
913*4882a593Smuzhiyun if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE))) {
914*4882a593Smuzhiyun success_walk_trace(nd);
915*4882a593Smuzhiyun return 0;
916*4882a593Smuzhiyun }
917*4882a593Smuzhiyun
918*4882a593Smuzhiyun status = dentry->d_op->d_weak_revalidate(dentry, nd->flags);
919*4882a593Smuzhiyun if (status > 0) {
920*4882a593Smuzhiyun success_walk_trace(nd);
921*4882a593Smuzhiyun return 0;
922*4882a593Smuzhiyun }
923*4882a593Smuzhiyun
924*4882a593Smuzhiyun if (!status)
925*4882a593Smuzhiyun status = -ESTALE;
926*4882a593Smuzhiyun
927*4882a593Smuzhiyun return status;
928*4882a593Smuzhiyun }
929*4882a593Smuzhiyun
set_root(struct nameidata * nd)930*4882a593Smuzhiyun static int set_root(struct nameidata *nd)
931*4882a593Smuzhiyun {
932*4882a593Smuzhiyun struct fs_struct *fs = current->fs;
933*4882a593Smuzhiyun
934*4882a593Smuzhiyun /*
935*4882a593Smuzhiyun * Jumping to the real root in a scoped-lookup is a BUG in namei, but we
936*4882a593Smuzhiyun * still have to ensure it doesn't happen because it will cause a breakout
937*4882a593Smuzhiyun * from the dirfd.
938*4882a593Smuzhiyun */
939*4882a593Smuzhiyun if (WARN_ON(nd->flags & LOOKUP_IS_SCOPED))
940*4882a593Smuzhiyun return -ENOTRECOVERABLE;
941*4882a593Smuzhiyun
942*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU) {
943*4882a593Smuzhiyun unsigned seq;
944*4882a593Smuzhiyun
945*4882a593Smuzhiyun do {
946*4882a593Smuzhiyun seq = read_seqcount_begin(&fs->seq);
947*4882a593Smuzhiyun nd->root = fs->root;
948*4882a593Smuzhiyun nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
949*4882a593Smuzhiyun } while (read_seqcount_retry(&fs->seq, seq));
950*4882a593Smuzhiyun } else {
951*4882a593Smuzhiyun get_fs_root(fs, &nd->root);
952*4882a593Smuzhiyun nd->flags |= LOOKUP_ROOT_GRABBED;
953*4882a593Smuzhiyun }
954*4882a593Smuzhiyun return 0;
955*4882a593Smuzhiyun }
956*4882a593Smuzhiyun
nd_jump_root(struct nameidata * nd)957*4882a593Smuzhiyun static int nd_jump_root(struct nameidata *nd)
958*4882a593Smuzhiyun {
959*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_BENEATH))
960*4882a593Smuzhiyun return -EXDEV;
961*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
962*4882a593Smuzhiyun /* Absolute path arguments to path_init() are allowed. */
963*4882a593Smuzhiyun if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt)
964*4882a593Smuzhiyun return -EXDEV;
965*4882a593Smuzhiyun }
966*4882a593Smuzhiyun if (!nd->root.mnt) {
967*4882a593Smuzhiyun int error = set_root(nd);
968*4882a593Smuzhiyun if (error)
969*4882a593Smuzhiyun return error;
970*4882a593Smuzhiyun }
971*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU) {
972*4882a593Smuzhiyun struct dentry *d;
973*4882a593Smuzhiyun nd->path = nd->root;
974*4882a593Smuzhiyun d = nd->path.dentry;
975*4882a593Smuzhiyun nd->inode = d->d_inode;
976*4882a593Smuzhiyun nd->seq = nd->root_seq;
977*4882a593Smuzhiyun if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq)))
978*4882a593Smuzhiyun return -ECHILD;
979*4882a593Smuzhiyun } else {
980*4882a593Smuzhiyun path_put(&nd->path);
981*4882a593Smuzhiyun nd->path = nd->root;
982*4882a593Smuzhiyun path_get(&nd->path);
983*4882a593Smuzhiyun nd->inode = nd->path.dentry->d_inode;
984*4882a593Smuzhiyun }
985*4882a593Smuzhiyun nd->flags |= LOOKUP_JUMPED;
986*4882a593Smuzhiyun return 0;
987*4882a593Smuzhiyun }
988*4882a593Smuzhiyun
989*4882a593Smuzhiyun /*
990*4882a593Smuzhiyun * Helper to directly jump to a known parsed path from ->get_link,
991*4882a593Smuzhiyun * caller must have taken a reference to path beforehand.
992*4882a593Smuzhiyun */
nd_jump_link(struct path * path)993*4882a593Smuzhiyun int nd_jump_link(struct path *path)
994*4882a593Smuzhiyun {
995*4882a593Smuzhiyun int error = -ELOOP;
996*4882a593Smuzhiyun struct nameidata *nd = current->nameidata;
997*4882a593Smuzhiyun
998*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_NO_MAGICLINKS))
999*4882a593Smuzhiyun goto err;
1000*4882a593Smuzhiyun
1001*4882a593Smuzhiyun error = -EXDEV;
1002*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
1003*4882a593Smuzhiyun if (nd->path.mnt != path->mnt)
1004*4882a593Smuzhiyun goto err;
1005*4882a593Smuzhiyun }
1006*4882a593Smuzhiyun /* Not currently safe for scoped-lookups. */
1007*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_IS_SCOPED))
1008*4882a593Smuzhiyun goto err;
1009*4882a593Smuzhiyun
1010*4882a593Smuzhiyun path_put(&nd->path);
1011*4882a593Smuzhiyun nd->path = *path;
1012*4882a593Smuzhiyun nd->inode = nd->path.dentry->d_inode;
1013*4882a593Smuzhiyun nd->flags |= LOOKUP_JUMPED;
1014*4882a593Smuzhiyun return 0;
1015*4882a593Smuzhiyun
1016*4882a593Smuzhiyun err:
1017*4882a593Smuzhiyun path_put(path);
1018*4882a593Smuzhiyun return error;
1019*4882a593Smuzhiyun }
1020*4882a593Smuzhiyun
put_link(struct nameidata * nd)1021*4882a593Smuzhiyun static inline void put_link(struct nameidata *nd)
1022*4882a593Smuzhiyun {
1023*4882a593Smuzhiyun struct saved *last = nd->stack + --nd->depth;
1024*4882a593Smuzhiyun do_delayed_call(&last->done);
1025*4882a593Smuzhiyun if (!(nd->flags & LOOKUP_RCU))
1026*4882a593Smuzhiyun path_put(&last->link);
1027*4882a593Smuzhiyun }
1028*4882a593Smuzhiyun
1029*4882a593Smuzhiyun int sysctl_protected_symlinks __read_mostly = 0;
1030*4882a593Smuzhiyun int sysctl_protected_hardlinks __read_mostly = 0;
1031*4882a593Smuzhiyun int sysctl_protected_fifos __read_mostly;
1032*4882a593Smuzhiyun int sysctl_protected_regular __read_mostly;
1033*4882a593Smuzhiyun
1034*4882a593Smuzhiyun /**
1035*4882a593Smuzhiyun * may_follow_link - Check symlink following for unsafe situations
1036*4882a593Smuzhiyun * @nd: nameidata pathwalk data
1037*4882a593Smuzhiyun *
1038*4882a593Smuzhiyun * In the case of the sysctl_protected_symlinks sysctl being enabled,
1039*4882a593Smuzhiyun * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
1040*4882a593Smuzhiyun * in a sticky world-writable directory. This is to protect privileged
1041*4882a593Smuzhiyun * processes from failing races against path names that may change out
1042*4882a593Smuzhiyun * from under them by way of other users creating malicious symlinks.
1043*4882a593Smuzhiyun * It will permit symlinks to be followed only when outside a sticky
1044*4882a593Smuzhiyun * world-writable directory, or when the uid of the symlink and follower
1045*4882a593Smuzhiyun * match, or when the directory owner matches the symlink's owner.
1046*4882a593Smuzhiyun *
1047*4882a593Smuzhiyun * Returns 0 if following the symlink is allowed, -ve on error.
1048*4882a593Smuzhiyun */
may_follow_link(struct nameidata * nd,const struct inode * inode)1049*4882a593Smuzhiyun static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
1050*4882a593Smuzhiyun {
1051*4882a593Smuzhiyun if (!sysctl_protected_symlinks)
1052*4882a593Smuzhiyun return 0;
1053*4882a593Smuzhiyun
1054*4882a593Smuzhiyun /* Allowed if owner and follower match. */
1055*4882a593Smuzhiyun if (uid_eq(current_cred()->fsuid, inode->i_uid))
1056*4882a593Smuzhiyun return 0;
1057*4882a593Smuzhiyun
1058*4882a593Smuzhiyun /* Allowed if parent directory not sticky and world-writable. */
1059*4882a593Smuzhiyun if ((nd->dir_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
1060*4882a593Smuzhiyun return 0;
1061*4882a593Smuzhiyun
1062*4882a593Smuzhiyun /* Allowed if parent directory and link owner match. */
1063*4882a593Smuzhiyun if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, inode->i_uid))
1064*4882a593Smuzhiyun return 0;
1065*4882a593Smuzhiyun
1066*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU)
1067*4882a593Smuzhiyun return -ECHILD;
1068*4882a593Smuzhiyun
1069*4882a593Smuzhiyun audit_inode(nd->name, nd->stack[0].link.dentry, 0);
1070*4882a593Smuzhiyun audit_log_path_denied(AUDIT_ANOM_LINK, "follow_link");
1071*4882a593Smuzhiyun return -EACCES;
1072*4882a593Smuzhiyun }
1073*4882a593Smuzhiyun
1074*4882a593Smuzhiyun /**
1075*4882a593Smuzhiyun * safe_hardlink_source - Check for safe hardlink conditions
1076*4882a593Smuzhiyun * @inode: the source inode to hardlink from
1077*4882a593Smuzhiyun *
1078*4882a593Smuzhiyun * Return false if at least one of the following conditions:
1079*4882a593Smuzhiyun * - inode is not a regular file
1080*4882a593Smuzhiyun * - inode is setuid
1081*4882a593Smuzhiyun * - inode is setgid and group-exec
1082*4882a593Smuzhiyun * - access failure for read and write
1083*4882a593Smuzhiyun *
1084*4882a593Smuzhiyun * Otherwise returns true.
1085*4882a593Smuzhiyun */
safe_hardlink_source(struct inode * inode)1086*4882a593Smuzhiyun static bool safe_hardlink_source(struct inode *inode)
1087*4882a593Smuzhiyun {
1088*4882a593Smuzhiyun umode_t mode = inode->i_mode;
1089*4882a593Smuzhiyun
1090*4882a593Smuzhiyun /* Special files should not get pinned to the filesystem. */
1091*4882a593Smuzhiyun if (!S_ISREG(mode))
1092*4882a593Smuzhiyun return false;
1093*4882a593Smuzhiyun
1094*4882a593Smuzhiyun /* Setuid files should not get pinned to the filesystem. */
1095*4882a593Smuzhiyun if (mode & S_ISUID)
1096*4882a593Smuzhiyun return false;
1097*4882a593Smuzhiyun
1098*4882a593Smuzhiyun /* Executable setgid files should not get pinned to the filesystem. */
1099*4882a593Smuzhiyun if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
1100*4882a593Smuzhiyun return false;
1101*4882a593Smuzhiyun
1102*4882a593Smuzhiyun /* Hardlinking to unreadable or unwritable sources is dangerous. */
1103*4882a593Smuzhiyun if (inode_permission(inode, MAY_READ | MAY_WRITE))
1104*4882a593Smuzhiyun return false;
1105*4882a593Smuzhiyun
1106*4882a593Smuzhiyun return true;
1107*4882a593Smuzhiyun }
1108*4882a593Smuzhiyun
1109*4882a593Smuzhiyun /**
1110*4882a593Smuzhiyun * may_linkat - Check permissions for creating a hardlink
1111*4882a593Smuzhiyun * @link: the source to hardlink from
1112*4882a593Smuzhiyun *
1113*4882a593Smuzhiyun * Block hardlink when all of:
1114*4882a593Smuzhiyun * - sysctl_protected_hardlinks enabled
1115*4882a593Smuzhiyun * - fsuid does not match inode
1116*4882a593Smuzhiyun * - hardlink source is unsafe (see safe_hardlink_source() above)
1117*4882a593Smuzhiyun * - not CAP_FOWNER in a namespace with the inode owner uid mapped
1118*4882a593Smuzhiyun *
1119*4882a593Smuzhiyun * Returns 0 if successful, -ve on error.
1120*4882a593Smuzhiyun */
may_linkat(struct path * link)1121*4882a593Smuzhiyun int may_linkat(struct path *link)
1122*4882a593Smuzhiyun {
1123*4882a593Smuzhiyun struct inode *inode = link->dentry->d_inode;
1124*4882a593Smuzhiyun
1125*4882a593Smuzhiyun /* Inode writeback is not safe when the uid or gid are invalid. */
1126*4882a593Smuzhiyun if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
1127*4882a593Smuzhiyun return -EOVERFLOW;
1128*4882a593Smuzhiyun
1129*4882a593Smuzhiyun if (!sysctl_protected_hardlinks)
1130*4882a593Smuzhiyun return 0;
1131*4882a593Smuzhiyun
1132*4882a593Smuzhiyun /* Source inode owner (or CAP_FOWNER) can hardlink all they like,
1133*4882a593Smuzhiyun * otherwise, it must be a safe source.
1134*4882a593Smuzhiyun */
1135*4882a593Smuzhiyun if (safe_hardlink_source(inode) || inode_owner_or_capable(inode))
1136*4882a593Smuzhiyun return 0;
1137*4882a593Smuzhiyun
1138*4882a593Smuzhiyun audit_log_path_denied(AUDIT_ANOM_LINK, "linkat");
1139*4882a593Smuzhiyun return -EPERM;
1140*4882a593Smuzhiyun }
1141*4882a593Smuzhiyun
1142*4882a593Smuzhiyun /**
1143*4882a593Smuzhiyun * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
1144*4882a593Smuzhiyun * should be allowed, or not, on files that already
1145*4882a593Smuzhiyun * exist.
1146*4882a593Smuzhiyun * @dir_mode: mode bits of directory
1147*4882a593Smuzhiyun * @dir_uid: owner of directory
1148*4882a593Smuzhiyun * @inode: the inode of the file to open
1149*4882a593Smuzhiyun *
1150*4882a593Smuzhiyun * Block an O_CREAT open of a FIFO (or a regular file) when:
1151*4882a593Smuzhiyun * - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
1152*4882a593Smuzhiyun * - the file already exists
1153*4882a593Smuzhiyun * - we are in a sticky directory
1154*4882a593Smuzhiyun * - we don't own the file
1155*4882a593Smuzhiyun * - the owner of the directory doesn't own the file
1156*4882a593Smuzhiyun * - the directory is world writable
1157*4882a593Smuzhiyun * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
1158*4882a593Smuzhiyun * the directory doesn't have to be world writable: being group writable will
1159*4882a593Smuzhiyun * be enough.
1160*4882a593Smuzhiyun *
1161*4882a593Smuzhiyun * Returns 0 if the open is allowed, -ve on error.
1162*4882a593Smuzhiyun */
may_create_in_sticky(umode_t dir_mode,kuid_t dir_uid,struct inode * const inode)1163*4882a593Smuzhiyun static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid,
1164*4882a593Smuzhiyun struct inode * const inode)
1165*4882a593Smuzhiyun {
1166*4882a593Smuzhiyun if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
1167*4882a593Smuzhiyun (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
1168*4882a593Smuzhiyun likely(!(dir_mode & S_ISVTX)) ||
1169*4882a593Smuzhiyun uid_eq(inode->i_uid, dir_uid) ||
1170*4882a593Smuzhiyun uid_eq(current_fsuid(), inode->i_uid))
1171*4882a593Smuzhiyun return 0;
1172*4882a593Smuzhiyun
1173*4882a593Smuzhiyun if (likely(dir_mode & 0002) ||
1174*4882a593Smuzhiyun (dir_mode & 0020 &&
1175*4882a593Smuzhiyun ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
1176*4882a593Smuzhiyun (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
1177*4882a593Smuzhiyun const char *operation = S_ISFIFO(inode->i_mode) ?
1178*4882a593Smuzhiyun "sticky_create_fifo" :
1179*4882a593Smuzhiyun "sticky_create_regular";
1180*4882a593Smuzhiyun audit_log_path_denied(AUDIT_ANOM_CREAT, operation);
1181*4882a593Smuzhiyun return -EACCES;
1182*4882a593Smuzhiyun }
1183*4882a593Smuzhiyun return 0;
1184*4882a593Smuzhiyun }
1185*4882a593Smuzhiyun
1186*4882a593Smuzhiyun /*
1187*4882a593Smuzhiyun * follow_up - Find the mountpoint of path's vfsmount
1188*4882a593Smuzhiyun *
1189*4882a593Smuzhiyun * Given a path, find the mountpoint of its source file system.
1190*4882a593Smuzhiyun * Replace @path with the path of the mountpoint in the parent mount.
1191*4882a593Smuzhiyun * Up is towards /.
1192*4882a593Smuzhiyun *
1193*4882a593Smuzhiyun * Return 1 if we went up a level and 0 if we were already at the
1194*4882a593Smuzhiyun * root.
1195*4882a593Smuzhiyun */
follow_up(struct path * path)1196*4882a593Smuzhiyun int follow_up(struct path *path)
1197*4882a593Smuzhiyun {
1198*4882a593Smuzhiyun struct mount *mnt = real_mount(path->mnt);
1199*4882a593Smuzhiyun struct mount *parent;
1200*4882a593Smuzhiyun struct dentry *mountpoint;
1201*4882a593Smuzhiyun
1202*4882a593Smuzhiyun read_seqlock_excl(&mount_lock);
1203*4882a593Smuzhiyun parent = mnt->mnt_parent;
1204*4882a593Smuzhiyun if (parent == mnt) {
1205*4882a593Smuzhiyun read_sequnlock_excl(&mount_lock);
1206*4882a593Smuzhiyun return 0;
1207*4882a593Smuzhiyun }
1208*4882a593Smuzhiyun mntget(&parent->mnt);
1209*4882a593Smuzhiyun mountpoint = dget(mnt->mnt_mountpoint);
1210*4882a593Smuzhiyun read_sequnlock_excl(&mount_lock);
1211*4882a593Smuzhiyun dput(path->dentry);
1212*4882a593Smuzhiyun path->dentry = mountpoint;
1213*4882a593Smuzhiyun mntput(path->mnt);
1214*4882a593Smuzhiyun path->mnt = &parent->mnt;
1215*4882a593Smuzhiyun return 1;
1216*4882a593Smuzhiyun }
1217*4882a593Smuzhiyun EXPORT_SYMBOL(follow_up);
1218*4882a593Smuzhiyun
choose_mountpoint_rcu(struct mount * m,const struct path * root,struct path * path,unsigned * seqp)1219*4882a593Smuzhiyun static bool choose_mountpoint_rcu(struct mount *m, const struct path *root,
1220*4882a593Smuzhiyun struct path *path, unsigned *seqp)
1221*4882a593Smuzhiyun {
1222*4882a593Smuzhiyun while (mnt_has_parent(m)) {
1223*4882a593Smuzhiyun struct dentry *mountpoint = m->mnt_mountpoint;
1224*4882a593Smuzhiyun
1225*4882a593Smuzhiyun m = m->mnt_parent;
1226*4882a593Smuzhiyun if (unlikely(root->dentry == mountpoint &&
1227*4882a593Smuzhiyun root->mnt == &m->mnt))
1228*4882a593Smuzhiyun break;
1229*4882a593Smuzhiyun if (mountpoint != m->mnt.mnt_root) {
1230*4882a593Smuzhiyun path->mnt = &m->mnt;
1231*4882a593Smuzhiyun path->dentry = mountpoint;
1232*4882a593Smuzhiyun *seqp = read_seqcount_begin(&mountpoint->d_seq);
1233*4882a593Smuzhiyun return true;
1234*4882a593Smuzhiyun }
1235*4882a593Smuzhiyun }
1236*4882a593Smuzhiyun return false;
1237*4882a593Smuzhiyun }
1238*4882a593Smuzhiyun
choose_mountpoint(struct mount * m,const struct path * root,struct path * path)1239*4882a593Smuzhiyun static bool choose_mountpoint(struct mount *m, const struct path *root,
1240*4882a593Smuzhiyun struct path *path)
1241*4882a593Smuzhiyun {
1242*4882a593Smuzhiyun bool found;
1243*4882a593Smuzhiyun
1244*4882a593Smuzhiyun rcu_read_lock();
1245*4882a593Smuzhiyun while (1) {
1246*4882a593Smuzhiyun unsigned seq, mseq = read_seqbegin(&mount_lock);
1247*4882a593Smuzhiyun
1248*4882a593Smuzhiyun found = choose_mountpoint_rcu(m, root, path, &seq);
1249*4882a593Smuzhiyun if (unlikely(!found)) {
1250*4882a593Smuzhiyun if (!read_seqretry(&mount_lock, mseq))
1251*4882a593Smuzhiyun break;
1252*4882a593Smuzhiyun } else {
1253*4882a593Smuzhiyun if (likely(__legitimize_path(path, seq, mseq)))
1254*4882a593Smuzhiyun break;
1255*4882a593Smuzhiyun rcu_read_unlock();
1256*4882a593Smuzhiyun path_put(path);
1257*4882a593Smuzhiyun rcu_read_lock();
1258*4882a593Smuzhiyun }
1259*4882a593Smuzhiyun }
1260*4882a593Smuzhiyun rcu_read_unlock();
1261*4882a593Smuzhiyun return found;
1262*4882a593Smuzhiyun }
1263*4882a593Smuzhiyun
1264*4882a593Smuzhiyun /*
1265*4882a593Smuzhiyun * Perform an automount
1266*4882a593Smuzhiyun * - return -EISDIR to tell follow_managed() to stop and return the path we
1267*4882a593Smuzhiyun * were called with.
1268*4882a593Smuzhiyun */
follow_automount(struct path * path,int * count,unsigned lookup_flags)1269*4882a593Smuzhiyun static int follow_automount(struct path *path, int *count, unsigned lookup_flags)
1270*4882a593Smuzhiyun {
1271*4882a593Smuzhiyun struct dentry *dentry = path->dentry;
1272*4882a593Smuzhiyun
1273*4882a593Smuzhiyun /* We don't want to mount if someone's just doing a stat -
1274*4882a593Smuzhiyun * unless they're stat'ing a directory and appended a '/' to
1275*4882a593Smuzhiyun * the name.
1276*4882a593Smuzhiyun *
1277*4882a593Smuzhiyun * We do, however, want to mount if someone wants to open or
1278*4882a593Smuzhiyun * create a file of any type under the mountpoint, wants to
1279*4882a593Smuzhiyun * traverse through the mountpoint or wants to open the
1280*4882a593Smuzhiyun * mounted directory. Also, autofs may mark negative dentries
1281*4882a593Smuzhiyun * as being automount points. These will need the attentions
1282*4882a593Smuzhiyun * of the daemon to instantiate them before they can be used.
1283*4882a593Smuzhiyun */
1284*4882a593Smuzhiyun if (!(lookup_flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
1285*4882a593Smuzhiyun LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
1286*4882a593Smuzhiyun dentry->d_inode)
1287*4882a593Smuzhiyun return -EISDIR;
1288*4882a593Smuzhiyun
1289*4882a593Smuzhiyun if (count && (*count)++ >= MAXSYMLINKS)
1290*4882a593Smuzhiyun return -ELOOP;
1291*4882a593Smuzhiyun
1292*4882a593Smuzhiyun return finish_automount(dentry->d_op->d_automount(path), path);
1293*4882a593Smuzhiyun }
1294*4882a593Smuzhiyun
1295*4882a593Smuzhiyun /*
1296*4882a593Smuzhiyun * mount traversal - out-of-line part. One note on ->d_flags accesses -
1297*4882a593Smuzhiyun * dentries are pinned but not locked here, so negative dentry can go
1298*4882a593Smuzhiyun * positive right under us. Use of smp_load_acquire() provides a barrier
1299*4882a593Smuzhiyun * sufficient for ->d_inode and ->d_flags consistency.
1300*4882a593Smuzhiyun */
__traverse_mounts(struct path * path,unsigned flags,bool * jumped,int * count,unsigned lookup_flags)1301*4882a593Smuzhiyun static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
1302*4882a593Smuzhiyun int *count, unsigned lookup_flags)
1303*4882a593Smuzhiyun {
1304*4882a593Smuzhiyun struct vfsmount *mnt = path->mnt;
1305*4882a593Smuzhiyun bool need_mntput = false;
1306*4882a593Smuzhiyun int ret = 0;
1307*4882a593Smuzhiyun
1308*4882a593Smuzhiyun while (flags & DCACHE_MANAGED_DENTRY) {
1309*4882a593Smuzhiyun /* Allow the filesystem to manage the transit without i_mutex
1310*4882a593Smuzhiyun * being held. */
1311*4882a593Smuzhiyun if (flags & DCACHE_MANAGE_TRANSIT) {
1312*4882a593Smuzhiyun ret = path->dentry->d_op->d_manage(path, false);
1313*4882a593Smuzhiyun flags = smp_load_acquire(&path->dentry->d_flags);
1314*4882a593Smuzhiyun if (ret < 0)
1315*4882a593Smuzhiyun break;
1316*4882a593Smuzhiyun }
1317*4882a593Smuzhiyun
1318*4882a593Smuzhiyun if (flags & DCACHE_MOUNTED) { // something's mounted on it..
1319*4882a593Smuzhiyun struct vfsmount *mounted = lookup_mnt(path);
1320*4882a593Smuzhiyun if (mounted) { // ... in our namespace
1321*4882a593Smuzhiyun dput(path->dentry);
1322*4882a593Smuzhiyun if (need_mntput)
1323*4882a593Smuzhiyun mntput(path->mnt);
1324*4882a593Smuzhiyun path->mnt = mounted;
1325*4882a593Smuzhiyun path->dentry = dget(mounted->mnt_root);
1326*4882a593Smuzhiyun // here we know it's positive
1327*4882a593Smuzhiyun flags = path->dentry->d_flags;
1328*4882a593Smuzhiyun need_mntput = true;
1329*4882a593Smuzhiyun continue;
1330*4882a593Smuzhiyun }
1331*4882a593Smuzhiyun }
1332*4882a593Smuzhiyun
1333*4882a593Smuzhiyun if (!(flags & DCACHE_NEED_AUTOMOUNT))
1334*4882a593Smuzhiyun break;
1335*4882a593Smuzhiyun
1336*4882a593Smuzhiyun // uncovered automount point
1337*4882a593Smuzhiyun ret = follow_automount(path, count, lookup_flags);
1338*4882a593Smuzhiyun flags = smp_load_acquire(&path->dentry->d_flags);
1339*4882a593Smuzhiyun if (ret < 0)
1340*4882a593Smuzhiyun break;
1341*4882a593Smuzhiyun }
1342*4882a593Smuzhiyun
1343*4882a593Smuzhiyun if (ret == -EISDIR)
1344*4882a593Smuzhiyun ret = 0;
1345*4882a593Smuzhiyun // possible if you race with several mount --move
1346*4882a593Smuzhiyun if (need_mntput && path->mnt == mnt)
1347*4882a593Smuzhiyun mntput(path->mnt);
1348*4882a593Smuzhiyun if (!ret && unlikely(d_flags_negative(flags)))
1349*4882a593Smuzhiyun ret = -ENOENT;
1350*4882a593Smuzhiyun *jumped = need_mntput;
1351*4882a593Smuzhiyun return ret;
1352*4882a593Smuzhiyun }
1353*4882a593Smuzhiyun
/*
 * Step from @path onto whatever is mounted on it, if anything.
 *
 * Samples ->d_flags once (with acquire semantics) and only calls the
 * out-of-line __traverse_mounts() when the dentry is a managed one.
 * For an unmanaged dentry *@jumped is cleared and the result is 0, or
 * -ENOENT if the sampled flags describe a negative dentry.
 */
static inline int traverse_mounts(struct path *path, bool *jumped,
				  int *count, unsigned lookup_flags)
{
	unsigned flags = smp_load_acquire(&path->dentry->d_flags);

	if (unlikely(flags & DCACHE_MANAGED_DENTRY))
		return __traverse_mounts(path, flags, jumped, count,
					 lookup_flags);

	/* fastpath: nothing to cross */
	*jumped = false;
	if (unlikely(d_flags_negative(flags)))
		return -ENOENT;
	return 0;
}
1368*4882a593Smuzhiyun
follow_down_one(struct path * path)1369*4882a593Smuzhiyun int follow_down_one(struct path *path)
1370*4882a593Smuzhiyun {
1371*4882a593Smuzhiyun struct vfsmount *mounted;
1372*4882a593Smuzhiyun
1373*4882a593Smuzhiyun mounted = lookup_mnt(path);
1374*4882a593Smuzhiyun if (mounted) {
1375*4882a593Smuzhiyun dput(path->dentry);
1376*4882a593Smuzhiyun mntput(path->mnt);
1377*4882a593Smuzhiyun path->mnt = mounted;
1378*4882a593Smuzhiyun path->dentry = dget(mounted->mnt_root);
1379*4882a593Smuzhiyun return 1;
1380*4882a593Smuzhiyun }
1381*4882a593Smuzhiyun return 0;
1382*4882a593Smuzhiyun }
1383*4882a593Smuzhiyun EXPORT_SYMBOL(follow_down_one);
1384*4882a593Smuzhiyun
1385*4882a593Smuzhiyun /*
1386*4882a593Smuzhiyun * Follow down to the covering mount currently visible to userspace. At each
1387*4882a593Smuzhiyun * point, the filesystem owning that dentry may be queried as to whether the
1388*4882a593Smuzhiyun * caller is permitted to proceed or not.
1389*4882a593Smuzhiyun */
follow_down(struct path * path)1390*4882a593Smuzhiyun int follow_down(struct path *path)
1391*4882a593Smuzhiyun {
1392*4882a593Smuzhiyun struct vfsmount *mnt = path->mnt;
1393*4882a593Smuzhiyun bool jumped;
1394*4882a593Smuzhiyun int ret = traverse_mounts(path, &jumped, NULL, 0);
1395*4882a593Smuzhiyun
1396*4882a593Smuzhiyun if (path->mnt != mnt)
1397*4882a593Smuzhiyun mntput(mnt);
1398*4882a593Smuzhiyun return ret;
1399*4882a593Smuzhiyun }
1400*4882a593Smuzhiyun EXPORT_SYMBOL(follow_down);
1401*4882a593Smuzhiyun
1402*4882a593Smuzhiyun /*
1403*4882a593Smuzhiyun * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if
1404*4882a593Smuzhiyun * we meet a managed dentry that would need blocking.
1405*4882a593Smuzhiyun */
__follow_mount_rcu(struct nameidata * nd,struct path * path,struct inode ** inode,unsigned * seqp)1406*4882a593Smuzhiyun static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1407*4882a593Smuzhiyun struct inode **inode, unsigned *seqp)
1408*4882a593Smuzhiyun {
1409*4882a593Smuzhiyun struct dentry *dentry = path->dentry;
1410*4882a593Smuzhiyun unsigned int flags = dentry->d_flags;
1411*4882a593Smuzhiyun
1412*4882a593Smuzhiyun if (likely(!(flags & DCACHE_MANAGED_DENTRY)))
1413*4882a593Smuzhiyun return true;
1414*4882a593Smuzhiyun
1415*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_NO_XDEV))
1416*4882a593Smuzhiyun return false;
1417*4882a593Smuzhiyun
1418*4882a593Smuzhiyun for (;;) {
1419*4882a593Smuzhiyun /*
1420*4882a593Smuzhiyun * Don't forget we might have a non-mountpoint managed dentry
1421*4882a593Smuzhiyun * that wants to block transit.
1422*4882a593Smuzhiyun */
1423*4882a593Smuzhiyun if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) {
1424*4882a593Smuzhiyun int res = dentry->d_op->d_manage(path, true);
1425*4882a593Smuzhiyun if (res)
1426*4882a593Smuzhiyun return res == -EISDIR;
1427*4882a593Smuzhiyun flags = dentry->d_flags;
1428*4882a593Smuzhiyun }
1429*4882a593Smuzhiyun
1430*4882a593Smuzhiyun if (flags & DCACHE_MOUNTED) {
1431*4882a593Smuzhiyun struct mount *mounted = __lookup_mnt(path->mnt, dentry);
1432*4882a593Smuzhiyun if (mounted) {
1433*4882a593Smuzhiyun path->mnt = &mounted->mnt;
1434*4882a593Smuzhiyun dentry = path->dentry = mounted->mnt.mnt_root;
1435*4882a593Smuzhiyun nd->flags |= LOOKUP_JUMPED;
1436*4882a593Smuzhiyun *seqp = read_seqcount_begin(&dentry->d_seq);
1437*4882a593Smuzhiyun *inode = dentry->d_inode;
1438*4882a593Smuzhiyun /*
1439*4882a593Smuzhiyun * We don't need to re-check ->d_seq after this
1440*4882a593Smuzhiyun * ->d_inode read - there will be an RCU delay
1441*4882a593Smuzhiyun * between mount hash removal and ->mnt_root
1442*4882a593Smuzhiyun * becoming unpinned.
1443*4882a593Smuzhiyun */
1444*4882a593Smuzhiyun flags = dentry->d_flags;
1445*4882a593Smuzhiyun if (read_seqretry(&mount_lock, nd->m_seq))
1446*4882a593Smuzhiyun return false;
1447*4882a593Smuzhiyun continue;
1448*4882a593Smuzhiyun }
1449*4882a593Smuzhiyun if (read_seqretry(&mount_lock, nd->m_seq))
1450*4882a593Smuzhiyun return false;
1451*4882a593Smuzhiyun }
1452*4882a593Smuzhiyun return !(flags & DCACHE_NEED_AUTOMOUNT);
1453*4882a593Smuzhiyun }
1454*4882a593Smuzhiyun }
1455*4882a593Smuzhiyun
handle_mounts(struct nameidata * nd,struct dentry * dentry,struct path * path,struct inode ** inode,unsigned int * seqp)1456*4882a593Smuzhiyun static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
1457*4882a593Smuzhiyun struct path *path, struct inode **inode,
1458*4882a593Smuzhiyun unsigned int *seqp)
1459*4882a593Smuzhiyun {
1460*4882a593Smuzhiyun bool jumped;
1461*4882a593Smuzhiyun int ret;
1462*4882a593Smuzhiyun
1463*4882a593Smuzhiyun path->mnt = nd->path.mnt;
1464*4882a593Smuzhiyun path->dentry = dentry;
1465*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU) {
1466*4882a593Smuzhiyun unsigned int seq = *seqp;
1467*4882a593Smuzhiyun if (unlikely(!*inode))
1468*4882a593Smuzhiyun return -ENOENT;
1469*4882a593Smuzhiyun if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
1470*4882a593Smuzhiyun return 0;
1471*4882a593Smuzhiyun if (!try_to_unlazy_next(nd, dentry, seq))
1472*4882a593Smuzhiyun return -ECHILD;
1473*4882a593Smuzhiyun // *path might've been clobbered by __follow_mount_rcu()
1474*4882a593Smuzhiyun path->mnt = nd->path.mnt;
1475*4882a593Smuzhiyun path->dentry = dentry;
1476*4882a593Smuzhiyun }
1477*4882a593Smuzhiyun ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
1478*4882a593Smuzhiyun if (jumped) {
1479*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_NO_XDEV))
1480*4882a593Smuzhiyun ret = -EXDEV;
1481*4882a593Smuzhiyun else
1482*4882a593Smuzhiyun nd->flags |= LOOKUP_JUMPED;
1483*4882a593Smuzhiyun }
1484*4882a593Smuzhiyun if (unlikely(ret)) {
1485*4882a593Smuzhiyun dput(path->dentry);
1486*4882a593Smuzhiyun if (path->mnt != nd->path.mnt)
1487*4882a593Smuzhiyun mntput(path->mnt);
1488*4882a593Smuzhiyun } else {
1489*4882a593Smuzhiyun *inode = d_backing_inode(path->dentry);
1490*4882a593Smuzhiyun *seqp = 0; /* out of RCU mode, so the value doesn't matter */
1491*4882a593Smuzhiyun }
1492*4882a593Smuzhiyun return ret;
1493*4882a593Smuzhiyun }
1494*4882a593Smuzhiyun
1495*4882a593Smuzhiyun /*
1496*4882a593Smuzhiyun * This looks up the name in dcache and possibly revalidates the found dentry.
1497*4882a593Smuzhiyun * NULL is returned if the dentry does not exist in the cache.
1498*4882a593Smuzhiyun */
lookup_dcache(const struct qstr * name,struct dentry * dir,unsigned int flags)1499*4882a593Smuzhiyun static struct dentry *lookup_dcache(const struct qstr *name,
1500*4882a593Smuzhiyun struct dentry *dir,
1501*4882a593Smuzhiyun unsigned int flags)
1502*4882a593Smuzhiyun {
1503*4882a593Smuzhiyun struct dentry *dentry = d_lookup(dir, name);
1504*4882a593Smuzhiyun if (dentry) {
1505*4882a593Smuzhiyun int error = d_revalidate(dentry, flags);
1506*4882a593Smuzhiyun if (unlikely(error <= 0)) {
1507*4882a593Smuzhiyun if (!error)
1508*4882a593Smuzhiyun d_invalidate(dentry);
1509*4882a593Smuzhiyun dput(dentry);
1510*4882a593Smuzhiyun return ERR_PTR(error);
1511*4882a593Smuzhiyun }
1512*4882a593Smuzhiyun }
1513*4882a593Smuzhiyun return dentry;
1514*4882a593Smuzhiyun }
1515*4882a593Smuzhiyun
1516*4882a593Smuzhiyun /*
1517*4882a593Smuzhiyun * Parent directory has inode locked exclusive. This is one
1518*4882a593Smuzhiyun * and only case when ->lookup() gets called on non in-lookup
1519*4882a593Smuzhiyun * dentries - as the matter of fact, this only gets called
1520*4882a593Smuzhiyun * when directory is guaranteed to have no in-lookup children
1521*4882a593Smuzhiyun * at all.
1522*4882a593Smuzhiyun */
__lookup_hash(const struct qstr * name,struct dentry * base,unsigned int flags)1523*4882a593Smuzhiyun static struct dentry *__lookup_hash(const struct qstr *name,
1524*4882a593Smuzhiyun struct dentry *base, unsigned int flags)
1525*4882a593Smuzhiyun {
1526*4882a593Smuzhiyun struct dentry *dentry = lookup_dcache(name, base, flags);
1527*4882a593Smuzhiyun struct dentry *old;
1528*4882a593Smuzhiyun struct inode *dir = base->d_inode;
1529*4882a593Smuzhiyun
1530*4882a593Smuzhiyun if (dentry)
1531*4882a593Smuzhiyun return dentry;
1532*4882a593Smuzhiyun
1533*4882a593Smuzhiyun /* Don't create child dentry for a dead directory. */
1534*4882a593Smuzhiyun if (unlikely(IS_DEADDIR(dir)))
1535*4882a593Smuzhiyun return ERR_PTR(-ENOENT);
1536*4882a593Smuzhiyun
1537*4882a593Smuzhiyun dentry = d_alloc(base, name);
1538*4882a593Smuzhiyun if (unlikely(!dentry))
1539*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
1540*4882a593Smuzhiyun
1541*4882a593Smuzhiyun old = dir->i_op->lookup(dir, dentry, flags);
1542*4882a593Smuzhiyun if (unlikely(old)) {
1543*4882a593Smuzhiyun dput(dentry);
1544*4882a593Smuzhiyun dentry = old;
1545*4882a593Smuzhiyun }
1546*4882a593Smuzhiyun return dentry;
1547*4882a593Smuzhiyun }
1548*4882a593Smuzhiyun
lookup_fast(struct nameidata * nd,struct inode ** inode,unsigned * seqp)1549*4882a593Smuzhiyun static struct dentry *lookup_fast(struct nameidata *nd,
1550*4882a593Smuzhiyun struct inode **inode,
1551*4882a593Smuzhiyun unsigned *seqp)
1552*4882a593Smuzhiyun {
1553*4882a593Smuzhiyun struct dentry *dentry, *parent = nd->path.dentry;
1554*4882a593Smuzhiyun int status = 1;
1555*4882a593Smuzhiyun
1556*4882a593Smuzhiyun /*
1557*4882a593Smuzhiyun * Rename seqlock is not required here because in the off chance
1558*4882a593Smuzhiyun * of a false negative due to a concurrent rename, the caller is
1559*4882a593Smuzhiyun * going to fall back to non-racy lookup.
1560*4882a593Smuzhiyun */
1561*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU) {
1562*4882a593Smuzhiyun unsigned seq;
1563*4882a593Smuzhiyun dentry = __d_lookup_rcu(parent, &nd->last, &seq);
1564*4882a593Smuzhiyun if (unlikely(!dentry)) {
1565*4882a593Smuzhiyun if (!try_to_unlazy(nd))
1566*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
1567*4882a593Smuzhiyun return NULL;
1568*4882a593Smuzhiyun }
1569*4882a593Smuzhiyun
1570*4882a593Smuzhiyun /*
1571*4882a593Smuzhiyun * This sequence count validates that the inode matches
1572*4882a593Smuzhiyun * the dentry name information from lookup.
1573*4882a593Smuzhiyun */
1574*4882a593Smuzhiyun *inode = d_backing_inode(dentry);
1575*4882a593Smuzhiyun if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
1576*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
1577*4882a593Smuzhiyun
1578*4882a593Smuzhiyun /*
1579*4882a593Smuzhiyun * This sequence count validates that the parent had no
1580*4882a593Smuzhiyun * changes while we did the lookup of the dentry above.
1581*4882a593Smuzhiyun *
1582*4882a593Smuzhiyun * The memory barrier in read_seqcount_begin of child is
1583*4882a593Smuzhiyun * enough, we can use __read_seqcount_retry here.
1584*4882a593Smuzhiyun */
1585*4882a593Smuzhiyun if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
1586*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
1587*4882a593Smuzhiyun
1588*4882a593Smuzhiyun *seqp = seq;
1589*4882a593Smuzhiyun status = d_revalidate(dentry, nd->flags);
1590*4882a593Smuzhiyun if (likely(status > 0))
1591*4882a593Smuzhiyun return dentry;
1592*4882a593Smuzhiyun if (!try_to_unlazy_next(nd, dentry, seq))
1593*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
1594*4882a593Smuzhiyun if (unlikely(status == -ECHILD))
1595*4882a593Smuzhiyun /* we'd been told to redo it in non-rcu mode */
1596*4882a593Smuzhiyun status = d_revalidate(dentry, nd->flags);
1597*4882a593Smuzhiyun } else {
1598*4882a593Smuzhiyun dentry = __d_lookup(parent, &nd->last);
1599*4882a593Smuzhiyun if (unlikely(!dentry))
1600*4882a593Smuzhiyun return NULL;
1601*4882a593Smuzhiyun status = d_revalidate(dentry, nd->flags);
1602*4882a593Smuzhiyun }
1603*4882a593Smuzhiyun if (unlikely(status <= 0)) {
1604*4882a593Smuzhiyun if (!status)
1605*4882a593Smuzhiyun d_invalidate(dentry);
1606*4882a593Smuzhiyun dput(dentry);
1607*4882a593Smuzhiyun return ERR_PTR(status);
1608*4882a593Smuzhiyun }
1609*4882a593Smuzhiyun return dentry;
1610*4882a593Smuzhiyun }
1611*4882a593Smuzhiyun
1612*4882a593Smuzhiyun /* Fast lookup failed, do it the slow way */
__lookup_slow(const struct qstr * name,struct dentry * dir,unsigned int flags)1613*4882a593Smuzhiyun static struct dentry *__lookup_slow(const struct qstr *name,
1614*4882a593Smuzhiyun struct dentry *dir,
1615*4882a593Smuzhiyun unsigned int flags)
1616*4882a593Smuzhiyun {
1617*4882a593Smuzhiyun struct dentry *dentry, *old;
1618*4882a593Smuzhiyun struct inode *inode = dir->d_inode;
1619*4882a593Smuzhiyun DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
1620*4882a593Smuzhiyun
1621*4882a593Smuzhiyun /* Don't go there if it's already dead */
1622*4882a593Smuzhiyun if (unlikely(IS_DEADDIR(inode)))
1623*4882a593Smuzhiyun return ERR_PTR(-ENOENT);
1624*4882a593Smuzhiyun again:
1625*4882a593Smuzhiyun dentry = d_alloc_parallel(dir, name, &wq);
1626*4882a593Smuzhiyun if (IS_ERR(dentry))
1627*4882a593Smuzhiyun return dentry;
1628*4882a593Smuzhiyun if (unlikely(!d_in_lookup(dentry))) {
1629*4882a593Smuzhiyun int error = d_revalidate(dentry, flags);
1630*4882a593Smuzhiyun if (unlikely(error <= 0)) {
1631*4882a593Smuzhiyun if (!error) {
1632*4882a593Smuzhiyun d_invalidate(dentry);
1633*4882a593Smuzhiyun dput(dentry);
1634*4882a593Smuzhiyun goto again;
1635*4882a593Smuzhiyun }
1636*4882a593Smuzhiyun dput(dentry);
1637*4882a593Smuzhiyun dentry = ERR_PTR(error);
1638*4882a593Smuzhiyun }
1639*4882a593Smuzhiyun } else {
1640*4882a593Smuzhiyun old = inode->i_op->lookup(inode, dentry, flags);
1641*4882a593Smuzhiyun d_lookup_done(dentry);
1642*4882a593Smuzhiyun if (unlikely(old)) {
1643*4882a593Smuzhiyun dput(dentry);
1644*4882a593Smuzhiyun dentry = old;
1645*4882a593Smuzhiyun }
1646*4882a593Smuzhiyun }
1647*4882a593Smuzhiyun return dentry;
1648*4882a593Smuzhiyun }
1649*4882a593Smuzhiyun
lookup_slow(const struct qstr * name,struct dentry * dir,unsigned int flags)1650*4882a593Smuzhiyun static struct dentry *lookup_slow(const struct qstr *name,
1651*4882a593Smuzhiyun struct dentry *dir,
1652*4882a593Smuzhiyun unsigned int flags)
1653*4882a593Smuzhiyun {
1654*4882a593Smuzhiyun struct inode *inode = dir->d_inode;
1655*4882a593Smuzhiyun struct dentry *res;
1656*4882a593Smuzhiyun inode_lock_shared(inode);
1657*4882a593Smuzhiyun res = __lookup_slow(name, dir, flags);
1658*4882a593Smuzhiyun inode_unlock_shared(inode);
1659*4882a593Smuzhiyun return res;
1660*4882a593Smuzhiyun }
1661*4882a593Smuzhiyun
may_lookup(struct nameidata * nd)1662*4882a593Smuzhiyun static inline int may_lookup(struct nameidata *nd)
1663*4882a593Smuzhiyun {
1664*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU) {
1665*4882a593Smuzhiyun int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
1666*4882a593Smuzhiyun if (err != -ECHILD || !try_to_unlazy(nd))
1667*4882a593Smuzhiyun return err;
1668*4882a593Smuzhiyun }
1669*4882a593Smuzhiyun return inode_permission(nd->inode, MAY_EXEC);
1670*4882a593Smuzhiyun }
1671*4882a593Smuzhiyun
reserve_stack(struct nameidata * nd,struct path * link,unsigned seq)1672*4882a593Smuzhiyun static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
1673*4882a593Smuzhiyun {
1674*4882a593Smuzhiyun if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
1675*4882a593Smuzhiyun return -ELOOP;
1676*4882a593Smuzhiyun
1677*4882a593Smuzhiyun if (likely(nd->depth != EMBEDDED_LEVELS))
1678*4882a593Smuzhiyun return 0;
1679*4882a593Smuzhiyun if (likely(nd->stack != nd->internal))
1680*4882a593Smuzhiyun return 0;
1681*4882a593Smuzhiyun if (likely(nd_alloc_stack(nd)))
1682*4882a593Smuzhiyun return 0;
1683*4882a593Smuzhiyun
1684*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU) {
1685*4882a593Smuzhiyun // we need to grab link before we do unlazy. And we can't skip
1686*4882a593Smuzhiyun // unlazy even if we fail to grab the link - cleanup needs it
1687*4882a593Smuzhiyun bool grabbed_link = legitimize_path(nd, link, seq);
1688*4882a593Smuzhiyun
1689*4882a593Smuzhiyun if (!try_to_unlazy(nd) != 0 || !grabbed_link)
1690*4882a593Smuzhiyun return -ECHILD;
1691*4882a593Smuzhiyun
1692*4882a593Smuzhiyun if (nd_alloc_stack(nd))
1693*4882a593Smuzhiyun return 0;
1694*4882a593Smuzhiyun }
1695*4882a593Smuzhiyun return -ENOMEM;
1696*4882a593Smuzhiyun }
1697*4882a593Smuzhiyun
/* Bit flags taken by walk_component(), step_into() and pick_link(). */
enum {
	/*
	 * step_into() leaves a symlink alone if this is set and
	 * LOOKUP_FOLLOW is not; pick_link() runs may_follow_link().
	 */
	WALK_TRAILING = 1,
	/* walk_component() does not put_link() the current link */
	WALK_MORE = 2,
	/* step_into() never follows a symlink */
	WALK_NOFOLLOW = 4,
};
1699*4882a593Smuzhiyun
pick_link(struct nameidata * nd,struct path * link,struct inode * inode,unsigned seq,int flags)1700*4882a593Smuzhiyun static const char *pick_link(struct nameidata *nd, struct path *link,
1701*4882a593Smuzhiyun struct inode *inode, unsigned seq, int flags)
1702*4882a593Smuzhiyun {
1703*4882a593Smuzhiyun struct saved *last;
1704*4882a593Smuzhiyun const char *res;
1705*4882a593Smuzhiyun int error = reserve_stack(nd, link, seq);
1706*4882a593Smuzhiyun
1707*4882a593Smuzhiyun if (unlikely(error)) {
1708*4882a593Smuzhiyun if (!(nd->flags & LOOKUP_RCU))
1709*4882a593Smuzhiyun path_put(link);
1710*4882a593Smuzhiyun return ERR_PTR(error);
1711*4882a593Smuzhiyun }
1712*4882a593Smuzhiyun last = nd->stack + nd->depth++;
1713*4882a593Smuzhiyun last->link = *link;
1714*4882a593Smuzhiyun clear_delayed_call(&last->done);
1715*4882a593Smuzhiyun last->seq = seq;
1716*4882a593Smuzhiyun
1717*4882a593Smuzhiyun if (flags & WALK_TRAILING) {
1718*4882a593Smuzhiyun error = may_follow_link(nd, inode);
1719*4882a593Smuzhiyun if (unlikely(error))
1720*4882a593Smuzhiyun return ERR_PTR(error);
1721*4882a593Smuzhiyun }
1722*4882a593Smuzhiyun
1723*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS) ||
1724*4882a593Smuzhiyun unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW))
1725*4882a593Smuzhiyun return ERR_PTR(-ELOOP);
1726*4882a593Smuzhiyun
1727*4882a593Smuzhiyun if (!(nd->flags & LOOKUP_RCU)) {
1728*4882a593Smuzhiyun touch_atime(&last->link);
1729*4882a593Smuzhiyun cond_resched();
1730*4882a593Smuzhiyun } else if (atime_needs_update(&last->link, inode)) {
1731*4882a593Smuzhiyun if (!try_to_unlazy(nd))
1732*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
1733*4882a593Smuzhiyun touch_atime(&last->link);
1734*4882a593Smuzhiyun }
1735*4882a593Smuzhiyun
1736*4882a593Smuzhiyun error = security_inode_follow_link(link->dentry, inode,
1737*4882a593Smuzhiyun nd->flags & LOOKUP_RCU);
1738*4882a593Smuzhiyun if (unlikely(error))
1739*4882a593Smuzhiyun return ERR_PTR(error);
1740*4882a593Smuzhiyun
1741*4882a593Smuzhiyun res = READ_ONCE(inode->i_link);
1742*4882a593Smuzhiyun if (!res) {
1743*4882a593Smuzhiyun const char * (*get)(struct dentry *, struct inode *,
1744*4882a593Smuzhiyun struct delayed_call *);
1745*4882a593Smuzhiyun get = inode->i_op->get_link;
1746*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU) {
1747*4882a593Smuzhiyun res = get(NULL, inode, &last->done);
1748*4882a593Smuzhiyun if (res == ERR_PTR(-ECHILD) && try_to_unlazy(nd))
1749*4882a593Smuzhiyun res = get(link->dentry, inode, &last->done);
1750*4882a593Smuzhiyun } else {
1751*4882a593Smuzhiyun res = get(link->dentry, inode, &last->done);
1752*4882a593Smuzhiyun }
1753*4882a593Smuzhiyun if (!res)
1754*4882a593Smuzhiyun goto all_done;
1755*4882a593Smuzhiyun if (IS_ERR(res))
1756*4882a593Smuzhiyun return res;
1757*4882a593Smuzhiyun }
1758*4882a593Smuzhiyun if (*res == '/') {
1759*4882a593Smuzhiyun error = nd_jump_root(nd);
1760*4882a593Smuzhiyun if (unlikely(error))
1761*4882a593Smuzhiyun return ERR_PTR(error);
1762*4882a593Smuzhiyun while (unlikely(*++res == '/'))
1763*4882a593Smuzhiyun ;
1764*4882a593Smuzhiyun }
1765*4882a593Smuzhiyun if (*res)
1766*4882a593Smuzhiyun return res;
1767*4882a593Smuzhiyun all_done: // pure jump
1768*4882a593Smuzhiyun put_link(nd);
1769*4882a593Smuzhiyun return NULL;
1770*4882a593Smuzhiyun }
1771*4882a593Smuzhiyun
1772*4882a593Smuzhiyun /*
1773*4882a593Smuzhiyun * Do we need to follow links? We _really_ want to be able
1774*4882a593Smuzhiyun * to do this check without having to look at inode->i_op,
1775*4882a593Smuzhiyun * so we keep a cache of "no, this doesn't need follow_link"
1776*4882a593Smuzhiyun * for the common case.
1777*4882a593Smuzhiyun */
step_into(struct nameidata * nd,int flags,struct dentry * dentry,struct inode * inode,unsigned seq)1778*4882a593Smuzhiyun static const char *step_into(struct nameidata *nd, int flags,
1779*4882a593Smuzhiyun struct dentry *dentry, struct inode *inode, unsigned seq)
1780*4882a593Smuzhiyun {
1781*4882a593Smuzhiyun struct path path;
1782*4882a593Smuzhiyun int err = handle_mounts(nd, dentry, &path, &inode, &seq);
1783*4882a593Smuzhiyun
1784*4882a593Smuzhiyun if (err < 0)
1785*4882a593Smuzhiyun return ERR_PTR(err);
1786*4882a593Smuzhiyun if (likely(!d_is_symlink(path.dentry)) ||
1787*4882a593Smuzhiyun ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) ||
1788*4882a593Smuzhiyun (flags & WALK_NOFOLLOW)) {
1789*4882a593Smuzhiyun /* not a symlink or should not follow */
1790*4882a593Smuzhiyun if (!(nd->flags & LOOKUP_RCU)) {
1791*4882a593Smuzhiyun dput(nd->path.dentry);
1792*4882a593Smuzhiyun if (nd->path.mnt != path.mnt)
1793*4882a593Smuzhiyun mntput(nd->path.mnt);
1794*4882a593Smuzhiyun }
1795*4882a593Smuzhiyun nd->path = path;
1796*4882a593Smuzhiyun nd->inode = inode;
1797*4882a593Smuzhiyun nd->seq = seq;
1798*4882a593Smuzhiyun return NULL;
1799*4882a593Smuzhiyun }
1800*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU) {
1801*4882a593Smuzhiyun /* make sure that d_is_symlink above matches inode */
1802*4882a593Smuzhiyun if (read_seqcount_retry(&path.dentry->d_seq, seq))
1803*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
1804*4882a593Smuzhiyun } else {
1805*4882a593Smuzhiyun if (path.mnt == nd->path.mnt)
1806*4882a593Smuzhiyun mntget(path.mnt);
1807*4882a593Smuzhiyun }
1808*4882a593Smuzhiyun return pick_link(nd, &path, inode, seq, flags);
1809*4882a593Smuzhiyun }
1810*4882a593Smuzhiyun
follow_dotdot_rcu(struct nameidata * nd,struct inode ** inodep,unsigned * seqp)1811*4882a593Smuzhiyun static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
1812*4882a593Smuzhiyun struct inode **inodep,
1813*4882a593Smuzhiyun unsigned *seqp)
1814*4882a593Smuzhiyun {
1815*4882a593Smuzhiyun struct dentry *parent, *old;
1816*4882a593Smuzhiyun
1817*4882a593Smuzhiyun if (path_equal(&nd->path, &nd->root))
1818*4882a593Smuzhiyun goto in_root;
1819*4882a593Smuzhiyun if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
1820*4882a593Smuzhiyun struct path path;
1821*4882a593Smuzhiyun unsigned seq;
1822*4882a593Smuzhiyun if (!choose_mountpoint_rcu(real_mount(nd->path.mnt),
1823*4882a593Smuzhiyun &nd->root, &path, &seq))
1824*4882a593Smuzhiyun goto in_root;
1825*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_NO_XDEV))
1826*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
1827*4882a593Smuzhiyun nd->path = path;
1828*4882a593Smuzhiyun nd->inode = path.dentry->d_inode;
1829*4882a593Smuzhiyun nd->seq = seq;
1830*4882a593Smuzhiyun if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
1831*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
1832*4882a593Smuzhiyun /* we know that mountpoint was pinned */
1833*4882a593Smuzhiyun }
1834*4882a593Smuzhiyun old = nd->path.dentry;
1835*4882a593Smuzhiyun parent = old->d_parent;
1836*4882a593Smuzhiyun *inodep = parent->d_inode;
1837*4882a593Smuzhiyun *seqp = read_seqcount_begin(&parent->d_seq);
1838*4882a593Smuzhiyun if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
1839*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
1840*4882a593Smuzhiyun if (unlikely(!path_connected(nd->path.mnt, parent)))
1841*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
1842*4882a593Smuzhiyun return parent;
1843*4882a593Smuzhiyun in_root:
1844*4882a593Smuzhiyun if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
1845*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
1846*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_BENEATH))
1847*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
1848*4882a593Smuzhiyun return NULL;
1849*4882a593Smuzhiyun }
1850*4882a593Smuzhiyun
follow_dotdot(struct nameidata * nd,struct inode ** inodep,unsigned * seqp)1851*4882a593Smuzhiyun static struct dentry *follow_dotdot(struct nameidata *nd,
1852*4882a593Smuzhiyun struct inode **inodep,
1853*4882a593Smuzhiyun unsigned *seqp)
1854*4882a593Smuzhiyun {
1855*4882a593Smuzhiyun struct dentry *parent;
1856*4882a593Smuzhiyun
1857*4882a593Smuzhiyun if (path_equal(&nd->path, &nd->root))
1858*4882a593Smuzhiyun goto in_root;
1859*4882a593Smuzhiyun if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
1860*4882a593Smuzhiyun struct path path;
1861*4882a593Smuzhiyun
1862*4882a593Smuzhiyun if (!choose_mountpoint(real_mount(nd->path.mnt),
1863*4882a593Smuzhiyun &nd->root, &path))
1864*4882a593Smuzhiyun goto in_root;
1865*4882a593Smuzhiyun path_put(&nd->path);
1866*4882a593Smuzhiyun nd->path = path;
1867*4882a593Smuzhiyun nd->inode = path.dentry->d_inode;
1868*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_NO_XDEV))
1869*4882a593Smuzhiyun return ERR_PTR(-EXDEV);
1870*4882a593Smuzhiyun }
1871*4882a593Smuzhiyun /* rare case of legitimate dget_parent()... */
1872*4882a593Smuzhiyun parent = dget_parent(nd->path.dentry);
1873*4882a593Smuzhiyun if (unlikely(!path_connected(nd->path.mnt, parent))) {
1874*4882a593Smuzhiyun dput(parent);
1875*4882a593Smuzhiyun return ERR_PTR(-ENOENT);
1876*4882a593Smuzhiyun }
1877*4882a593Smuzhiyun *seqp = 0;
1878*4882a593Smuzhiyun *inodep = parent->d_inode;
1879*4882a593Smuzhiyun return parent;
1880*4882a593Smuzhiyun
1881*4882a593Smuzhiyun in_root:
1882*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_BENEATH))
1883*4882a593Smuzhiyun return ERR_PTR(-EXDEV);
1884*4882a593Smuzhiyun dget(nd->path.dentry);
1885*4882a593Smuzhiyun return NULL;
1886*4882a593Smuzhiyun }
1887*4882a593Smuzhiyun
handle_dots(struct nameidata * nd,int type)1888*4882a593Smuzhiyun static const char *handle_dots(struct nameidata *nd, int type)
1889*4882a593Smuzhiyun {
1890*4882a593Smuzhiyun if (type == LAST_DOTDOT) {
1891*4882a593Smuzhiyun const char *error = NULL;
1892*4882a593Smuzhiyun struct dentry *parent;
1893*4882a593Smuzhiyun struct inode *inode;
1894*4882a593Smuzhiyun unsigned seq;
1895*4882a593Smuzhiyun
1896*4882a593Smuzhiyun if (!nd->root.mnt) {
1897*4882a593Smuzhiyun error = ERR_PTR(set_root(nd));
1898*4882a593Smuzhiyun if (error)
1899*4882a593Smuzhiyun return error;
1900*4882a593Smuzhiyun }
1901*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU)
1902*4882a593Smuzhiyun parent = follow_dotdot_rcu(nd, &inode, &seq);
1903*4882a593Smuzhiyun else
1904*4882a593Smuzhiyun parent = follow_dotdot(nd, &inode, &seq);
1905*4882a593Smuzhiyun if (IS_ERR(parent))
1906*4882a593Smuzhiyun return ERR_CAST(parent);
1907*4882a593Smuzhiyun if (unlikely(!parent))
1908*4882a593Smuzhiyun error = step_into(nd, WALK_NOFOLLOW,
1909*4882a593Smuzhiyun nd->path.dentry, nd->inode, nd->seq);
1910*4882a593Smuzhiyun else
1911*4882a593Smuzhiyun error = step_into(nd, WALK_NOFOLLOW,
1912*4882a593Smuzhiyun parent, inode, seq);
1913*4882a593Smuzhiyun if (unlikely(error))
1914*4882a593Smuzhiyun return error;
1915*4882a593Smuzhiyun
1916*4882a593Smuzhiyun if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
1917*4882a593Smuzhiyun /*
1918*4882a593Smuzhiyun * If there was a racing rename or mount along our
1919*4882a593Smuzhiyun * path, then we can't be sure that ".." hasn't jumped
1920*4882a593Smuzhiyun * above nd->root (and so userspace should retry or use
1921*4882a593Smuzhiyun * some fallback).
1922*4882a593Smuzhiyun */
1923*4882a593Smuzhiyun smp_rmb();
1924*4882a593Smuzhiyun if (unlikely(__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)))
1925*4882a593Smuzhiyun return ERR_PTR(-EAGAIN);
1926*4882a593Smuzhiyun if (unlikely(__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)))
1927*4882a593Smuzhiyun return ERR_PTR(-EAGAIN);
1928*4882a593Smuzhiyun }
1929*4882a593Smuzhiyun }
1930*4882a593Smuzhiyun return NULL;
1931*4882a593Smuzhiyun }
1932*4882a593Smuzhiyun
walk_component(struct nameidata * nd,int flags)1933*4882a593Smuzhiyun static const char *walk_component(struct nameidata *nd, int flags)
1934*4882a593Smuzhiyun {
1935*4882a593Smuzhiyun struct dentry *dentry;
1936*4882a593Smuzhiyun struct inode *inode;
1937*4882a593Smuzhiyun unsigned seq;
1938*4882a593Smuzhiyun /*
1939*4882a593Smuzhiyun * "." and ".." are special - ".." especially so because it has
1940*4882a593Smuzhiyun * to be able to know about the current root directory and
1941*4882a593Smuzhiyun * parent relationships.
1942*4882a593Smuzhiyun */
1943*4882a593Smuzhiyun if (unlikely(nd->last_type != LAST_NORM)) {
1944*4882a593Smuzhiyun if (!(flags & WALK_MORE) && nd->depth)
1945*4882a593Smuzhiyun put_link(nd);
1946*4882a593Smuzhiyun return handle_dots(nd, nd->last_type);
1947*4882a593Smuzhiyun }
1948*4882a593Smuzhiyun dentry = lookup_fast(nd, &inode, &seq);
1949*4882a593Smuzhiyun if (IS_ERR(dentry))
1950*4882a593Smuzhiyun return ERR_CAST(dentry);
1951*4882a593Smuzhiyun if (unlikely(!dentry)) {
1952*4882a593Smuzhiyun dentry = lookup_slow(&nd->last, nd->path.dentry, nd->flags);
1953*4882a593Smuzhiyun if (IS_ERR(dentry))
1954*4882a593Smuzhiyun return ERR_CAST(dentry);
1955*4882a593Smuzhiyun }
1956*4882a593Smuzhiyun if (!(flags & WALK_MORE) && nd->depth)
1957*4882a593Smuzhiyun put_link(nd);
1958*4882a593Smuzhiyun return step_into(nd, flags, dentry, inode, seq);
1959*4882a593Smuzhiyun }
1960*4882a593Smuzhiyun
/*
 * We can do the critical dentry name comparison and hashing
 * operations one word at a time, but we are limited to:
 *
 * - Architectures with fast unaligned word accesses. We could
 *   do a "get_unaligned()" if this helps and is sufficiently
 *   fast.
 *
 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
 *   do not trap on the (extremely unlikely) case of a page
 *   crossing operation).
 *
 * - Furthermore, we need an efficient 64-bit compile for the
 *   64-bit case in order to generate the "number of bytes in
 *   the final mask". Again, that could be replaced with an
 *   efficient population count instruction or similar.
 */
1978*4882a593Smuzhiyun #ifdef CONFIG_DCACHE_WORD_ACCESS
1979*4882a593Smuzhiyun
1980*4882a593Smuzhiyun #include <asm/word-at-a-time.h>
1981*4882a593Smuzhiyun
1982*4882a593Smuzhiyun #ifdef HASH_MIX
1983*4882a593Smuzhiyun
1984*4882a593Smuzhiyun /* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */
1985*4882a593Smuzhiyun
1986*4882a593Smuzhiyun #elif defined(CONFIG_64BIT)
1987*4882a593Smuzhiyun /*
1988*4882a593Smuzhiyun * Register pressure in the mixing function is an issue, particularly
1989*4882a593Smuzhiyun * on 32-bit x86, but almost any function requires one state value and
1990*4882a593Smuzhiyun * one temporary. Instead, use a function designed for two state values
1991*4882a593Smuzhiyun * and no temporaries.
1992*4882a593Smuzhiyun *
1993*4882a593Smuzhiyun * This function cannot create a collision in only two iterations, so
1994*4882a593Smuzhiyun * we have two iterations to achieve avalanche. In those two iterations,
1995*4882a593Smuzhiyun * we have six layers of mixing, which is enough to spread one bit's
1996*4882a593Smuzhiyun * influence out to 2^6 = 64 state bits.
1997*4882a593Smuzhiyun *
1998*4882a593Smuzhiyun * Rotate constants are scored by considering either 64 one-bit input
1999*4882a593Smuzhiyun * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the
2000*4882a593Smuzhiyun * probability of that delta causing a change to each of the 128 output
2001*4882a593Smuzhiyun * bits, using a sample of random initial states.
2002*4882a593Smuzhiyun *
2003*4882a593Smuzhiyun * The Shannon entropy of the computed probabilities is then summed
2004*4882a593Smuzhiyun * to produce a score. Ideally, any input change has a 50% chance of
2005*4882a593Smuzhiyun * toggling any given output bit.
2006*4882a593Smuzhiyun *
2007*4882a593Smuzhiyun * Mixing scores (in bits) for (12,45):
2008*4882a593Smuzhiyun * Input delta: 1-bit 2-bit
2009*4882a593Smuzhiyun * 1 round: 713.3 42542.6
2010*4882a593Smuzhiyun * 2 rounds: 2753.7 140389.8
2011*4882a593Smuzhiyun * 3 rounds: 5954.1 233458.2
2012*4882a593Smuzhiyun * 4 rounds: 7862.6 256672.2
2013*4882a593Smuzhiyun * Perfect: 8192 258048
2014*4882a593Smuzhiyun * (64*128) (64*63/2 * 128)
2015*4882a593Smuzhiyun */
/*
 * One mixing round over the two state words (x, y) with input word a:
 * xor-in, cross-xor, rotate by 12, add, rotate by 45, multiply by 9.
 * x and y must be modifiable lvalues; both are updated in place.
 */
#define HASH_MIX(x, y, a)				\
	((x) ^= (a),					\
	 (y) ^= (x), (x) = rol64((x), 12),		\
	 (x) += (y), (y) = rol64((y), 45),		\
	 (y) *= 9)
2021*4882a593Smuzhiyun
2022*4882a593Smuzhiyun /*
2023*4882a593Smuzhiyun * Fold two longs into one 32-bit hash value. This must be fast, but
2024*4882a593Smuzhiyun * latency isn't quite as critical, as there is a fair bit of additional
2025*4882a593Smuzhiyun * work done before the hash value is used.
2026*4882a593Smuzhiyun */
fold_hash(unsigned long x,unsigned long y)2027*4882a593Smuzhiyun static inline unsigned int fold_hash(unsigned long x, unsigned long y)
2028*4882a593Smuzhiyun {
2029*4882a593Smuzhiyun y ^= x * GOLDEN_RATIO_64;
2030*4882a593Smuzhiyun y *= GOLDEN_RATIO_64;
2031*4882a593Smuzhiyun return y >> 32;
2032*4882a593Smuzhiyun }
2033*4882a593Smuzhiyun
2034*4882a593Smuzhiyun #else /* 32-bit case */
2035*4882a593Smuzhiyun
2036*4882a593Smuzhiyun /*
2037*4882a593Smuzhiyun * Mixing scores (in bits) for (7,20):
2038*4882a593Smuzhiyun * Input delta: 1-bit 2-bit
2039*4882a593Smuzhiyun * 1 round: 330.3 9201.6
2040*4882a593Smuzhiyun * 2 rounds: 1246.4 25475.4
2041*4882a593Smuzhiyun * 3 rounds: 1907.1 31295.1
2042*4882a593Smuzhiyun * 4 rounds: 2042.3 31718.6
2043*4882a593Smuzhiyun * Perfect: 2048 31744
2044*4882a593Smuzhiyun * (32*64) (32*31/2 * 64)
2045*4882a593Smuzhiyun */
/*
 * One mixing round over the two state words (x, y) with input word a:
 * xor-in, cross-xor, rotate by 7, add, rotate by 20, multiply by 9.
 * x and y must be modifiable lvalues; both are updated in place.
 */
#define HASH_MIX(x, y, a)				\
	((x) ^= (a),					\
	 (y) ^= (x), (x) = rol32((x), 7),		\
	 (x) += (y), (y) = rol32((y), 20),		\
	 (y) *= 9)
2051*4882a593Smuzhiyun
/*
 * Collapse the two state words into one hash value by running
 * __hash_32() over x, xoring the result into y and hashing again.
 */
static inline unsigned int fold_hash(unsigned long x, unsigned long y)
{
	/* Use arch-optimized multiply if one exists */
	unsigned int hashed_x = __hash_32(x);

	return __hash_32(y ^ hashed_x);
}
2057*4882a593Smuzhiyun
2058*4882a593Smuzhiyun #endif
2059*4882a593Smuzhiyun
/*
 * Return the hash of a string of known length.  This is carefully
 * designed to match hash_name(), which is the more critical function.
 * In particular, we must end by hashing a final word containing fewer
 * than sizeof(unsigned long) payload bytes (possibly none), to match
 * the way that hash_name() iterates until it finds the delimiter after
 * the name.
 *
 * @salt: pointer whose value seeds the second state word
 * @name: bytes to hash (need not be NUL-terminated)
 * @len:  number of bytes of @name to hash
 */
unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)
{
	unsigned long x = 0, y = (unsigned long)salt;
	unsigned long word;

	while (len) {
		word = load_unaligned_zeropad(name);
		if (len < sizeof(unsigned long)) {
			/* Partial trailing word: keep only the payload bytes. */
			x ^= word & bytemask_from_count(len);
			break;
		}
		HASH_MIX(x, y, word);
		name += sizeof(unsigned long);
		len -= sizeof(unsigned long);
	}
	return fold_hash(x, y);
}
EXPORT_SYMBOL(full_name_hash);
2086*4882a593Smuzhiyun
2087*4882a593Smuzhiyun /* Return the "hash_len" (hash and length) of a null-terminated string */
hashlen_string(const void * salt,const char * name)2088*4882a593Smuzhiyun u64 hashlen_string(const void *salt, const char *name)
2089*4882a593Smuzhiyun {
2090*4882a593Smuzhiyun unsigned long a = 0, x = 0, y = (unsigned long)salt;
2091*4882a593Smuzhiyun unsigned long adata, mask, len;
2092*4882a593Smuzhiyun const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
2093*4882a593Smuzhiyun
2094*4882a593Smuzhiyun len = 0;
2095*4882a593Smuzhiyun goto inside;
2096*4882a593Smuzhiyun
2097*4882a593Smuzhiyun do {
2098*4882a593Smuzhiyun HASH_MIX(x, y, a);
2099*4882a593Smuzhiyun len += sizeof(unsigned long);
2100*4882a593Smuzhiyun inside:
2101*4882a593Smuzhiyun a = load_unaligned_zeropad(name+len);
2102*4882a593Smuzhiyun } while (!has_zero(a, &adata, &constants));
2103*4882a593Smuzhiyun
2104*4882a593Smuzhiyun adata = prep_zero_mask(a, adata, &constants);
2105*4882a593Smuzhiyun mask = create_zero_mask(adata);
2106*4882a593Smuzhiyun x ^= a & zero_bytemask(mask);
2107*4882a593Smuzhiyun
2108*4882a593Smuzhiyun return hashlen_create(fold_hash(x, y), len + find_zero(mask));
2109*4882a593Smuzhiyun }
2110*4882a593Smuzhiyun EXPORT_SYMBOL(hashlen_string);
2111*4882a593Smuzhiyun
2112*4882a593Smuzhiyun /*
2113*4882a593Smuzhiyun * Calculate the length and hash of the path component, and
2114*4882a593Smuzhiyun * return the "hash_len" as the result.
2115*4882a593Smuzhiyun */
hash_name(const void * salt,const char * name)2116*4882a593Smuzhiyun static inline u64 hash_name(const void *salt, const char *name)
2117*4882a593Smuzhiyun {
2118*4882a593Smuzhiyun unsigned long a = 0, b, x = 0, y = (unsigned long)salt;
2119*4882a593Smuzhiyun unsigned long adata, bdata, mask, len;
2120*4882a593Smuzhiyun const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
2121*4882a593Smuzhiyun
2122*4882a593Smuzhiyun len = 0;
2123*4882a593Smuzhiyun goto inside;
2124*4882a593Smuzhiyun
2125*4882a593Smuzhiyun do {
2126*4882a593Smuzhiyun HASH_MIX(x, y, a);
2127*4882a593Smuzhiyun len += sizeof(unsigned long);
2128*4882a593Smuzhiyun inside:
2129*4882a593Smuzhiyun a = load_unaligned_zeropad(name+len);
2130*4882a593Smuzhiyun b = a ^ REPEAT_BYTE('/');
2131*4882a593Smuzhiyun } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
2132*4882a593Smuzhiyun
2133*4882a593Smuzhiyun adata = prep_zero_mask(a, adata, &constants);
2134*4882a593Smuzhiyun bdata = prep_zero_mask(b, bdata, &constants);
2135*4882a593Smuzhiyun mask = create_zero_mask(adata | bdata);
2136*4882a593Smuzhiyun x ^= a & zero_bytemask(mask);
2137*4882a593Smuzhiyun
2138*4882a593Smuzhiyun return hashlen_create(fold_hash(x, y), len + find_zero(mask));
2139*4882a593Smuzhiyun }
2140*4882a593Smuzhiyun
2141*4882a593Smuzhiyun #else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */
2142*4882a593Smuzhiyun
/*
 * Return the hash of a string of known length, feeding the @len bytes
 * of @name one at a time into partial_name_hash().
 */
unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)
{
	unsigned long hash = init_name_hash(salt);
	unsigned int i;

	for (i = 0; i < len; i++)
		hash = partial_name_hash((unsigned char)name[i], hash);
	return end_name_hash(hash);
}
EXPORT_SYMBOL(full_name_hash);
2152*4882a593Smuzhiyun
2153*4882a593Smuzhiyun /* Return the "hash_len" (hash and length) of a null-terminated string */
hashlen_string(const void * salt,const char * name)2154*4882a593Smuzhiyun u64 hashlen_string(const void *salt, const char *name)
2155*4882a593Smuzhiyun {
2156*4882a593Smuzhiyun unsigned long hash = init_name_hash(salt);
2157*4882a593Smuzhiyun unsigned long len = 0, c;
2158*4882a593Smuzhiyun
2159*4882a593Smuzhiyun c = (unsigned char)*name;
2160*4882a593Smuzhiyun while (c) {
2161*4882a593Smuzhiyun len++;
2162*4882a593Smuzhiyun hash = partial_name_hash(c, hash);
2163*4882a593Smuzhiyun c = (unsigned char)name[len];
2164*4882a593Smuzhiyun }
2165*4882a593Smuzhiyun return hashlen_create(end_name_hash(hash), len);
2166*4882a593Smuzhiyun }
2167*4882a593Smuzhiyun EXPORT_SYMBOL(hashlen_string);
2168*4882a593Smuzhiyun
2169*4882a593Smuzhiyun /*
2170*4882a593Smuzhiyun * We know there's a real path component here of at least
2171*4882a593Smuzhiyun * one character.
2172*4882a593Smuzhiyun */
hash_name(const void * salt,const char * name)2173*4882a593Smuzhiyun static inline u64 hash_name(const void *salt, const char *name)
2174*4882a593Smuzhiyun {
2175*4882a593Smuzhiyun unsigned long hash = init_name_hash(salt);
2176*4882a593Smuzhiyun unsigned long len = 0, c;
2177*4882a593Smuzhiyun
2178*4882a593Smuzhiyun c = (unsigned char)*name;
2179*4882a593Smuzhiyun do {
2180*4882a593Smuzhiyun len++;
2181*4882a593Smuzhiyun hash = partial_name_hash(c, hash);
2182*4882a593Smuzhiyun c = (unsigned char)name[len];
2183*4882a593Smuzhiyun } while (c && c != '/');
2184*4882a593Smuzhiyun return hashlen_create(end_name_hash(hash), len);
2185*4882a593Smuzhiyun }
2186*4882a593Smuzhiyun
2187*4882a593Smuzhiyun #endif
2188*4882a593Smuzhiyun
2189*4882a593Smuzhiyun /*
2190*4882a593Smuzhiyun * Name resolution.
2191*4882a593Smuzhiyun * This is the basic name resolution function, turning a pathname into
2192*4882a593Smuzhiyun * the final dentry. We expect 'base' to be positive and a directory.
2193*4882a593Smuzhiyun *
2194*4882a593Smuzhiyun * Returns 0 and nd will have valid dentry and mnt on success.
2195*4882a593Smuzhiyun * Returns error and drops reference to input namei data on failure.
2196*4882a593Smuzhiyun */
link_path_walk(const char * name,struct nameidata * nd)2197*4882a593Smuzhiyun static int link_path_walk(const char *name, struct nameidata *nd)
2198*4882a593Smuzhiyun {
2199*4882a593Smuzhiyun int depth = 0; // depth <= nd->depth
2200*4882a593Smuzhiyun int err;
2201*4882a593Smuzhiyun
2202*4882a593Smuzhiyun nd->last_type = LAST_ROOT;
2203*4882a593Smuzhiyun nd->flags |= LOOKUP_PARENT;
2204*4882a593Smuzhiyun if (IS_ERR(name))
2205*4882a593Smuzhiyun return PTR_ERR(name);
2206*4882a593Smuzhiyun while (*name=='/')
2207*4882a593Smuzhiyun name++;
2208*4882a593Smuzhiyun if (!*name)
2209*4882a593Smuzhiyun return 0;
2210*4882a593Smuzhiyun
2211*4882a593Smuzhiyun /* At this point we know we have a real path component. */
2212*4882a593Smuzhiyun for(;;) {
2213*4882a593Smuzhiyun const char *link;
2214*4882a593Smuzhiyun u64 hash_len;
2215*4882a593Smuzhiyun int type;
2216*4882a593Smuzhiyun
2217*4882a593Smuzhiyun err = may_lookup(nd);
2218*4882a593Smuzhiyun if (err)
2219*4882a593Smuzhiyun return err;
2220*4882a593Smuzhiyun
2221*4882a593Smuzhiyun hash_len = hash_name(nd->path.dentry, name);
2222*4882a593Smuzhiyun
2223*4882a593Smuzhiyun type = LAST_NORM;
2224*4882a593Smuzhiyun if (name[0] == '.') switch (hashlen_len(hash_len)) {
2225*4882a593Smuzhiyun case 2:
2226*4882a593Smuzhiyun if (name[1] == '.') {
2227*4882a593Smuzhiyun type = LAST_DOTDOT;
2228*4882a593Smuzhiyun nd->flags |= LOOKUP_JUMPED;
2229*4882a593Smuzhiyun }
2230*4882a593Smuzhiyun break;
2231*4882a593Smuzhiyun case 1:
2232*4882a593Smuzhiyun type = LAST_DOT;
2233*4882a593Smuzhiyun }
2234*4882a593Smuzhiyun if (likely(type == LAST_NORM)) {
2235*4882a593Smuzhiyun struct dentry *parent = nd->path.dentry;
2236*4882a593Smuzhiyun nd->flags &= ~LOOKUP_JUMPED;
2237*4882a593Smuzhiyun if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
2238*4882a593Smuzhiyun struct qstr this = { { .hash_len = hash_len }, .name = name };
2239*4882a593Smuzhiyun err = parent->d_op->d_hash(parent, &this);
2240*4882a593Smuzhiyun if (err < 0)
2241*4882a593Smuzhiyun return err;
2242*4882a593Smuzhiyun hash_len = this.hash_len;
2243*4882a593Smuzhiyun name = this.name;
2244*4882a593Smuzhiyun }
2245*4882a593Smuzhiyun }
2246*4882a593Smuzhiyun
2247*4882a593Smuzhiyun nd->last.hash_len = hash_len;
2248*4882a593Smuzhiyun nd->last.name = name;
2249*4882a593Smuzhiyun nd->last_type = type;
2250*4882a593Smuzhiyun
2251*4882a593Smuzhiyun name += hashlen_len(hash_len);
2252*4882a593Smuzhiyun if (!*name)
2253*4882a593Smuzhiyun goto OK;
2254*4882a593Smuzhiyun /*
2255*4882a593Smuzhiyun * If it wasn't NUL, we know it was '/'. Skip that
2256*4882a593Smuzhiyun * slash, and continue until no more slashes.
2257*4882a593Smuzhiyun */
2258*4882a593Smuzhiyun do {
2259*4882a593Smuzhiyun name++;
2260*4882a593Smuzhiyun } while (unlikely(*name == '/'));
2261*4882a593Smuzhiyun if (unlikely(!*name)) {
2262*4882a593Smuzhiyun OK:
2263*4882a593Smuzhiyun /* pathname or trailing symlink, done */
2264*4882a593Smuzhiyun if (!depth) {
2265*4882a593Smuzhiyun nd->dir_uid = nd->inode->i_uid;
2266*4882a593Smuzhiyun nd->dir_mode = nd->inode->i_mode;
2267*4882a593Smuzhiyun nd->flags &= ~LOOKUP_PARENT;
2268*4882a593Smuzhiyun return 0;
2269*4882a593Smuzhiyun }
2270*4882a593Smuzhiyun /* last component of nested symlink */
2271*4882a593Smuzhiyun name = nd->stack[--depth].name;
2272*4882a593Smuzhiyun link = walk_component(nd, 0);
2273*4882a593Smuzhiyun } else {
2274*4882a593Smuzhiyun /* not the last component */
2275*4882a593Smuzhiyun link = walk_component(nd, WALK_MORE);
2276*4882a593Smuzhiyun }
2277*4882a593Smuzhiyun if (unlikely(link)) {
2278*4882a593Smuzhiyun if (IS_ERR(link))
2279*4882a593Smuzhiyun return PTR_ERR(link);
2280*4882a593Smuzhiyun /* a symlink to follow */
2281*4882a593Smuzhiyun nd->stack[depth++].name = name;
2282*4882a593Smuzhiyun name = link;
2283*4882a593Smuzhiyun continue;
2284*4882a593Smuzhiyun }
2285*4882a593Smuzhiyun if (unlikely(!d_can_lookup(nd->path.dentry))) {
2286*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU) {
2287*4882a593Smuzhiyun if (!try_to_unlazy(nd))
2288*4882a593Smuzhiyun return -ECHILD;
2289*4882a593Smuzhiyun }
2290*4882a593Smuzhiyun return -ENOTDIR;
2291*4882a593Smuzhiyun }
2292*4882a593Smuzhiyun }
2293*4882a593Smuzhiyun }
2294*4882a593Smuzhiyun
2295*4882a593Smuzhiyun /* must be paired with terminate_walk() */
path_init(struct nameidata * nd,unsigned flags)2296*4882a593Smuzhiyun static const char *path_init(struct nameidata *nd, unsigned flags)
2297*4882a593Smuzhiyun {
2298*4882a593Smuzhiyun int error;
2299*4882a593Smuzhiyun const char *s = nd->name->name;
2300*4882a593Smuzhiyun
2301*4882a593Smuzhiyun /* LOOKUP_CACHED requires RCU, ask caller to retry */
2302*4882a593Smuzhiyun if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED)
2303*4882a593Smuzhiyun return ERR_PTR(-EAGAIN);
2304*4882a593Smuzhiyun
2305*4882a593Smuzhiyun if (!*s)
2306*4882a593Smuzhiyun flags &= ~LOOKUP_RCU;
2307*4882a593Smuzhiyun if (flags & LOOKUP_RCU)
2308*4882a593Smuzhiyun rcu_read_lock();
2309*4882a593Smuzhiyun
2310*4882a593Smuzhiyun nd->flags = flags | LOOKUP_JUMPED;
2311*4882a593Smuzhiyun nd->depth = 0;
2312*4882a593Smuzhiyun
2313*4882a593Smuzhiyun nd->m_seq = __read_seqcount_begin(&mount_lock.seqcount);
2314*4882a593Smuzhiyun nd->r_seq = __read_seqcount_begin(&rename_lock.seqcount);
2315*4882a593Smuzhiyun smp_rmb();
2316*4882a593Smuzhiyun
2317*4882a593Smuzhiyun if (flags & LOOKUP_ROOT) {
2318*4882a593Smuzhiyun struct dentry *root = nd->root.dentry;
2319*4882a593Smuzhiyun struct inode *inode = root->d_inode;
2320*4882a593Smuzhiyun if (*s && unlikely(!d_can_lookup(root)))
2321*4882a593Smuzhiyun return ERR_PTR(-ENOTDIR);
2322*4882a593Smuzhiyun nd->path = nd->root;
2323*4882a593Smuzhiyun nd->inode = inode;
2324*4882a593Smuzhiyun if (flags & LOOKUP_RCU) {
2325*4882a593Smuzhiyun nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
2326*4882a593Smuzhiyun nd->root_seq = nd->seq;
2327*4882a593Smuzhiyun } else {
2328*4882a593Smuzhiyun path_get(&nd->path);
2329*4882a593Smuzhiyun }
2330*4882a593Smuzhiyun return s;
2331*4882a593Smuzhiyun }
2332*4882a593Smuzhiyun
2333*4882a593Smuzhiyun nd->root.mnt = NULL;
2334*4882a593Smuzhiyun
2335*4882a593Smuzhiyun /* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */
2336*4882a593Smuzhiyun if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) {
2337*4882a593Smuzhiyun error = nd_jump_root(nd);
2338*4882a593Smuzhiyun if (unlikely(error))
2339*4882a593Smuzhiyun return ERR_PTR(error);
2340*4882a593Smuzhiyun return s;
2341*4882a593Smuzhiyun }
2342*4882a593Smuzhiyun
2343*4882a593Smuzhiyun /* Relative pathname -- get the starting-point it is relative to. */
2344*4882a593Smuzhiyun if (nd->dfd == AT_FDCWD) {
2345*4882a593Smuzhiyun if (flags & LOOKUP_RCU) {
2346*4882a593Smuzhiyun struct fs_struct *fs = current->fs;
2347*4882a593Smuzhiyun unsigned seq;
2348*4882a593Smuzhiyun
2349*4882a593Smuzhiyun do {
2350*4882a593Smuzhiyun seq = read_seqcount_begin(&fs->seq);
2351*4882a593Smuzhiyun nd->path = fs->pwd;
2352*4882a593Smuzhiyun nd->inode = nd->path.dentry->d_inode;
2353*4882a593Smuzhiyun nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
2354*4882a593Smuzhiyun } while (read_seqcount_retry(&fs->seq, seq));
2355*4882a593Smuzhiyun } else {
2356*4882a593Smuzhiyun get_fs_pwd(current->fs, &nd->path);
2357*4882a593Smuzhiyun nd->inode = nd->path.dentry->d_inode;
2358*4882a593Smuzhiyun }
2359*4882a593Smuzhiyun } else {
2360*4882a593Smuzhiyun /* Caller must check execute permissions on the starting path component */
2361*4882a593Smuzhiyun struct fd f = fdget_raw(nd->dfd);
2362*4882a593Smuzhiyun struct dentry *dentry;
2363*4882a593Smuzhiyun
2364*4882a593Smuzhiyun if (!f.file)
2365*4882a593Smuzhiyun return ERR_PTR(-EBADF);
2366*4882a593Smuzhiyun
2367*4882a593Smuzhiyun dentry = f.file->f_path.dentry;
2368*4882a593Smuzhiyun
2369*4882a593Smuzhiyun if (*s && unlikely(!d_can_lookup(dentry))) {
2370*4882a593Smuzhiyun fdput(f);
2371*4882a593Smuzhiyun return ERR_PTR(-ENOTDIR);
2372*4882a593Smuzhiyun }
2373*4882a593Smuzhiyun
2374*4882a593Smuzhiyun nd->path = f.file->f_path;
2375*4882a593Smuzhiyun if (flags & LOOKUP_RCU) {
2376*4882a593Smuzhiyun nd->inode = nd->path.dentry->d_inode;
2377*4882a593Smuzhiyun nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
2378*4882a593Smuzhiyun } else {
2379*4882a593Smuzhiyun path_get(&nd->path);
2380*4882a593Smuzhiyun nd->inode = nd->path.dentry->d_inode;
2381*4882a593Smuzhiyun }
2382*4882a593Smuzhiyun fdput(f);
2383*4882a593Smuzhiyun }
2384*4882a593Smuzhiyun
2385*4882a593Smuzhiyun /* For scoped-lookups we need to set the root to the dirfd as well. */
2386*4882a593Smuzhiyun if (flags & LOOKUP_IS_SCOPED) {
2387*4882a593Smuzhiyun nd->root = nd->path;
2388*4882a593Smuzhiyun if (flags & LOOKUP_RCU) {
2389*4882a593Smuzhiyun nd->root_seq = nd->seq;
2390*4882a593Smuzhiyun } else {
2391*4882a593Smuzhiyun path_get(&nd->root);
2392*4882a593Smuzhiyun nd->flags |= LOOKUP_ROOT_GRABBED;
2393*4882a593Smuzhiyun }
2394*4882a593Smuzhiyun }
2395*4882a593Smuzhiyun return s;
2396*4882a593Smuzhiyun }
2397*4882a593Smuzhiyun
lookup_last(struct nameidata * nd)2398*4882a593Smuzhiyun static inline const char *lookup_last(struct nameidata *nd)
2399*4882a593Smuzhiyun {
2400*4882a593Smuzhiyun if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
2401*4882a593Smuzhiyun nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2402*4882a593Smuzhiyun
2403*4882a593Smuzhiyun return walk_component(nd, WALK_TRAILING);
2404*4882a593Smuzhiyun }
2405*4882a593Smuzhiyun
handle_lookup_down(struct nameidata * nd)2406*4882a593Smuzhiyun static int handle_lookup_down(struct nameidata *nd)
2407*4882a593Smuzhiyun {
2408*4882a593Smuzhiyun if (!(nd->flags & LOOKUP_RCU))
2409*4882a593Smuzhiyun dget(nd->path.dentry);
2410*4882a593Smuzhiyun return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
2411*4882a593Smuzhiyun nd->path.dentry, nd->inode, nd->seq));
2412*4882a593Smuzhiyun }
2413*4882a593Smuzhiyun
2414*4882a593Smuzhiyun /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
path_lookupat(struct nameidata * nd,unsigned flags,struct path * path)2415*4882a593Smuzhiyun static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
2416*4882a593Smuzhiyun {
2417*4882a593Smuzhiyun const char *s = path_init(nd, flags);
2418*4882a593Smuzhiyun int err;
2419*4882a593Smuzhiyun
2420*4882a593Smuzhiyun if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) {
2421*4882a593Smuzhiyun err = handle_lookup_down(nd);
2422*4882a593Smuzhiyun if (unlikely(err < 0))
2423*4882a593Smuzhiyun s = ERR_PTR(err);
2424*4882a593Smuzhiyun }
2425*4882a593Smuzhiyun
2426*4882a593Smuzhiyun while (!(err = link_path_walk(s, nd)) &&
2427*4882a593Smuzhiyun (s = lookup_last(nd)) != NULL)
2428*4882a593Smuzhiyun ;
2429*4882a593Smuzhiyun if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) {
2430*4882a593Smuzhiyun err = handle_lookup_down(nd);
2431*4882a593Smuzhiyun nd->flags &= ~LOOKUP_JUMPED; // no d_weak_revalidate(), please...
2432*4882a593Smuzhiyun }
2433*4882a593Smuzhiyun if (!err)
2434*4882a593Smuzhiyun err = complete_walk(nd);
2435*4882a593Smuzhiyun
2436*4882a593Smuzhiyun if (!err && nd->flags & LOOKUP_DIRECTORY)
2437*4882a593Smuzhiyun if (!d_can_lookup(nd->path.dentry))
2438*4882a593Smuzhiyun err = -ENOTDIR;
2439*4882a593Smuzhiyun if (!err) {
2440*4882a593Smuzhiyun *path = nd->path;
2441*4882a593Smuzhiyun nd->path.mnt = NULL;
2442*4882a593Smuzhiyun nd->path.dentry = NULL;
2443*4882a593Smuzhiyun }
2444*4882a593Smuzhiyun terminate_walk(nd);
2445*4882a593Smuzhiyun return err;
2446*4882a593Smuzhiyun }
2447*4882a593Smuzhiyun
filename_lookup(int dfd,struct filename * name,unsigned flags,struct path * path,struct path * root)2448*4882a593Smuzhiyun int filename_lookup(int dfd, struct filename *name, unsigned flags,
2449*4882a593Smuzhiyun struct path *path, struct path *root)
2450*4882a593Smuzhiyun {
2451*4882a593Smuzhiyun int retval;
2452*4882a593Smuzhiyun struct nameidata nd;
2453*4882a593Smuzhiyun if (IS_ERR(name))
2454*4882a593Smuzhiyun return PTR_ERR(name);
2455*4882a593Smuzhiyun if (unlikely(root)) {
2456*4882a593Smuzhiyun nd.root = *root;
2457*4882a593Smuzhiyun flags |= LOOKUP_ROOT;
2458*4882a593Smuzhiyun }
2459*4882a593Smuzhiyun set_nameidata(&nd, dfd, name);
2460*4882a593Smuzhiyun retval = path_lookupat(&nd, flags | LOOKUP_RCU, path);
2461*4882a593Smuzhiyun if (unlikely(retval == -ECHILD))
2462*4882a593Smuzhiyun retval = path_lookupat(&nd, flags, path);
2463*4882a593Smuzhiyun if (unlikely(retval == -ESTALE))
2464*4882a593Smuzhiyun retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path);
2465*4882a593Smuzhiyun
2466*4882a593Smuzhiyun if (likely(!retval))
2467*4882a593Smuzhiyun audit_inode(name, path->dentry,
2468*4882a593Smuzhiyun flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0);
2469*4882a593Smuzhiyun restore_nameidata();
2470*4882a593Smuzhiyun putname(name);
2471*4882a593Smuzhiyun return retval;
2472*4882a593Smuzhiyun }
2473*4882a593Smuzhiyun
2474*4882a593Smuzhiyun /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
path_parentat(struct nameidata * nd,unsigned flags,struct path * parent)2475*4882a593Smuzhiyun static int path_parentat(struct nameidata *nd, unsigned flags,
2476*4882a593Smuzhiyun struct path *parent)
2477*4882a593Smuzhiyun {
2478*4882a593Smuzhiyun const char *s = path_init(nd, flags);
2479*4882a593Smuzhiyun int err = link_path_walk(s, nd);
2480*4882a593Smuzhiyun if (!err)
2481*4882a593Smuzhiyun err = complete_walk(nd);
2482*4882a593Smuzhiyun if (!err) {
2483*4882a593Smuzhiyun *parent = nd->path;
2484*4882a593Smuzhiyun nd->path.mnt = NULL;
2485*4882a593Smuzhiyun nd->path.dentry = NULL;
2486*4882a593Smuzhiyun }
2487*4882a593Smuzhiyun terminate_walk(nd);
2488*4882a593Smuzhiyun return err;
2489*4882a593Smuzhiyun }
2490*4882a593Smuzhiyun
filename_parentat(int dfd,struct filename * name,unsigned int flags,struct path * parent,struct qstr * last,int * type)2491*4882a593Smuzhiyun static struct filename *filename_parentat(int dfd, struct filename *name,
2492*4882a593Smuzhiyun unsigned int flags, struct path *parent,
2493*4882a593Smuzhiyun struct qstr *last, int *type)
2494*4882a593Smuzhiyun {
2495*4882a593Smuzhiyun int retval;
2496*4882a593Smuzhiyun struct nameidata nd;
2497*4882a593Smuzhiyun
2498*4882a593Smuzhiyun if (IS_ERR(name))
2499*4882a593Smuzhiyun return name;
2500*4882a593Smuzhiyun set_nameidata(&nd, dfd, name);
2501*4882a593Smuzhiyun retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
2502*4882a593Smuzhiyun if (unlikely(retval == -ECHILD))
2503*4882a593Smuzhiyun retval = path_parentat(&nd, flags, parent);
2504*4882a593Smuzhiyun if (unlikely(retval == -ESTALE))
2505*4882a593Smuzhiyun retval = path_parentat(&nd, flags | LOOKUP_REVAL, parent);
2506*4882a593Smuzhiyun if (likely(!retval)) {
2507*4882a593Smuzhiyun *last = nd.last;
2508*4882a593Smuzhiyun *type = nd.last_type;
2509*4882a593Smuzhiyun audit_inode(name, parent->dentry, AUDIT_INODE_PARENT);
2510*4882a593Smuzhiyun } else {
2511*4882a593Smuzhiyun putname(name);
2512*4882a593Smuzhiyun name = ERR_PTR(retval);
2513*4882a593Smuzhiyun }
2514*4882a593Smuzhiyun restore_nameidata();
2515*4882a593Smuzhiyun return name;
2516*4882a593Smuzhiyun }
2517*4882a593Smuzhiyun
2518*4882a593Smuzhiyun /* does lookup, returns the object with parent locked */
kern_path_locked(const char * name,struct path * path)2519*4882a593Smuzhiyun struct dentry *kern_path_locked(const char *name, struct path *path)
2520*4882a593Smuzhiyun {
2521*4882a593Smuzhiyun struct filename *filename;
2522*4882a593Smuzhiyun struct dentry *d;
2523*4882a593Smuzhiyun struct qstr last;
2524*4882a593Smuzhiyun int type;
2525*4882a593Smuzhiyun
2526*4882a593Smuzhiyun filename = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
2527*4882a593Smuzhiyun &last, &type);
2528*4882a593Smuzhiyun if (IS_ERR(filename))
2529*4882a593Smuzhiyun return ERR_CAST(filename);
2530*4882a593Smuzhiyun if (unlikely(type != LAST_NORM)) {
2531*4882a593Smuzhiyun path_put(path);
2532*4882a593Smuzhiyun putname(filename);
2533*4882a593Smuzhiyun return ERR_PTR(-EINVAL);
2534*4882a593Smuzhiyun }
2535*4882a593Smuzhiyun inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
2536*4882a593Smuzhiyun d = __lookup_hash(&last, path->dentry, 0);
2537*4882a593Smuzhiyun if (IS_ERR(d)) {
2538*4882a593Smuzhiyun inode_unlock(path->dentry->d_inode);
2539*4882a593Smuzhiyun path_put(path);
2540*4882a593Smuzhiyun }
2541*4882a593Smuzhiyun putname(filename);
2542*4882a593Smuzhiyun return d;
2543*4882a593Smuzhiyun }
2544*4882a593Smuzhiyun
kern_path(const char * name,unsigned int flags,struct path * path)2545*4882a593Smuzhiyun int kern_path(const char *name, unsigned int flags, struct path *path)
2546*4882a593Smuzhiyun {
2547*4882a593Smuzhiyun return filename_lookup(AT_FDCWD, getname_kernel(name),
2548*4882a593Smuzhiyun flags, path, NULL);
2549*4882a593Smuzhiyun }
2550*4882a593Smuzhiyun EXPORT_SYMBOL(kern_path);
2551*4882a593Smuzhiyun
2552*4882a593Smuzhiyun /**
2553*4882a593Smuzhiyun * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
2554*4882a593Smuzhiyun * @dentry: pointer to dentry of the base directory
2555*4882a593Smuzhiyun * @mnt: pointer to vfs mount of the base directory
2556*4882a593Smuzhiyun * @name: pointer to file name
2557*4882a593Smuzhiyun * @flags: lookup flags
2558*4882a593Smuzhiyun * @path: pointer to struct path to fill
2559*4882a593Smuzhiyun */
vfs_path_lookup(struct dentry * dentry,struct vfsmount * mnt,const char * name,unsigned int flags,struct path * path)2560*4882a593Smuzhiyun int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
2561*4882a593Smuzhiyun const char *name, unsigned int flags,
2562*4882a593Smuzhiyun struct path *path)
2563*4882a593Smuzhiyun {
2564*4882a593Smuzhiyun struct path root = {.mnt = mnt, .dentry = dentry};
2565*4882a593Smuzhiyun /* the first argument of filename_lookup() is ignored with root */
2566*4882a593Smuzhiyun return filename_lookup(AT_FDCWD, getname_kernel(name),
2567*4882a593Smuzhiyun flags , path, &root);
2568*4882a593Smuzhiyun }
2569*4882a593Smuzhiyun EXPORT_SYMBOL(vfs_path_lookup);
2570*4882a593Smuzhiyun
lookup_one_len_common(const char * name,struct dentry * base,int len,struct qstr * this)2571*4882a593Smuzhiyun static int lookup_one_len_common(const char *name, struct dentry *base,
2572*4882a593Smuzhiyun int len, struct qstr *this)
2573*4882a593Smuzhiyun {
2574*4882a593Smuzhiyun this->name = name;
2575*4882a593Smuzhiyun this->len = len;
2576*4882a593Smuzhiyun this->hash = full_name_hash(base, name, len);
2577*4882a593Smuzhiyun if (!len)
2578*4882a593Smuzhiyun return -EACCES;
2579*4882a593Smuzhiyun
2580*4882a593Smuzhiyun if (unlikely(name[0] == '.')) {
2581*4882a593Smuzhiyun if (len < 2 || (len == 2 && name[1] == '.'))
2582*4882a593Smuzhiyun return -EACCES;
2583*4882a593Smuzhiyun }
2584*4882a593Smuzhiyun
2585*4882a593Smuzhiyun while (len--) {
2586*4882a593Smuzhiyun unsigned int c = *(const unsigned char *)name++;
2587*4882a593Smuzhiyun if (c == '/' || c == '\0')
2588*4882a593Smuzhiyun return -EACCES;
2589*4882a593Smuzhiyun }
2590*4882a593Smuzhiyun /*
2591*4882a593Smuzhiyun * See if the low-level filesystem might want
2592*4882a593Smuzhiyun * to use its own hash..
2593*4882a593Smuzhiyun */
2594*4882a593Smuzhiyun if (base->d_flags & DCACHE_OP_HASH) {
2595*4882a593Smuzhiyun int err = base->d_op->d_hash(base, this);
2596*4882a593Smuzhiyun if (err < 0)
2597*4882a593Smuzhiyun return err;
2598*4882a593Smuzhiyun }
2599*4882a593Smuzhiyun
2600*4882a593Smuzhiyun return inode_permission(base->d_inode, MAY_EXEC);
2601*4882a593Smuzhiyun }
2602*4882a593Smuzhiyun
2603*4882a593Smuzhiyun /**
2604*4882a593Smuzhiyun * try_lookup_one_len - filesystem helper to lookup single pathname component
2605*4882a593Smuzhiyun * @name: pathname component to lookup
2606*4882a593Smuzhiyun * @base: base directory to lookup from
2607*4882a593Smuzhiyun * @len: maximum length @len should be interpreted to
2608*4882a593Smuzhiyun *
2609*4882a593Smuzhiyun * Look up a dentry by name in the dcache, returning NULL if it does not
2610*4882a593Smuzhiyun * currently exist. The function does not try to create a dentry.
2611*4882a593Smuzhiyun *
2612*4882a593Smuzhiyun * Note that this routine is purely a helper for filesystem usage and should
2613*4882a593Smuzhiyun * not be called by generic code.
2614*4882a593Smuzhiyun *
2615*4882a593Smuzhiyun * The caller must hold base->i_mutex.
2616*4882a593Smuzhiyun */
try_lookup_one_len(const char * name,struct dentry * base,int len)2617*4882a593Smuzhiyun struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len)
2618*4882a593Smuzhiyun {
2619*4882a593Smuzhiyun struct qstr this;
2620*4882a593Smuzhiyun int err;
2621*4882a593Smuzhiyun
2622*4882a593Smuzhiyun WARN_ON_ONCE(!inode_is_locked(base->d_inode));
2623*4882a593Smuzhiyun
2624*4882a593Smuzhiyun err = lookup_one_len_common(name, base, len, &this);
2625*4882a593Smuzhiyun if (err)
2626*4882a593Smuzhiyun return ERR_PTR(err);
2627*4882a593Smuzhiyun
2628*4882a593Smuzhiyun return lookup_dcache(&this, base, 0);
2629*4882a593Smuzhiyun }
2630*4882a593Smuzhiyun EXPORT_SYMBOL(try_lookup_one_len);
2631*4882a593Smuzhiyun
2632*4882a593Smuzhiyun /**
2633*4882a593Smuzhiyun * lookup_one_len - filesystem helper to lookup single pathname component
2634*4882a593Smuzhiyun * @name: pathname component to lookup
2635*4882a593Smuzhiyun * @base: base directory to lookup from
2636*4882a593Smuzhiyun * @len: maximum length @len should be interpreted to
2637*4882a593Smuzhiyun *
2638*4882a593Smuzhiyun * Note that this routine is purely a helper for filesystem usage and should
2639*4882a593Smuzhiyun * not be called by generic code.
2640*4882a593Smuzhiyun *
2641*4882a593Smuzhiyun * The caller must hold base->i_mutex.
2642*4882a593Smuzhiyun */
lookup_one_len(const char * name,struct dentry * base,int len)2643*4882a593Smuzhiyun struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
2644*4882a593Smuzhiyun {
2645*4882a593Smuzhiyun struct dentry *dentry;
2646*4882a593Smuzhiyun struct qstr this;
2647*4882a593Smuzhiyun int err;
2648*4882a593Smuzhiyun
2649*4882a593Smuzhiyun WARN_ON_ONCE(!inode_is_locked(base->d_inode));
2650*4882a593Smuzhiyun
2651*4882a593Smuzhiyun err = lookup_one_len_common(name, base, len, &this);
2652*4882a593Smuzhiyun if (err)
2653*4882a593Smuzhiyun return ERR_PTR(err);
2654*4882a593Smuzhiyun
2655*4882a593Smuzhiyun dentry = lookup_dcache(&this, base, 0);
2656*4882a593Smuzhiyun return dentry ? dentry : __lookup_slow(&this, base, 0);
2657*4882a593Smuzhiyun }
2658*4882a593Smuzhiyun EXPORT_SYMBOL(lookup_one_len);
2659*4882a593Smuzhiyun
2660*4882a593Smuzhiyun /**
2661*4882a593Smuzhiyun * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
2662*4882a593Smuzhiyun * @name: pathname component to lookup
2663*4882a593Smuzhiyun * @base: base directory to lookup from
2664*4882a593Smuzhiyun * @len: maximum length @len should be interpreted to
2665*4882a593Smuzhiyun *
2666*4882a593Smuzhiyun * Note that this routine is purely a helper for filesystem usage and should
2667*4882a593Smuzhiyun * not be called by generic code.
2668*4882a593Smuzhiyun *
2669*4882a593Smuzhiyun * Unlike lookup_one_len, it should be called without the parent
2670*4882a593Smuzhiyun * i_mutex held, and will take the i_mutex itself if necessary.
2671*4882a593Smuzhiyun */
lookup_one_len_unlocked(const char * name,struct dentry * base,int len)2672*4882a593Smuzhiyun struct dentry *lookup_one_len_unlocked(const char *name,
2673*4882a593Smuzhiyun struct dentry *base, int len)
2674*4882a593Smuzhiyun {
2675*4882a593Smuzhiyun struct qstr this;
2676*4882a593Smuzhiyun int err;
2677*4882a593Smuzhiyun struct dentry *ret;
2678*4882a593Smuzhiyun
2679*4882a593Smuzhiyun err = lookup_one_len_common(name, base, len, &this);
2680*4882a593Smuzhiyun if (err)
2681*4882a593Smuzhiyun return ERR_PTR(err);
2682*4882a593Smuzhiyun
2683*4882a593Smuzhiyun ret = lookup_dcache(&this, base, 0);
2684*4882a593Smuzhiyun if (!ret)
2685*4882a593Smuzhiyun ret = lookup_slow(&this, base, 0);
2686*4882a593Smuzhiyun return ret;
2687*4882a593Smuzhiyun }
2688*4882a593Smuzhiyun EXPORT_SYMBOL(lookup_one_len_unlocked);
2689*4882a593Smuzhiyun
2690*4882a593Smuzhiyun /*
2691*4882a593Smuzhiyun * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT)
2692*4882a593Smuzhiyun * on negatives. Returns known positive or ERR_PTR(); that's what
2693*4882a593Smuzhiyun * most of the users want. Note that pinned negative with unlocked parent
2694*4882a593Smuzhiyun * _can_ become positive at any time, so callers of lookup_one_len_unlocked()
2695*4882a593Smuzhiyun * need to be very careful; pinned positives have ->d_inode stable, so
2696*4882a593Smuzhiyun * this one avoids such problems.
2697*4882a593Smuzhiyun */
lookup_positive_unlocked(const char * name,struct dentry * base,int len)2698*4882a593Smuzhiyun struct dentry *lookup_positive_unlocked(const char *name,
2699*4882a593Smuzhiyun struct dentry *base, int len)
2700*4882a593Smuzhiyun {
2701*4882a593Smuzhiyun struct dentry *ret = lookup_one_len_unlocked(name, base, len);
2702*4882a593Smuzhiyun if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
2703*4882a593Smuzhiyun dput(ret);
2704*4882a593Smuzhiyun ret = ERR_PTR(-ENOENT);
2705*4882a593Smuzhiyun }
2706*4882a593Smuzhiyun return ret;
2707*4882a593Smuzhiyun }
2708*4882a593Smuzhiyun EXPORT_SYMBOL(lookup_positive_unlocked);
2709*4882a593Smuzhiyun
#ifdef CONFIG_UNIX98_PTYS
/*
 * path_pts - redirect @path to whatever is mounted on the sibling "pts"
 * @path: in/out; on entry some path, on success the "pts" entry found in
 *        the parent directory of the input, after follow_down()
 *
 * Find something mounted on "pts" in the same directory as the input path.
 *
 * Returns 0 on success.  Returns -ENOENT if the parent is not connected to
 * @path->mnt (in which case @path is left untouched), or if no usable
 * "pts" child exists (in which case @path has already been moved up to the
 * parent directory).
 */
int path_pts(struct path *path)
{
	struct dentry *parent = dget_parent(path->dentry);
	struct dentry *child;
	struct qstr this = QSTR_INIT("pts", 3);

	if (unlikely(!path_connected(path->mnt, parent))) {
		dput(parent);
		return -ENOENT;
	}

	/* Hand the reference taken by dget_parent() over to @path. */
	dput(path->dentry);
	path->dentry = parent;

	/*
	 * d_hash_and_lookup() returns NULL on a plain miss, but an ERR_PTR()
	 * when the filesystem's ->d_hash() rejects the name.  Both must be
	 * treated as "not found"; storing an error pointer in @path would
	 * get it dereferenced by follow_down() and later by path_put().
	 */
	child = d_hash_and_lookup(parent, &this);
	if (IS_ERR_OR_NULL(child))
		return -ENOENT;

	path->dentry = child;
	dput(parent);
	follow_down(path);
	return 0;
}
#endif
2736*4882a593Smuzhiyun
/*
 * Resolve the user-space pathname @name relative to @dfd into @path.
 * The name is copied in by getname_flags(), which also receives @flags and
 * @empty; the resulting filename is then passed to filename_lookup() with
 * no explicit root.  The return value is that of filename_lookup().
 */
int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
		       struct path *path, int *empty)
{
	struct filename *filename = getname_flags(name, flags, empty);

	return filename_lookup(dfd, filename, flags, path, NULL);
}
EXPORT_SYMBOL(user_path_at_empty);
2744*4882a593Smuzhiyun
__check_sticky(struct inode * dir,struct inode * inode)2745*4882a593Smuzhiyun int __check_sticky(struct inode *dir, struct inode *inode)
2746*4882a593Smuzhiyun {
2747*4882a593Smuzhiyun kuid_t fsuid = current_fsuid();
2748*4882a593Smuzhiyun
2749*4882a593Smuzhiyun if (uid_eq(inode->i_uid, fsuid))
2750*4882a593Smuzhiyun return 0;
2751*4882a593Smuzhiyun if (uid_eq(dir->i_uid, fsuid))
2752*4882a593Smuzhiyun return 0;
2753*4882a593Smuzhiyun return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
2754*4882a593Smuzhiyun }
2755*4882a593Smuzhiyun EXPORT_SYMBOL(__check_sticky);
2756*4882a593Smuzhiyun
2757*4882a593Smuzhiyun /*
2758*4882a593Smuzhiyun * Check whether we can remove a link victim from directory dir, check
2759*4882a593Smuzhiyun * whether the type of victim is right.
2760*4882a593Smuzhiyun * 1. We can't do it if dir is read-only (done in permission())
2761*4882a593Smuzhiyun * 2. We should have write and exec permissions on dir
2762*4882a593Smuzhiyun * 3. We can't remove anything from append-only dir
2763*4882a593Smuzhiyun * 4. We can't do anything with immutable dir (done in permission())
2764*4882a593Smuzhiyun * 5. If the sticky bit on dir is set we should either
2765*4882a593Smuzhiyun * a. be owner of dir, or
2766*4882a593Smuzhiyun * b. be owner of victim, or
2767*4882a593Smuzhiyun * c. have CAP_FOWNER capability
2768*4882a593Smuzhiyun * 6. If the victim is append-only or immutable we can't do antyhing with
2769*4882a593Smuzhiyun * links pointing to it.
2770*4882a593Smuzhiyun * 7. If the victim has an unknown uid or gid we can't change the inode.
2771*4882a593Smuzhiyun * 8. If we were asked to remove a directory and victim isn't one - ENOTDIR.
2772*4882a593Smuzhiyun * 9. If we were asked to remove a non-directory and victim isn't one - EISDIR.
2773*4882a593Smuzhiyun * 10. We can't remove a root or mountpoint.
2774*4882a593Smuzhiyun * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
2775*4882a593Smuzhiyun * nfs_async_unlink().
2776*4882a593Smuzhiyun */
may_delete(struct inode * dir,struct dentry * victim,bool isdir)2777*4882a593Smuzhiyun static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
2778*4882a593Smuzhiyun {
2779*4882a593Smuzhiyun struct inode *inode = d_backing_inode(victim);
2780*4882a593Smuzhiyun int error;
2781*4882a593Smuzhiyun
2782*4882a593Smuzhiyun if (d_is_negative(victim))
2783*4882a593Smuzhiyun return -ENOENT;
2784*4882a593Smuzhiyun BUG_ON(!inode);
2785*4882a593Smuzhiyun
2786*4882a593Smuzhiyun BUG_ON(victim->d_parent->d_inode != dir);
2787*4882a593Smuzhiyun
2788*4882a593Smuzhiyun /* Inode writeback is not safe when the uid or gid are invalid. */
2789*4882a593Smuzhiyun if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
2790*4882a593Smuzhiyun return -EOVERFLOW;
2791*4882a593Smuzhiyun
2792*4882a593Smuzhiyun audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
2793*4882a593Smuzhiyun
2794*4882a593Smuzhiyun error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
2795*4882a593Smuzhiyun if (error)
2796*4882a593Smuzhiyun return error;
2797*4882a593Smuzhiyun if (IS_APPEND(dir))
2798*4882a593Smuzhiyun return -EPERM;
2799*4882a593Smuzhiyun
2800*4882a593Smuzhiyun if (check_sticky(dir, inode) || IS_APPEND(inode) ||
2801*4882a593Smuzhiyun IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(inode))
2802*4882a593Smuzhiyun return -EPERM;
2803*4882a593Smuzhiyun if (isdir) {
2804*4882a593Smuzhiyun if (!d_is_dir(victim))
2805*4882a593Smuzhiyun return -ENOTDIR;
2806*4882a593Smuzhiyun if (IS_ROOT(victim))
2807*4882a593Smuzhiyun return -EBUSY;
2808*4882a593Smuzhiyun } else if (d_is_dir(victim))
2809*4882a593Smuzhiyun return -EISDIR;
2810*4882a593Smuzhiyun if (IS_DEADDIR(dir))
2811*4882a593Smuzhiyun return -ENOENT;
2812*4882a593Smuzhiyun if (victim->d_flags & DCACHE_NFSFS_RENAMED)
2813*4882a593Smuzhiyun return -EBUSY;
2814*4882a593Smuzhiyun return 0;
2815*4882a593Smuzhiyun }
2816*4882a593Smuzhiyun
2817*4882a593Smuzhiyun /* Check whether we can create an object with dentry child in directory
2818*4882a593Smuzhiyun * dir.
2819*4882a593Smuzhiyun * 1. We can't do it if child already exists (open has special treatment for
2820*4882a593Smuzhiyun * this case, but since we are inlined it's OK)
2821*4882a593Smuzhiyun * 2. We can't do it if dir is read-only (done in permission())
2822*4882a593Smuzhiyun * 3. We can't do it if the fs can't represent the fsuid or fsgid.
2823*4882a593Smuzhiyun * 4. We should have write and exec permissions on dir
2824*4882a593Smuzhiyun * 5. We can't do it if dir is immutable (done in permission())
2825*4882a593Smuzhiyun */
may_create(struct inode * dir,struct dentry * child)2826*4882a593Smuzhiyun static inline int may_create(struct inode *dir, struct dentry *child)
2827*4882a593Smuzhiyun {
2828*4882a593Smuzhiyun struct user_namespace *s_user_ns;
2829*4882a593Smuzhiyun audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
2830*4882a593Smuzhiyun if (child->d_inode)
2831*4882a593Smuzhiyun return -EEXIST;
2832*4882a593Smuzhiyun if (IS_DEADDIR(dir))
2833*4882a593Smuzhiyun return -ENOENT;
2834*4882a593Smuzhiyun s_user_ns = dir->i_sb->s_user_ns;
2835*4882a593Smuzhiyun if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
2836*4882a593Smuzhiyun !kgid_has_mapping(s_user_ns, current_fsgid()))
2837*4882a593Smuzhiyun return -EOVERFLOW;
2838*4882a593Smuzhiyun return inode_permission(dir, MAY_WRITE | MAY_EXEC);
2839*4882a593Smuzhiyun }
2840*4882a593Smuzhiyun
2841*4882a593Smuzhiyun /*
2842*4882a593Smuzhiyun * p1 and p2 should be directories on the same fs.
2843*4882a593Smuzhiyun */
lock_rename(struct dentry * p1,struct dentry * p2)2844*4882a593Smuzhiyun struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
2845*4882a593Smuzhiyun {
2846*4882a593Smuzhiyun struct dentry *p;
2847*4882a593Smuzhiyun
2848*4882a593Smuzhiyun if (p1 == p2) {
2849*4882a593Smuzhiyun inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
2850*4882a593Smuzhiyun return NULL;
2851*4882a593Smuzhiyun }
2852*4882a593Smuzhiyun
2853*4882a593Smuzhiyun mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
2854*4882a593Smuzhiyun
2855*4882a593Smuzhiyun p = d_ancestor(p2, p1);
2856*4882a593Smuzhiyun if (p) {
2857*4882a593Smuzhiyun inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
2858*4882a593Smuzhiyun inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
2859*4882a593Smuzhiyun return p;
2860*4882a593Smuzhiyun }
2861*4882a593Smuzhiyun
2862*4882a593Smuzhiyun p = d_ancestor(p1, p2);
2863*4882a593Smuzhiyun if (p) {
2864*4882a593Smuzhiyun inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
2865*4882a593Smuzhiyun inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
2866*4882a593Smuzhiyun return p;
2867*4882a593Smuzhiyun }
2868*4882a593Smuzhiyun
2869*4882a593Smuzhiyun inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
2870*4882a593Smuzhiyun inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
2871*4882a593Smuzhiyun return NULL;
2872*4882a593Smuzhiyun }
2873*4882a593Smuzhiyun EXPORT_SYMBOL(lock_rename);
2874*4882a593Smuzhiyun
unlock_rename(struct dentry * p1,struct dentry * p2)2875*4882a593Smuzhiyun void unlock_rename(struct dentry *p1, struct dentry *p2)
2876*4882a593Smuzhiyun {
2877*4882a593Smuzhiyun inode_unlock(p1->d_inode);
2878*4882a593Smuzhiyun if (p1 != p2) {
2879*4882a593Smuzhiyun inode_unlock(p2->d_inode);
2880*4882a593Smuzhiyun mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
2881*4882a593Smuzhiyun }
2882*4882a593Smuzhiyun }
2883*4882a593Smuzhiyun EXPORT_SYMBOL(unlock_rename);
2884*4882a593Smuzhiyun
/*
 * Create a regular file for @dentry in directory @dir.
 *
 * Runs may_create(), requires the directory to implement ->create(),
 * reduces @mode to its S_IALLUGO bits plus S_IFREG, asks the security
 * layer, and then calls ->create().  fsnotify_create() is issued only when
 * ->create() succeeded.
 *
 * Returns 0 on success or a negative errno from whichever step failed.
 */
int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		bool want_excl)
{
	int error;

	error = may_create(dir, dentry);
	if (error)
		return error;

	if (!dir->i_op->create)
		return -EACCES;	/* shouldn't it be ENOSYS? */

	mode = (mode & S_IALLUGO) | S_IFREG;

	error = security_inode_create(dir, dentry, mode);
	if (error)
		return error;

	error = dir->i_op->create(dir, dentry, mode, want_excl);
	if (error)
		return error;

	fsnotify_create(dir, dentry);
	return 0;
}
EXPORT_SYMBOL_NS(vfs_create, ANDROID_GKI_VFS_EXPORT_ONLY);
2905*4882a593Smuzhiyun
/*
 * Create an object for @dentry using the caller-supplied constructor @f.
 *
 * The directory is taken from the inode of @dentry's parent.  After
 * may_create() and security_inode_create() agree, @f is called with
 * @dentry, the adjusted mode (S_IALLUGO bits of @mode plus S_IFREG) and
 * @arg.  fsnotify_create() is issued only when @f returned 0.
 *
 * Returns 0 on success or the first non-zero result of the steps above.
 */
int vfs_mkobj(struct dentry *dentry, umode_t mode,
		int (*f)(struct dentry *, umode_t, void *),
		void *arg)
{
	struct inode *dir = dentry->d_parent->d_inode;
	int error;

	error = may_create(dir, dentry);
	if (error)
		return error;

	mode = (mode & S_IALLUGO) | S_IFREG;

	error = security_inode_create(dir, dentry, mode);
	if (error)
		return error;

	error = f(dentry, mode, arg);
	if (error)
		return error;

	fsnotify_create(dir, dentry);
	return 0;
}
EXPORT_SYMBOL(vfs_mkobj);
2926*4882a593Smuzhiyun
may_open_dev(const struct path * path)2927*4882a593Smuzhiyun bool may_open_dev(const struct path *path)
2928*4882a593Smuzhiyun {
2929*4882a593Smuzhiyun return !(path->mnt->mnt_flags & MNT_NODEV) &&
2930*4882a593Smuzhiyun !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
2931*4882a593Smuzhiyun }
2932*4882a593Smuzhiyun
may_open(const struct path * path,int acc_mode,int flag)2933*4882a593Smuzhiyun static int may_open(const struct path *path, int acc_mode, int flag)
2934*4882a593Smuzhiyun {
2935*4882a593Smuzhiyun struct dentry *dentry = path->dentry;
2936*4882a593Smuzhiyun struct inode *inode = dentry->d_inode;
2937*4882a593Smuzhiyun int error;
2938*4882a593Smuzhiyun
2939*4882a593Smuzhiyun if (!inode)
2940*4882a593Smuzhiyun return -ENOENT;
2941*4882a593Smuzhiyun
2942*4882a593Smuzhiyun switch (inode->i_mode & S_IFMT) {
2943*4882a593Smuzhiyun case S_IFLNK:
2944*4882a593Smuzhiyun return -ELOOP;
2945*4882a593Smuzhiyun case S_IFDIR:
2946*4882a593Smuzhiyun if (acc_mode & MAY_WRITE)
2947*4882a593Smuzhiyun return -EISDIR;
2948*4882a593Smuzhiyun if (acc_mode & MAY_EXEC)
2949*4882a593Smuzhiyun return -EACCES;
2950*4882a593Smuzhiyun break;
2951*4882a593Smuzhiyun case S_IFBLK:
2952*4882a593Smuzhiyun case S_IFCHR:
2953*4882a593Smuzhiyun if (!may_open_dev(path))
2954*4882a593Smuzhiyun return -EACCES;
2955*4882a593Smuzhiyun fallthrough;
2956*4882a593Smuzhiyun case S_IFIFO:
2957*4882a593Smuzhiyun case S_IFSOCK:
2958*4882a593Smuzhiyun if (acc_mode & MAY_EXEC)
2959*4882a593Smuzhiyun return -EACCES;
2960*4882a593Smuzhiyun flag &= ~O_TRUNC;
2961*4882a593Smuzhiyun break;
2962*4882a593Smuzhiyun case S_IFREG:
2963*4882a593Smuzhiyun if ((acc_mode & MAY_EXEC) && path_noexec(path))
2964*4882a593Smuzhiyun return -EACCES;
2965*4882a593Smuzhiyun break;
2966*4882a593Smuzhiyun }
2967*4882a593Smuzhiyun
2968*4882a593Smuzhiyun error = inode_permission(inode, MAY_OPEN | acc_mode);
2969*4882a593Smuzhiyun if (error)
2970*4882a593Smuzhiyun return error;
2971*4882a593Smuzhiyun
2972*4882a593Smuzhiyun /*
2973*4882a593Smuzhiyun * An append-only file must be opened in append mode for writing.
2974*4882a593Smuzhiyun */
2975*4882a593Smuzhiyun if (IS_APPEND(inode)) {
2976*4882a593Smuzhiyun if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
2977*4882a593Smuzhiyun return -EPERM;
2978*4882a593Smuzhiyun if (flag & O_TRUNC)
2979*4882a593Smuzhiyun return -EPERM;
2980*4882a593Smuzhiyun }
2981*4882a593Smuzhiyun
2982*4882a593Smuzhiyun /* O_NOATIME can only be set by the owner or superuser */
2983*4882a593Smuzhiyun if (flag & O_NOATIME && !inode_owner_or_capable(inode))
2984*4882a593Smuzhiyun return -EPERM;
2985*4882a593Smuzhiyun
2986*4882a593Smuzhiyun return 0;
2987*4882a593Smuzhiyun }
2988*4882a593Smuzhiyun
handle_truncate(struct file * filp)2989*4882a593Smuzhiyun static int handle_truncate(struct file *filp)
2990*4882a593Smuzhiyun {
2991*4882a593Smuzhiyun const struct path *path = &filp->f_path;
2992*4882a593Smuzhiyun struct inode *inode = path->dentry->d_inode;
2993*4882a593Smuzhiyun int error = get_write_access(inode);
2994*4882a593Smuzhiyun if (error)
2995*4882a593Smuzhiyun return error;
2996*4882a593Smuzhiyun /*
2997*4882a593Smuzhiyun * Refuse to truncate files with mandatory locks held on them.
2998*4882a593Smuzhiyun */
2999*4882a593Smuzhiyun error = locks_verify_locked(filp);
3000*4882a593Smuzhiyun if (!error)
3001*4882a593Smuzhiyun error = security_path_truncate(path);
3002*4882a593Smuzhiyun if (!error) {
3003*4882a593Smuzhiyun error = do_truncate(path->dentry, 0,
3004*4882a593Smuzhiyun ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
3005*4882a593Smuzhiyun filp);
3006*4882a593Smuzhiyun }
3007*4882a593Smuzhiyun put_write_access(inode);
3008*4882a593Smuzhiyun return error;
3009*4882a593Smuzhiyun }
3010*4882a593Smuzhiyun
/*
 * Normalise the access-mode bits of an open(2) flag word: when the bits
 * selected by O_ACCMODE hold the value 3, the word is decremented by one so
 * that they hold 2 instead.  Every other flag word is returned unchanged.
 */
static inline int open_to_namei_flags(int flag)
{
	return ((flag & O_ACCMODE) == 3) ? flag - 1 : flag;
}
3017*4882a593Smuzhiyun
/*
 * Permission checks for creating @dentry in directory @dir as part of an
 * O_CREAT open.
 *
 * In order: security_path_mknod(), a check that the current fsuid and
 * fsgid have a mapping in the superblock's user namespace (-EOVERFLOW if
 * not), write+exec permission on the directory inode, and finally
 * security_inode_create().
 *
 * Returns 0 if creation is allowed, a negative errno otherwise.
 */
static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t mode)
{
	struct inode *dir_inode = dir->dentry->d_inode;
	struct user_namespace *fs_userns;
	int error;

	error = security_path_mknod(dir, dentry, mode, 0);
	if (error)
		return error;

	fs_userns = dir->dentry->d_sb->s_user_ns;
	if (!kuid_has_mapping(fs_userns, current_fsuid()))
		return -EOVERFLOW;
	if (!kgid_has_mapping(fs_userns, current_fsgid()))
		return -EOVERFLOW;

	error = inode_permission(dir_inode, MAY_WRITE | MAY_EXEC);
	if (error)
		return error;

	return security_inode_create(dir_inode, dentry, mode);
}
3036*4882a593Smuzhiyun
3037*4882a593Smuzhiyun /*
3038*4882a593Smuzhiyun * Attempt to atomically look up, create and open a file from a negative
3039*4882a593Smuzhiyun * dentry.
3040*4882a593Smuzhiyun *
3041*4882a593Smuzhiyun * Returns 0 if successful. The file will have been created and attached to
3042*4882a593Smuzhiyun * @file by the filesystem calling finish_open().
3043*4882a593Smuzhiyun *
3044*4882a593Smuzhiyun * If the file was looked up only or didn't need creating, FMODE_OPENED won't
3045*4882a593Smuzhiyun * be set. The caller will need to perform the open themselves. @path will
3046*4882a593Smuzhiyun * have been updated to point to the new dentry. This may be negative.
3047*4882a593Smuzhiyun *
3048*4882a593Smuzhiyun * Returns an error code otherwise.
3049*4882a593Smuzhiyun */
atomic_open(struct nameidata * nd,struct dentry * dentry,struct file * file,int open_flag,umode_t mode)3050*4882a593Smuzhiyun static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
3051*4882a593Smuzhiyun struct file *file,
3052*4882a593Smuzhiyun int open_flag, umode_t mode)
3053*4882a593Smuzhiyun {
3054*4882a593Smuzhiyun struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
3055*4882a593Smuzhiyun struct inode *dir = nd->path.dentry->d_inode;
3056*4882a593Smuzhiyun int error;
3057*4882a593Smuzhiyun
3058*4882a593Smuzhiyun if (nd->flags & LOOKUP_DIRECTORY)
3059*4882a593Smuzhiyun open_flag |= O_DIRECTORY;
3060*4882a593Smuzhiyun
3061*4882a593Smuzhiyun file->f_path.dentry = DENTRY_NOT_SET;
3062*4882a593Smuzhiyun file->f_path.mnt = nd->path.mnt;
3063*4882a593Smuzhiyun error = dir->i_op->atomic_open(dir, dentry, file,
3064*4882a593Smuzhiyun open_to_namei_flags(open_flag), mode);
3065*4882a593Smuzhiyun d_lookup_done(dentry);
3066*4882a593Smuzhiyun if (!error) {
3067*4882a593Smuzhiyun if (file->f_mode & FMODE_OPENED) {
3068*4882a593Smuzhiyun if (unlikely(dentry != file->f_path.dentry)) {
3069*4882a593Smuzhiyun dput(dentry);
3070*4882a593Smuzhiyun dentry = dget(file->f_path.dentry);
3071*4882a593Smuzhiyun }
3072*4882a593Smuzhiyun } else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
3073*4882a593Smuzhiyun error = -EIO;
3074*4882a593Smuzhiyun } else {
3075*4882a593Smuzhiyun if (file->f_path.dentry) {
3076*4882a593Smuzhiyun dput(dentry);
3077*4882a593Smuzhiyun dentry = file->f_path.dentry;
3078*4882a593Smuzhiyun }
3079*4882a593Smuzhiyun if (unlikely(d_is_negative(dentry)))
3080*4882a593Smuzhiyun error = -ENOENT;
3081*4882a593Smuzhiyun }
3082*4882a593Smuzhiyun }
3083*4882a593Smuzhiyun if (error) {
3084*4882a593Smuzhiyun dput(dentry);
3085*4882a593Smuzhiyun dentry = ERR_PTR(error);
3086*4882a593Smuzhiyun }
3087*4882a593Smuzhiyun return dentry;
3088*4882a593Smuzhiyun }
3089*4882a593Smuzhiyun
3090*4882a593Smuzhiyun /*
3091*4882a593Smuzhiyun * Look up and maybe create and open the last component.
3092*4882a593Smuzhiyun *
3093*4882a593Smuzhiyun * Must be called with parent locked (exclusive in O_CREAT case).
3094*4882a593Smuzhiyun *
3095*4882a593Smuzhiyun * Returns 0 on success, that is, if
3096*4882a593Smuzhiyun * the file was successfully atomically created (if necessary) and opened, or
3097*4882a593Smuzhiyun * the file was not completely opened at this time, though lookups and
3098*4882a593Smuzhiyun * creations were performed.
3099*4882a593Smuzhiyun * These case are distinguished by presence of FMODE_OPENED on file->f_mode.
3100*4882a593Smuzhiyun * In the latter case dentry returned in @path might be negative if O_CREAT
3101*4882a593Smuzhiyun * hadn't been specified.
3102*4882a593Smuzhiyun *
3103*4882a593Smuzhiyun * An error code is returned on failure.
3104*4882a593Smuzhiyun */
lookup_open(struct nameidata * nd,struct file * file,const struct open_flags * op,bool got_write)3105*4882a593Smuzhiyun static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
3106*4882a593Smuzhiyun const struct open_flags *op,
3107*4882a593Smuzhiyun bool got_write)
3108*4882a593Smuzhiyun {
3109*4882a593Smuzhiyun struct dentry *dir = nd->path.dentry;
3110*4882a593Smuzhiyun struct inode *dir_inode = dir->d_inode;
3111*4882a593Smuzhiyun int open_flag = op->open_flag;
3112*4882a593Smuzhiyun struct dentry *dentry;
3113*4882a593Smuzhiyun int error, create_error = 0;
3114*4882a593Smuzhiyun umode_t mode = op->mode;
3115*4882a593Smuzhiyun DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
3116*4882a593Smuzhiyun
3117*4882a593Smuzhiyun if (unlikely(IS_DEADDIR(dir_inode)))
3118*4882a593Smuzhiyun return ERR_PTR(-ENOENT);
3119*4882a593Smuzhiyun
3120*4882a593Smuzhiyun file->f_mode &= ~FMODE_CREATED;
3121*4882a593Smuzhiyun dentry = d_lookup(dir, &nd->last);
3122*4882a593Smuzhiyun for (;;) {
3123*4882a593Smuzhiyun if (!dentry) {
3124*4882a593Smuzhiyun dentry = d_alloc_parallel(dir, &nd->last, &wq);
3125*4882a593Smuzhiyun if (IS_ERR(dentry))
3126*4882a593Smuzhiyun return dentry;
3127*4882a593Smuzhiyun }
3128*4882a593Smuzhiyun if (d_in_lookup(dentry))
3129*4882a593Smuzhiyun break;
3130*4882a593Smuzhiyun
3131*4882a593Smuzhiyun error = d_revalidate(dentry, nd->flags);
3132*4882a593Smuzhiyun if (likely(error > 0))
3133*4882a593Smuzhiyun break;
3134*4882a593Smuzhiyun if (error)
3135*4882a593Smuzhiyun goto out_dput;
3136*4882a593Smuzhiyun d_invalidate(dentry);
3137*4882a593Smuzhiyun dput(dentry);
3138*4882a593Smuzhiyun dentry = NULL;
3139*4882a593Smuzhiyun }
3140*4882a593Smuzhiyun if (dentry->d_inode) {
3141*4882a593Smuzhiyun /* Cached positive dentry: will open in f_op->open */
3142*4882a593Smuzhiyun return dentry;
3143*4882a593Smuzhiyun }
3144*4882a593Smuzhiyun
3145*4882a593Smuzhiyun /*
3146*4882a593Smuzhiyun * Checking write permission is tricky, bacuse we don't know if we are
3147*4882a593Smuzhiyun * going to actually need it: O_CREAT opens should work as long as the
3148*4882a593Smuzhiyun * file exists. But checking existence breaks atomicity. The trick is
3149*4882a593Smuzhiyun * to check access and if not granted clear O_CREAT from the flags.
3150*4882a593Smuzhiyun *
3151*4882a593Smuzhiyun * Another problem is returing the "right" error value (e.g. for an
3152*4882a593Smuzhiyun * O_EXCL open we want to return EEXIST not EROFS).
3153*4882a593Smuzhiyun */
3154*4882a593Smuzhiyun if (unlikely(!got_write))
3155*4882a593Smuzhiyun open_flag &= ~O_TRUNC;
3156*4882a593Smuzhiyun if (open_flag & O_CREAT) {
3157*4882a593Smuzhiyun if (open_flag & O_EXCL)
3158*4882a593Smuzhiyun open_flag &= ~O_TRUNC;
3159*4882a593Smuzhiyun if (!IS_POSIXACL(dir->d_inode))
3160*4882a593Smuzhiyun mode &= ~current_umask();
3161*4882a593Smuzhiyun if (likely(got_write))
3162*4882a593Smuzhiyun create_error = may_o_create(&nd->path, dentry, mode);
3163*4882a593Smuzhiyun else
3164*4882a593Smuzhiyun create_error = -EROFS;
3165*4882a593Smuzhiyun }
3166*4882a593Smuzhiyun if (create_error)
3167*4882a593Smuzhiyun open_flag &= ~O_CREAT;
3168*4882a593Smuzhiyun if (dir_inode->i_op->atomic_open) {
3169*4882a593Smuzhiyun dentry = atomic_open(nd, dentry, file, open_flag, mode);
3170*4882a593Smuzhiyun if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT))
3171*4882a593Smuzhiyun dentry = ERR_PTR(create_error);
3172*4882a593Smuzhiyun return dentry;
3173*4882a593Smuzhiyun }
3174*4882a593Smuzhiyun
3175*4882a593Smuzhiyun if (d_in_lookup(dentry)) {
3176*4882a593Smuzhiyun struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
3177*4882a593Smuzhiyun nd->flags);
3178*4882a593Smuzhiyun d_lookup_done(dentry);
3179*4882a593Smuzhiyun if (unlikely(res)) {
3180*4882a593Smuzhiyun if (IS_ERR(res)) {
3181*4882a593Smuzhiyun error = PTR_ERR(res);
3182*4882a593Smuzhiyun goto out_dput;
3183*4882a593Smuzhiyun }
3184*4882a593Smuzhiyun dput(dentry);
3185*4882a593Smuzhiyun dentry = res;
3186*4882a593Smuzhiyun }
3187*4882a593Smuzhiyun }
3188*4882a593Smuzhiyun
3189*4882a593Smuzhiyun /* Negative dentry, just create the file */
3190*4882a593Smuzhiyun if (!dentry->d_inode && (open_flag & O_CREAT)) {
3191*4882a593Smuzhiyun file->f_mode |= FMODE_CREATED;
3192*4882a593Smuzhiyun audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
3193*4882a593Smuzhiyun if (!dir_inode->i_op->create) {
3194*4882a593Smuzhiyun error = -EACCES;
3195*4882a593Smuzhiyun goto out_dput;
3196*4882a593Smuzhiyun }
3197*4882a593Smuzhiyun error = dir_inode->i_op->create(dir_inode, dentry, mode,
3198*4882a593Smuzhiyun open_flag & O_EXCL);
3199*4882a593Smuzhiyun if (error)
3200*4882a593Smuzhiyun goto out_dput;
3201*4882a593Smuzhiyun }
3202*4882a593Smuzhiyun if (unlikely(create_error) && !dentry->d_inode) {
3203*4882a593Smuzhiyun error = create_error;
3204*4882a593Smuzhiyun goto out_dput;
3205*4882a593Smuzhiyun }
3206*4882a593Smuzhiyun return dentry;
3207*4882a593Smuzhiyun
3208*4882a593Smuzhiyun out_dput:
3209*4882a593Smuzhiyun dput(dentry);
3210*4882a593Smuzhiyun return ERR_PTR(error);
3211*4882a593Smuzhiyun }
3212*4882a593Smuzhiyun
open_last_lookups(struct nameidata * nd,struct file * file,const struct open_flags * op)3213*4882a593Smuzhiyun static const char *open_last_lookups(struct nameidata *nd,
3214*4882a593Smuzhiyun struct file *file, const struct open_flags *op)
3215*4882a593Smuzhiyun {
3216*4882a593Smuzhiyun struct dentry *dir = nd->path.dentry;
3217*4882a593Smuzhiyun int open_flag = op->open_flag;
3218*4882a593Smuzhiyun bool got_write = false;
3219*4882a593Smuzhiyun unsigned seq;
3220*4882a593Smuzhiyun struct inode *inode;
3221*4882a593Smuzhiyun struct dentry *dentry;
3222*4882a593Smuzhiyun const char *res;
3223*4882a593Smuzhiyun
3224*4882a593Smuzhiyun nd->flags |= op->intent;
3225*4882a593Smuzhiyun
3226*4882a593Smuzhiyun if (nd->last_type != LAST_NORM) {
3227*4882a593Smuzhiyun if (nd->depth)
3228*4882a593Smuzhiyun put_link(nd);
3229*4882a593Smuzhiyun return handle_dots(nd, nd->last_type);
3230*4882a593Smuzhiyun }
3231*4882a593Smuzhiyun
3232*4882a593Smuzhiyun if (!(open_flag & O_CREAT)) {
3233*4882a593Smuzhiyun if (nd->last.name[nd->last.len])
3234*4882a593Smuzhiyun nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
3235*4882a593Smuzhiyun /* we _can_ be in RCU mode here */
3236*4882a593Smuzhiyun dentry = lookup_fast(nd, &inode, &seq);
3237*4882a593Smuzhiyun if (IS_ERR(dentry))
3238*4882a593Smuzhiyun return ERR_CAST(dentry);
3239*4882a593Smuzhiyun if (likely(dentry))
3240*4882a593Smuzhiyun goto finish_lookup;
3241*4882a593Smuzhiyun
3242*4882a593Smuzhiyun BUG_ON(nd->flags & LOOKUP_RCU);
3243*4882a593Smuzhiyun } else {
3244*4882a593Smuzhiyun /* create side of things */
3245*4882a593Smuzhiyun if (nd->flags & LOOKUP_RCU) {
3246*4882a593Smuzhiyun if (!try_to_unlazy(nd))
3247*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
3248*4882a593Smuzhiyun }
3249*4882a593Smuzhiyun audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
3250*4882a593Smuzhiyun /* trailing slashes? */
3251*4882a593Smuzhiyun if (unlikely(nd->last.name[nd->last.len]))
3252*4882a593Smuzhiyun return ERR_PTR(-EISDIR);
3253*4882a593Smuzhiyun }
3254*4882a593Smuzhiyun
3255*4882a593Smuzhiyun if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
3256*4882a593Smuzhiyun got_write = !mnt_want_write(nd->path.mnt);
3257*4882a593Smuzhiyun /*
3258*4882a593Smuzhiyun * do _not_ fail yet - we might not need that or fail with
3259*4882a593Smuzhiyun * a different error; let lookup_open() decide; we'll be
3260*4882a593Smuzhiyun * dropping this one anyway.
3261*4882a593Smuzhiyun */
3262*4882a593Smuzhiyun }
3263*4882a593Smuzhiyun if (open_flag & O_CREAT)
3264*4882a593Smuzhiyun inode_lock(dir->d_inode);
3265*4882a593Smuzhiyun else
3266*4882a593Smuzhiyun inode_lock_shared(dir->d_inode);
3267*4882a593Smuzhiyun dentry = lookup_open(nd, file, op, got_write);
3268*4882a593Smuzhiyun if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
3269*4882a593Smuzhiyun fsnotify_create(dir->d_inode, dentry);
3270*4882a593Smuzhiyun if (open_flag & O_CREAT)
3271*4882a593Smuzhiyun inode_unlock(dir->d_inode);
3272*4882a593Smuzhiyun else
3273*4882a593Smuzhiyun inode_unlock_shared(dir->d_inode);
3274*4882a593Smuzhiyun
3275*4882a593Smuzhiyun if (got_write)
3276*4882a593Smuzhiyun mnt_drop_write(nd->path.mnt);
3277*4882a593Smuzhiyun
3278*4882a593Smuzhiyun if (IS_ERR(dentry))
3279*4882a593Smuzhiyun return ERR_CAST(dentry);
3280*4882a593Smuzhiyun
3281*4882a593Smuzhiyun if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) {
3282*4882a593Smuzhiyun dput(nd->path.dentry);
3283*4882a593Smuzhiyun nd->path.dentry = dentry;
3284*4882a593Smuzhiyun return NULL;
3285*4882a593Smuzhiyun }
3286*4882a593Smuzhiyun
3287*4882a593Smuzhiyun finish_lookup:
3288*4882a593Smuzhiyun if (nd->depth)
3289*4882a593Smuzhiyun put_link(nd);
3290*4882a593Smuzhiyun res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
3291*4882a593Smuzhiyun if (unlikely(res))
3292*4882a593Smuzhiyun nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
3293*4882a593Smuzhiyun return res;
3294*4882a593Smuzhiyun }
3295*4882a593Smuzhiyun
3296*4882a593Smuzhiyun /*
3297*4882a593Smuzhiyun * Handle the last step of open()
3298*4882a593Smuzhiyun */
do_open(struct nameidata * nd,struct file * file,const struct open_flags * op)3299*4882a593Smuzhiyun static int do_open(struct nameidata *nd,
3300*4882a593Smuzhiyun struct file *file, const struct open_flags *op)
3301*4882a593Smuzhiyun {
3302*4882a593Smuzhiyun int open_flag = op->open_flag;
3303*4882a593Smuzhiyun bool do_truncate;
3304*4882a593Smuzhiyun int acc_mode;
3305*4882a593Smuzhiyun int error;
3306*4882a593Smuzhiyun
3307*4882a593Smuzhiyun if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
3308*4882a593Smuzhiyun error = complete_walk(nd);
3309*4882a593Smuzhiyun if (error)
3310*4882a593Smuzhiyun return error;
3311*4882a593Smuzhiyun }
3312*4882a593Smuzhiyun if (!(file->f_mode & FMODE_CREATED))
3313*4882a593Smuzhiyun audit_inode(nd->name, nd->path.dentry, 0);
3314*4882a593Smuzhiyun if (open_flag & O_CREAT) {
3315*4882a593Smuzhiyun if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED))
3316*4882a593Smuzhiyun return -EEXIST;
3317*4882a593Smuzhiyun if (d_is_dir(nd->path.dentry))
3318*4882a593Smuzhiyun return -EISDIR;
3319*4882a593Smuzhiyun error = may_create_in_sticky(nd->dir_mode, nd->dir_uid,
3320*4882a593Smuzhiyun d_backing_inode(nd->path.dentry));
3321*4882a593Smuzhiyun if (unlikely(error))
3322*4882a593Smuzhiyun return error;
3323*4882a593Smuzhiyun }
3324*4882a593Smuzhiyun if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
3325*4882a593Smuzhiyun return -ENOTDIR;
3326*4882a593Smuzhiyun
3327*4882a593Smuzhiyun do_truncate = false;
3328*4882a593Smuzhiyun acc_mode = op->acc_mode;
3329*4882a593Smuzhiyun if (file->f_mode & FMODE_CREATED) {
3330*4882a593Smuzhiyun /* Don't check for write permission, don't truncate */
3331*4882a593Smuzhiyun open_flag &= ~O_TRUNC;
3332*4882a593Smuzhiyun acc_mode = 0;
3333*4882a593Smuzhiyun } else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) {
3334*4882a593Smuzhiyun error = mnt_want_write(nd->path.mnt);
3335*4882a593Smuzhiyun if (error)
3336*4882a593Smuzhiyun return error;
3337*4882a593Smuzhiyun do_truncate = true;
3338*4882a593Smuzhiyun }
3339*4882a593Smuzhiyun error = may_open(&nd->path, acc_mode, open_flag);
3340*4882a593Smuzhiyun if (!error && !(file->f_mode & FMODE_OPENED))
3341*4882a593Smuzhiyun error = vfs_open(&nd->path, file);
3342*4882a593Smuzhiyun if (!error)
3343*4882a593Smuzhiyun error = ima_file_check(file, op->acc_mode);
3344*4882a593Smuzhiyun if (!error && do_truncate)
3345*4882a593Smuzhiyun error = handle_truncate(file);
3346*4882a593Smuzhiyun if (unlikely(error > 0)) {
3347*4882a593Smuzhiyun WARN_ON(1);
3348*4882a593Smuzhiyun error = -EINVAL;
3349*4882a593Smuzhiyun }
3350*4882a593Smuzhiyun if (do_truncate)
3351*4882a593Smuzhiyun mnt_drop_write(nd->path.mnt);
3352*4882a593Smuzhiyun return error;
3353*4882a593Smuzhiyun }
3354*4882a593Smuzhiyun
/*
 * Create an unnamed temporary file in the directory @dentry.
 *
 * Needs write and search permission on the directory and a ->tmpfile()
 * method.  Unless O_EXCL is set in @open_flag the new inode is marked
 * I_LINKABLE.  Returns the new child dentry or an ERR_PTR().
 */
struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
{
	struct inode *dir = dentry->d_inode;
	struct dentry *child = NULL;
	struct inode *inode;
	int error;

	/* we want directory to be writable */
	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
	if (error)
		goto out_err;

	if (!dir->i_op->tmpfile) {
		error = -EOPNOTSUPP;
		goto out_err;
	}

	child = d_alloc(dentry, &slash_name);
	if (unlikely(!child)) {
		error = -ENOMEM;
		goto out_err;
	}

	/* The umask is applied here only when the directory has no POSIX ACLs. */
	if (!IS_POSIXACL(dir))
		mode &= ~current_umask();

	error = dir->i_op->tmpfile(dir, child, mode);
	if (error)
		goto out_err;

	/* ->tmpfile() reported success but attached no inode. */
	inode = child->d_inode;
	if (unlikely(!inode)) {
		error = -ENOENT;
		goto out_err;
	}

	if (!(open_flag & O_EXCL)) {
		spin_lock(&inode->i_lock);
		inode->i_state |= I_LINKABLE;
		spin_unlock(&inode->i_lock);
	}
	ima_post_create_tmpfile(inode);
	return child;

out_err:
	/* child is still NULL if we failed before d_alloc(). */
	dput(child);
	return ERR_PTR(error);
}
EXPORT_SYMBOL(vfs_tmpfile);
3395*4882a593Smuzhiyun
do_tmpfile(struct nameidata * nd,unsigned flags,const struct open_flags * op,struct file * file)3396*4882a593Smuzhiyun static int do_tmpfile(struct nameidata *nd, unsigned flags,
3397*4882a593Smuzhiyun const struct open_flags *op,
3398*4882a593Smuzhiyun struct file *file)
3399*4882a593Smuzhiyun {
3400*4882a593Smuzhiyun struct dentry *child;
3401*4882a593Smuzhiyun struct path path;
3402*4882a593Smuzhiyun int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
3403*4882a593Smuzhiyun if (unlikely(error))
3404*4882a593Smuzhiyun return error;
3405*4882a593Smuzhiyun error = mnt_want_write(path.mnt);
3406*4882a593Smuzhiyun if (unlikely(error))
3407*4882a593Smuzhiyun goto out;
3408*4882a593Smuzhiyun child = vfs_tmpfile(path.dentry, op->mode, op->open_flag);
3409*4882a593Smuzhiyun error = PTR_ERR(child);
3410*4882a593Smuzhiyun if (IS_ERR(child))
3411*4882a593Smuzhiyun goto out2;
3412*4882a593Smuzhiyun dput(path.dentry);
3413*4882a593Smuzhiyun path.dentry = child;
3414*4882a593Smuzhiyun audit_inode(nd->name, child, 0);
3415*4882a593Smuzhiyun /* Don't check for other permissions, the inode was just created */
3416*4882a593Smuzhiyun error = may_open(&path, 0, op->open_flag);
3417*4882a593Smuzhiyun if (error)
3418*4882a593Smuzhiyun goto out2;
3419*4882a593Smuzhiyun file->f_path.mnt = path.mnt;
3420*4882a593Smuzhiyun error = finish_open(file, child, NULL);
3421*4882a593Smuzhiyun out2:
3422*4882a593Smuzhiyun mnt_drop_write(path.mnt);
3423*4882a593Smuzhiyun out:
3424*4882a593Smuzhiyun path_put(&path);
3425*4882a593Smuzhiyun return error;
3426*4882a593Smuzhiyun }
3427*4882a593Smuzhiyun
do_o_path(struct nameidata * nd,unsigned flags,struct file * file)3428*4882a593Smuzhiyun static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
3429*4882a593Smuzhiyun {
3430*4882a593Smuzhiyun struct path path;
3431*4882a593Smuzhiyun int error = path_lookupat(nd, flags, &path);
3432*4882a593Smuzhiyun if (!error) {
3433*4882a593Smuzhiyun audit_inode(nd->name, path.dentry, 0);
3434*4882a593Smuzhiyun error = vfs_open(&path, file);
3435*4882a593Smuzhiyun path_put(&path);
3436*4882a593Smuzhiyun }
3437*4882a593Smuzhiyun return error;
3438*4882a593Smuzhiyun }
3439*4882a593Smuzhiyun
path_openat(struct nameidata * nd,const struct open_flags * op,unsigned flags)3440*4882a593Smuzhiyun static struct file *path_openat(struct nameidata *nd,
3441*4882a593Smuzhiyun const struct open_flags *op, unsigned flags)
3442*4882a593Smuzhiyun {
3443*4882a593Smuzhiyun struct file *file;
3444*4882a593Smuzhiyun int error;
3445*4882a593Smuzhiyun
3446*4882a593Smuzhiyun file = alloc_empty_file(op->open_flag, current_cred());
3447*4882a593Smuzhiyun if (IS_ERR(file))
3448*4882a593Smuzhiyun return file;
3449*4882a593Smuzhiyun
3450*4882a593Smuzhiyun if (unlikely(file->f_flags & __O_TMPFILE)) {
3451*4882a593Smuzhiyun error = do_tmpfile(nd, flags, op, file);
3452*4882a593Smuzhiyun } else if (unlikely(file->f_flags & O_PATH)) {
3453*4882a593Smuzhiyun error = do_o_path(nd, flags, file);
3454*4882a593Smuzhiyun } else {
3455*4882a593Smuzhiyun const char *s = path_init(nd, flags);
3456*4882a593Smuzhiyun while (!(error = link_path_walk(s, nd)) &&
3457*4882a593Smuzhiyun (s = open_last_lookups(nd, file, op)) != NULL)
3458*4882a593Smuzhiyun ;
3459*4882a593Smuzhiyun if (!error)
3460*4882a593Smuzhiyun error = do_open(nd, file, op);
3461*4882a593Smuzhiyun terminate_walk(nd);
3462*4882a593Smuzhiyun }
3463*4882a593Smuzhiyun if (likely(!error)) {
3464*4882a593Smuzhiyun if (likely(file->f_mode & FMODE_OPENED))
3465*4882a593Smuzhiyun return file;
3466*4882a593Smuzhiyun WARN_ON(1);
3467*4882a593Smuzhiyun error = -EINVAL;
3468*4882a593Smuzhiyun }
3469*4882a593Smuzhiyun fput(file);
3470*4882a593Smuzhiyun if (error == -EOPENSTALE) {
3471*4882a593Smuzhiyun if (flags & LOOKUP_RCU)
3472*4882a593Smuzhiyun error = -ECHILD;
3473*4882a593Smuzhiyun else
3474*4882a593Smuzhiyun error = -ESTALE;
3475*4882a593Smuzhiyun }
3476*4882a593Smuzhiyun return ERR_PTR(error);
3477*4882a593Smuzhiyun }
3478*4882a593Smuzhiyun
do_filp_open(int dfd,struct filename * pathname,const struct open_flags * op)3479*4882a593Smuzhiyun struct file *do_filp_open(int dfd, struct filename *pathname,
3480*4882a593Smuzhiyun const struct open_flags *op)
3481*4882a593Smuzhiyun {
3482*4882a593Smuzhiyun struct nameidata nd;
3483*4882a593Smuzhiyun int flags = op->lookup_flags;
3484*4882a593Smuzhiyun struct file *filp;
3485*4882a593Smuzhiyun
3486*4882a593Smuzhiyun set_nameidata(&nd, dfd, pathname);
3487*4882a593Smuzhiyun filp = path_openat(&nd, op, flags | LOOKUP_RCU);
3488*4882a593Smuzhiyun if (unlikely(filp == ERR_PTR(-ECHILD)))
3489*4882a593Smuzhiyun filp = path_openat(&nd, op, flags);
3490*4882a593Smuzhiyun if (unlikely(filp == ERR_PTR(-ESTALE)))
3491*4882a593Smuzhiyun filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
3492*4882a593Smuzhiyun restore_nameidata();
3493*4882a593Smuzhiyun return filp;
3494*4882a593Smuzhiyun }
3495*4882a593Smuzhiyun
do_file_open_root(struct dentry * dentry,struct vfsmount * mnt,const char * name,const struct open_flags * op)3496*4882a593Smuzhiyun struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
3497*4882a593Smuzhiyun const char *name, const struct open_flags *op)
3498*4882a593Smuzhiyun {
3499*4882a593Smuzhiyun struct nameidata nd;
3500*4882a593Smuzhiyun struct file *file;
3501*4882a593Smuzhiyun struct filename *filename;
3502*4882a593Smuzhiyun int flags = op->lookup_flags | LOOKUP_ROOT;
3503*4882a593Smuzhiyun
3504*4882a593Smuzhiyun nd.root.mnt = mnt;
3505*4882a593Smuzhiyun nd.root.dentry = dentry;
3506*4882a593Smuzhiyun
3507*4882a593Smuzhiyun if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN)
3508*4882a593Smuzhiyun return ERR_PTR(-ELOOP);
3509*4882a593Smuzhiyun
3510*4882a593Smuzhiyun filename = getname_kernel(name);
3511*4882a593Smuzhiyun if (IS_ERR(filename))
3512*4882a593Smuzhiyun return ERR_CAST(filename);
3513*4882a593Smuzhiyun
3514*4882a593Smuzhiyun set_nameidata(&nd, -1, filename);
3515*4882a593Smuzhiyun file = path_openat(&nd, op, flags | LOOKUP_RCU);
3516*4882a593Smuzhiyun if (unlikely(file == ERR_PTR(-ECHILD)))
3517*4882a593Smuzhiyun file = path_openat(&nd, op, flags);
3518*4882a593Smuzhiyun if (unlikely(file == ERR_PTR(-ESTALE)))
3519*4882a593Smuzhiyun file = path_openat(&nd, op, flags | LOOKUP_REVAL);
3520*4882a593Smuzhiyun restore_nameidata();
3521*4882a593Smuzhiyun putname(filename);
3522*4882a593Smuzhiyun return file;
3523*4882a593Smuzhiyun }
3524*4882a593Smuzhiyun
filename_create(int dfd,struct filename * name,struct path * path,unsigned int lookup_flags)3525*4882a593Smuzhiyun static struct dentry *filename_create(int dfd, struct filename *name,
3526*4882a593Smuzhiyun struct path *path, unsigned int lookup_flags)
3527*4882a593Smuzhiyun {
3528*4882a593Smuzhiyun struct dentry *dentry = ERR_PTR(-EEXIST);
3529*4882a593Smuzhiyun struct qstr last;
3530*4882a593Smuzhiyun int type;
3531*4882a593Smuzhiyun int err2;
3532*4882a593Smuzhiyun int error;
3533*4882a593Smuzhiyun bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
3534*4882a593Smuzhiyun
3535*4882a593Smuzhiyun /*
3536*4882a593Smuzhiyun * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
3537*4882a593Smuzhiyun * other flags passed in are ignored!
3538*4882a593Smuzhiyun */
3539*4882a593Smuzhiyun lookup_flags &= LOOKUP_REVAL;
3540*4882a593Smuzhiyun
3541*4882a593Smuzhiyun name = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
3542*4882a593Smuzhiyun if (IS_ERR(name))
3543*4882a593Smuzhiyun return ERR_CAST(name);
3544*4882a593Smuzhiyun
3545*4882a593Smuzhiyun /*
3546*4882a593Smuzhiyun * Yucky last component or no last component at all?
3547*4882a593Smuzhiyun * (foo/., foo/.., /////)
3548*4882a593Smuzhiyun */
3549*4882a593Smuzhiyun if (unlikely(type != LAST_NORM))
3550*4882a593Smuzhiyun goto out;
3551*4882a593Smuzhiyun
3552*4882a593Smuzhiyun /* don't fail immediately if it's r/o, at least try to report other errors */
3553*4882a593Smuzhiyun err2 = mnt_want_write(path->mnt);
3554*4882a593Smuzhiyun /*
3555*4882a593Smuzhiyun * Do the final lookup.
3556*4882a593Smuzhiyun */
3557*4882a593Smuzhiyun lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
3558*4882a593Smuzhiyun inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
3559*4882a593Smuzhiyun dentry = __lookup_hash(&last, path->dentry, lookup_flags);
3560*4882a593Smuzhiyun if (IS_ERR(dentry))
3561*4882a593Smuzhiyun goto unlock;
3562*4882a593Smuzhiyun
3563*4882a593Smuzhiyun error = -EEXIST;
3564*4882a593Smuzhiyun if (d_is_positive(dentry))
3565*4882a593Smuzhiyun goto fail;
3566*4882a593Smuzhiyun
3567*4882a593Smuzhiyun /*
3568*4882a593Smuzhiyun * Special case - lookup gave negative, but... we had foo/bar/
3569*4882a593Smuzhiyun * From the vfs_mknod() POV we just have a negative dentry -
3570*4882a593Smuzhiyun * all is fine. Let's be bastards - you had / on the end, you've
3571*4882a593Smuzhiyun * been asking for (non-existent) directory. -ENOENT for you.
3572*4882a593Smuzhiyun */
3573*4882a593Smuzhiyun if (unlikely(!is_dir && last.name[last.len])) {
3574*4882a593Smuzhiyun error = -ENOENT;
3575*4882a593Smuzhiyun goto fail;
3576*4882a593Smuzhiyun }
3577*4882a593Smuzhiyun if (unlikely(err2)) {
3578*4882a593Smuzhiyun error = err2;
3579*4882a593Smuzhiyun goto fail;
3580*4882a593Smuzhiyun }
3581*4882a593Smuzhiyun putname(name);
3582*4882a593Smuzhiyun return dentry;
3583*4882a593Smuzhiyun fail:
3584*4882a593Smuzhiyun dput(dentry);
3585*4882a593Smuzhiyun dentry = ERR_PTR(error);
3586*4882a593Smuzhiyun unlock:
3587*4882a593Smuzhiyun inode_unlock(path->dentry->d_inode);
3588*4882a593Smuzhiyun if (!err2)
3589*4882a593Smuzhiyun mnt_drop_write(path->mnt);
3590*4882a593Smuzhiyun out:
3591*4882a593Smuzhiyun path_put(path);
3592*4882a593Smuzhiyun putname(name);
3593*4882a593Smuzhiyun return dentry;
3594*4882a593Smuzhiyun }
3595*4882a593Smuzhiyun
/*
 * filename_create() for a kernel-space pathname string.  Pair a
 * successful call with done_path_create().
 */
struct dentry *kern_path_create(int dfd, const char *pathname,
				struct path *path, unsigned int lookup_flags)
{
	struct filename *name = getname_kernel(pathname);

	return filename_create(dfd, name, path, lookup_flags);
}
EXPORT_SYMBOL(kern_path_create);
3603*4882a593Smuzhiyun
done_path_create(struct path * path,struct dentry * dentry)3604*4882a593Smuzhiyun void done_path_create(struct path *path, struct dentry *dentry)
3605*4882a593Smuzhiyun {
3606*4882a593Smuzhiyun dput(dentry);
3607*4882a593Smuzhiyun inode_unlock(path->dentry->d_inode);
3608*4882a593Smuzhiyun mnt_drop_write(path->mnt);
3609*4882a593Smuzhiyun path_put(path);
3610*4882a593Smuzhiyun }
3611*4882a593Smuzhiyun EXPORT_SYMBOL(done_path_create);
3612*4882a593Smuzhiyun
/*
 * filename_create() for a user-space pathname pointer.  Pair a
 * successful call with done_path_create().
 */
inline struct dentry *user_path_create(int dfd, const char __user *pathname,
				struct path *path, unsigned int lookup_flags)
{
	struct filename *name = getname(pathname);

	return filename_create(dfd, name, path, lookup_flags);
}
EXPORT_SYMBOL(user_path_create);
3619*4882a593Smuzhiyun
/*
 * Create a filesystem node @dentry in directory @dir.
 *
 * Character and block devices need CAP_MKNOD, except for the whiteout
 * device (a character device with number WHITEOUT_DEV).  Returns 0 or
 * a negative errno; fsnotify is told about a successful creation.
 */
int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
{
	bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
	int error;

	error = may_create(dir, dentry);
	if (error)
		return error;

	/* capable() stays last so it is only consulted for real devices. */
	if (!is_whiteout && (S_ISCHR(mode) || S_ISBLK(mode)) &&
	    !capable(CAP_MKNOD))
		return -EPERM;

	if (!dir->i_op->mknod)
		return -EPERM;

	error = devcgroup_inode_mknod(mode, dev);
	if (!error)
		error = security_inode_mknod(dir, dentry, mode, dev);
	if (!error)
		error = dir->i_op->mknod(dir, dentry, mode, dev);
	if (!error)
		fsnotify_create(dir, dentry);
	return error;
}
EXPORT_SYMBOL(vfs_mknod);
3649*4882a593Smuzhiyun
/*
 * Check the file-type bits of @mode for mknod().
 *
 * Returns 0 for regular files, character/block devices, FIFOs, sockets
 * and a zero type (which is treated as a regular file), -EPERM for
 * directories and -EINVAL for anything else.
 */
static int may_mknod(umode_t mode)
{
	unsigned int type = mode & S_IFMT;

	if (type == S_IFDIR)
		return -EPERM;

	/* zero mode translates to S_IFREG */
	if (type == 0 || type == S_IFREG || type == S_IFCHR ||
	    type == S_IFBLK || type == S_IFIFO || type == S_IFSOCK)
		return 0;

	return -EINVAL;
}
3666*4882a593Smuzhiyun
/*
 * Common implementation of mknod() and mknodat().
 *
 * Validates the file type, prepares the parent with user_path_create()
 * and dispatches to vfs_create() or vfs_mknod() according to the type.
 * The whole sequence is repeated once with LOOKUP_REVAL when
 * retry_estale() asks for it.  Returns 0 or a negative errno.
 */
static long do_mknodat(int dfd, const char __user *filename, umode_t mode,
		unsigned int dev)
{
	unsigned int lookup_flags = 0;
	struct dentry *dentry;
	struct path path;
	int error;

	error = may_mknod(mode);
	if (error)
		return error;

retry:
	dentry = user_path_create(dfd, filename, &path, lookup_flags);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	/* The umask is applied here only when the parent has no POSIX ACLs. */
	if (!IS_POSIXACL(path.dentry->d_inode))
		mode &= ~current_umask();

	error = security_path_mknod(&path, dentry, mode, dev);
	if (!error) {
		switch (mode & S_IFMT) {
		case 0:
		case S_IFREG:
			error = vfs_create(path.dentry->d_inode, dentry,
					   mode, true);
			if (!error)
				ima_post_path_mknod(dentry);
			break;
		case S_IFCHR:
		case S_IFBLK:
			error = vfs_mknod(path.dentry->d_inode, dentry, mode,
					  new_decode_dev(dev));
			break;
		case S_IFIFO:
		case S_IFSOCK:
			/* FIFOs and sockets are created with device number 0. */
			error = vfs_mknod(path.dentry->d_inode, dentry,
					  mode, 0);
			break;
		}
	}

	done_path_create(&path, dentry);
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
	return error;
}
3710*4882a593Smuzhiyun
/* mknodat(2): create a filesystem node relative to directory fd @dfd. */
SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename,
		umode_t, mode, unsigned int, dev)
{
	return do_mknodat(dfd, filename, mode, dev);
}
3716*4882a593Smuzhiyun
/* mknod(2): same as mknodat() with AT_FDCWD as the directory fd. */
SYSCALL_DEFINE3(mknod, const char __user *, filename,
		umode_t, mode, unsigned, dev)
{
	return do_mknodat(AT_FDCWD, filename, mode, dev);
}
3721*4882a593Smuzhiyun
/*
 * Create directory @dentry inside @dir.
 *
 * Only the permission bits and the sticky bit of @mode are kept.
 * Fails with -EMLINK when the superblock sets a link limit and @dir
 * has already reached it.  Returns 0 or a negative errno; fsnotify is
 * told about a successful mkdir.
 */
int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	unsigned max_links;
	int error;

	error = may_create(dir, dentry);
	if (error)
		return error;

	if (!dir->i_op->mkdir)
		return -EPERM;

	mode &= (S_IRWXUGO|S_ISVTX);
	error = security_inode_mkdir(dir, dentry, mode);
	if (error)
		return error;

	/* A zero s_max_links means no limit is enforced here. */
	max_links = dir->i_sb->s_max_links;
	if (max_links && dir->i_nlink >= max_links)
		return -EMLINK;

	error = dir->i_op->mkdir(dir, dentry, mode);
	if (!error)
		fsnotify_mkdir(dir, dentry);
	return error;
}
EXPORT_SYMBOL_NS(vfs_mkdir, ANDROID_GKI_VFS_EXPORT_ONLY);
3747*4882a593Smuzhiyun
/*
 * Common implementation of mkdir() and mkdirat().  The sequence is
 * repeated once with LOOKUP_REVAL when retry_estale() asks for it.
 * Returns 0 or a negative errno.
 */
static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
{
	unsigned int lookup_flags = LOOKUP_DIRECTORY;
	struct dentry *dentry;
	struct path path;
	int error;

retry:
	dentry = user_path_create(dfd, pathname, &path, lookup_flags);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	/* The umask is applied here only when the parent has no POSIX ACLs. */
	if (!IS_POSIXACL(path.dentry->d_inode))
		mode &= ~current_umask();

	error = security_path_mkdir(&path, dentry, mode);
	if (!error)
		error = vfs_mkdir(path.dentry->d_inode, dentry, mode);

	done_path_create(&path, dentry);
	if (retry_estale(error, lookup_flags)) {
		lookup_flags |= LOOKUP_REVAL;
		goto retry;
	}
	return error;
}
3772*4882a593Smuzhiyun
/* mkdirat(2): create a directory relative to directory fd @dfd. */
SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname,
		umode_t, mode)
{
	return do_mkdirat(dfd, pathname, mode);
}
3777*4882a593Smuzhiyun
/* mkdir(2): same as mkdirat() with AT_FDCWD as the directory fd. */
SYSCALL_DEFINE2(mkdir, const char __user *, pathname,
		umode_t, mode)
{
	return do_mkdirat(AT_FDCWD, pathname, mode);
}
3782*4882a593Smuzhiyun
vfs_rmdir(struct inode * dir,struct dentry * dentry)3783*4882a593Smuzhiyun int vfs_rmdir(struct inode *dir, struct dentry *dentry)
3784*4882a593Smuzhiyun {
3785*4882a593Smuzhiyun int error = may_delete(dir, dentry, 1);
3786*4882a593Smuzhiyun
3787*4882a593Smuzhiyun if (error)
3788*4882a593Smuzhiyun return error;
3789*4882a593Smuzhiyun
3790*4882a593Smuzhiyun if (!dir->i_op->rmdir)
3791*4882a593Smuzhiyun return -EPERM;
3792*4882a593Smuzhiyun
3793*4882a593Smuzhiyun dget(dentry);
3794*4882a593Smuzhiyun inode_lock(dentry->d_inode);
3795*4882a593Smuzhiyun
3796*4882a593Smuzhiyun error = -EBUSY;
3797*4882a593Smuzhiyun if (is_local_mountpoint(dentry))
3798*4882a593Smuzhiyun goto out;
3799*4882a593Smuzhiyun
3800*4882a593Smuzhiyun error = security_inode_rmdir(dir, dentry);
3801*4882a593Smuzhiyun if (error)
3802*4882a593Smuzhiyun goto out;
3803*4882a593Smuzhiyun
3804*4882a593Smuzhiyun error = dir->i_op->rmdir(dir, dentry);
3805*4882a593Smuzhiyun if (error)
3806*4882a593Smuzhiyun goto out;
3807*4882a593Smuzhiyun
3808*4882a593Smuzhiyun shrink_dcache_parent(dentry);
3809*4882a593Smuzhiyun dentry->d_inode->i_flags |= S_DEAD;
3810*4882a593Smuzhiyun dont_mount(dentry);
3811*4882a593Smuzhiyun detach_mounts(dentry);
3812*4882a593Smuzhiyun
3813*4882a593Smuzhiyun out:
3814*4882a593Smuzhiyun inode_unlock(dentry->d_inode);
3815*4882a593Smuzhiyun dput(dentry);
3816*4882a593Smuzhiyun if (!error)
3817*4882a593Smuzhiyun d_delete_notify(dir, dentry);
3818*4882a593Smuzhiyun return error;
3819*4882a593Smuzhiyun }
3820*4882a593Smuzhiyun EXPORT_SYMBOL_NS(vfs_rmdir, ANDROID_GKI_VFS_EXPORT_ONLY);
3821*4882a593Smuzhiyun
do_rmdir(int dfd,struct filename * name)3822*4882a593Smuzhiyun long do_rmdir(int dfd, struct filename *name)
3823*4882a593Smuzhiyun {
3824*4882a593Smuzhiyun int error = 0;
3825*4882a593Smuzhiyun struct dentry *dentry;
3826*4882a593Smuzhiyun struct path path;
3827*4882a593Smuzhiyun struct qstr last;
3828*4882a593Smuzhiyun int type;
3829*4882a593Smuzhiyun unsigned int lookup_flags = 0;
3830*4882a593Smuzhiyun retry:
3831*4882a593Smuzhiyun name = filename_parentat(dfd, name, lookup_flags,
3832*4882a593Smuzhiyun &path, &last, &type);
3833*4882a593Smuzhiyun if (IS_ERR(name))
3834*4882a593Smuzhiyun return PTR_ERR(name);
3835*4882a593Smuzhiyun
3836*4882a593Smuzhiyun switch (type) {
3837*4882a593Smuzhiyun case LAST_DOTDOT:
3838*4882a593Smuzhiyun error = -ENOTEMPTY;
3839*4882a593Smuzhiyun goto exit1;
3840*4882a593Smuzhiyun case LAST_DOT:
3841*4882a593Smuzhiyun error = -EINVAL;
3842*4882a593Smuzhiyun goto exit1;
3843*4882a593Smuzhiyun case LAST_ROOT:
3844*4882a593Smuzhiyun error = -EBUSY;
3845*4882a593Smuzhiyun goto exit1;
3846*4882a593Smuzhiyun }
3847*4882a593Smuzhiyun
3848*4882a593Smuzhiyun error = mnt_want_write(path.mnt);
3849*4882a593Smuzhiyun if (error)
3850*4882a593Smuzhiyun goto exit1;
3851*4882a593Smuzhiyun
3852*4882a593Smuzhiyun inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
3853*4882a593Smuzhiyun dentry = __lookup_hash(&last, path.dentry, lookup_flags);
3854*4882a593Smuzhiyun error = PTR_ERR(dentry);
3855*4882a593Smuzhiyun if (IS_ERR(dentry))
3856*4882a593Smuzhiyun goto exit2;
3857*4882a593Smuzhiyun if (!dentry->d_inode) {
3858*4882a593Smuzhiyun error = -ENOENT;
3859*4882a593Smuzhiyun goto exit3;
3860*4882a593Smuzhiyun }
3861*4882a593Smuzhiyun error = security_path_rmdir(&path, dentry);
3862*4882a593Smuzhiyun if (error)
3863*4882a593Smuzhiyun goto exit3;
3864*4882a593Smuzhiyun error = vfs_rmdir(path.dentry->d_inode, dentry);
3865*4882a593Smuzhiyun exit3:
3866*4882a593Smuzhiyun dput(dentry);
3867*4882a593Smuzhiyun exit2:
3868*4882a593Smuzhiyun inode_unlock(path.dentry->d_inode);
3869*4882a593Smuzhiyun mnt_drop_write(path.mnt);
3870*4882a593Smuzhiyun exit1:
3871*4882a593Smuzhiyun path_put(&path);
3872*4882a593Smuzhiyun if (retry_estale(error, lookup_flags)) {
3873*4882a593Smuzhiyun lookup_flags |= LOOKUP_REVAL;
3874*4882a593Smuzhiyun goto retry;
3875*4882a593Smuzhiyun }
3876*4882a593Smuzhiyun putname(name);
3877*4882a593Smuzhiyun return error;
3878*4882a593Smuzhiyun }
3879*4882a593Smuzhiyun
/* rmdir(2): remove the directory named by @pathname, relative to AT_FDCWD. */
SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
{
	struct filename *name = getname(pathname);

	return do_rmdir(AT_FDCWD, name);
}
3884*4882a593Smuzhiyun
3885*4882a593Smuzhiyun /**
3886*4882a593Smuzhiyun * vfs_unlink - unlink a filesystem object
3887*4882a593Smuzhiyun * @dir: parent directory
3888*4882a593Smuzhiyun * @dentry: victim
3889*4882a593Smuzhiyun * @delegated_inode: returns victim inode, if the inode is delegated.
3890*4882a593Smuzhiyun *
3891*4882a593Smuzhiyun * The caller must hold dir->i_mutex.
3892*4882a593Smuzhiyun *
3893*4882a593Smuzhiyun * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
3894*4882a593Smuzhiyun * return a reference to the inode in delegated_inode. The caller
3895*4882a593Smuzhiyun * should then break the delegation on that inode and retry. Because
3896*4882a593Smuzhiyun * breaking a delegation may take a long time, the caller should drop
3897*4882a593Smuzhiyun * dir->i_mutex before doing so.
3898*4882a593Smuzhiyun *
3899*4882a593Smuzhiyun * Alternatively, a caller may pass NULL for delegated_inode. This may
3900*4882a593Smuzhiyun * be appropriate for callers that expect the underlying filesystem not
3901*4882a593Smuzhiyun * to be NFS exported.
3902*4882a593Smuzhiyun */
vfs_unlink(struct inode * dir,struct dentry * dentry,struct inode ** delegated_inode)3903*4882a593Smuzhiyun int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
3904*4882a593Smuzhiyun {
3905*4882a593Smuzhiyun struct inode *target = dentry->d_inode;
3906*4882a593Smuzhiyun int error = may_delete(dir, dentry, 0);
3907*4882a593Smuzhiyun
3908*4882a593Smuzhiyun if (error)
3909*4882a593Smuzhiyun return error;
3910*4882a593Smuzhiyun
3911*4882a593Smuzhiyun if (!dir->i_op->unlink)
3912*4882a593Smuzhiyun return -EPERM;
3913*4882a593Smuzhiyun
3914*4882a593Smuzhiyun inode_lock(target);
3915*4882a593Smuzhiyun if (is_local_mountpoint(dentry))
3916*4882a593Smuzhiyun error = -EBUSY;
3917*4882a593Smuzhiyun else {
3918*4882a593Smuzhiyun error = security_inode_unlink(dir, dentry);
3919*4882a593Smuzhiyun if (!error) {
3920*4882a593Smuzhiyun error = try_break_deleg(target, delegated_inode);
3921*4882a593Smuzhiyun if (error)
3922*4882a593Smuzhiyun goto out;
3923*4882a593Smuzhiyun error = dir->i_op->unlink(dir, dentry);
3924*4882a593Smuzhiyun if (!error) {
3925*4882a593Smuzhiyun dont_mount(dentry);
3926*4882a593Smuzhiyun detach_mounts(dentry);
3927*4882a593Smuzhiyun }
3928*4882a593Smuzhiyun }
3929*4882a593Smuzhiyun }
3930*4882a593Smuzhiyun out:
3931*4882a593Smuzhiyun inode_unlock(target);
3932*4882a593Smuzhiyun
3933*4882a593Smuzhiyun /* We don't d_delete() NFS sillyrenamed files--they still exist. */
3934*4882a593Smuzhiyun if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) {
3935*4882a593Smuzhiyun fsnotify_unlink(dir, dentry);
3936*4882a593Smuzhiyun } else if (!error) {
3937*4882a593Smuzhiyun fsnotify_link_count(target);
3938*4882a593Smuzhiyun d_delete_notify(dir, dentry);
3939*4882a593Smuzhiyun }
3940*4882a593Smuzhiyun
3941*4882a593Smuzhiyun return error;
3942*4882a593Smuzhiyun }
3943*4882a593Smuzhiyun EXPORT_SYMBOL_NS(vfs_unlink, ANDROID_GKI_VFS_EXPORT_ONLY);
3944*4882a593Smuzhiyun
3945*4882a593Smuzhiyun /*
3946*4882a593Smuzhiyun * Make sure that the actual truncation of the file will occur outside its
3947*4882a593Smuzhiyun * directory's i_mutex. Truncate can take a long time if there is a lot of
3948*4882a593Smuzhiyun * writeout happening, and we don't want to prevent access to the directory
3949*4882a593Smuzhiyun * while waiting on the I/O.
3950*4882a593Smuzhiyun */
do_unlinkat(int dfd,struct filename * name)3951*4882a593Smuzhiyun long do_unlinkat(int dfd, struct filename *name)
3952*4882a593Smuzhiyun {
3953*4882a593Smuzhiyun int error;
3954*4882a593Smuzhiyun struct dentry *dentry;
3955*4882a593Smuzhiyun struct path path;
3956*4882a593Smuzhiyun struct qstr last;
3957*4882a593Smuzhiyun int type;
3958*4882a593Smuzhiyun struct inode *inode = NULL;
3959*4882a593Smuzhiyun struct inode *delegated_inode = NULL;
3960*4882a593Smuzhiyun unsigned int lookup_flags = 0;
3961*4882a593Smuzhiyun retry:
3962*4882a593Smuzhiyun name = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
3963*4882a593Smuzhiyun if (IS_ERR(name))
3964*4882a593Smuzhiyun return PTR_ERR(name);
3965*4882a593Smuzhiyun
3966*4882a593Smuzhiyun error = -EISDIR;
3967*4882a593Smuzhiyun if (type != LAST_NORM)
3968*4882a593Smuzhiyun goto exit1;
3969*4882a593Smuzhiyun
3970*4882a593Smuzhiyun error = mnt_want_write(path.mnt);
3971*4882a593Smuzhiyun if (error)
3972*4882a593Smuzhiyun goto exit1;
3973*4882a593Smuzhiyun retry_deleg:
3974*4882a593Smuzhiyun inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
3975*4882a593Smuzhiyun dentry = __lookup_hash(&last, path.dentry, lookup_flags);
3976*4882a593Smuzhiyun error = PTR_ERR(dentry);
3977*4882a593Smuzhiyun if (!IS_ERR(dentry)) {
3978*4882a593Smuzhiyun /* Why not before? Because we want correct error value */
3979*4882a593Smuzhiyun if (last.name[last.len])
3980*4882a593Smuzhiyun goto slashes;
3981*4882a593Smuzhiyun inode = dentry->d_inode;
3982*4882a593Smuzhiyun if (d_is_negative(dentry))
3983*4882a593Smuzhiyun goto slashes;
3984*4882a593Smuzhiyun ihold(inode);
3985*4882a593Smuzhiyun error = security_path_unlink(&path, dentry);
3986*4882a593Smuzhiyun if (error)
3987*4882a593Smuzhiyun goto exit2;
3988*4882a593Smuzhiyun error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode);
3989*4882a593Smuzhiyun exit2:
3990*4882a593Smuzhiyun dput(dentry);
3991*4882a593Smuzhiyun }
3992*4882a593Smuzhiyun inode_unlock(path.dentry->d_inode);
3993*4882a593Smuzhiyun if (inode)
3994*4882a593Smuzhiyun iput(inode); /* truncate the inode here */
3995*4882a593Smuzhiyun inode = NULL;
3996*4882a593Smuzhiyun if (delegated_inode) {
3997*4882a593Smuzhiyun error = break_deleg_wait(&delegated_inode);
3998*4882a593Smuzhiyun if (!error)
3999*4882a593Smuzhiyun goto retry_deleg;
4000*4882a593Smuzhiyun }
4001*4882a593Smuzhiyun mnt_drop_write(path.mnt);
4002*4882a593Smuzhiyun exit1:
4003*4882a593Smuzhiyun path_put(&path);
4004*4882a593Smuzhiyun if (retry_estale(error, lookup_flags)) {
4005*4882a593Smuzhiyun lookup_flags |= LOOKUP_REVAL;
4006*4882a593Smuzhiyun inode = NULL;
4007*4882a593Smuzhiyun goto retry;
4008*4882a593Smuzhiyun }
4009*4882a593Smuzhiyun putname(name);
4010*4882a593Smuzhiyun return error;
4011*4882a593Smuzhiyun
4012*4882a593Smuzhiyun slashes:
4013*4882a593Smuzhiyun if (d_is_negative(dentry))
4014*4882a593Smuzhiyun error = -ENOENT;
4015*4882a593Smuzhiyun else if (d_is_dir(dentry))
4016*4882a593Smuzhiyun error = -EISDIR;
4017*4882a593Smuzhiyun else
4018*4882a593Smuzhiyun error = -ENOTDIR;
4019*4882a593Smuzhiyun goto exit2;
4020*4882a593Smuzhiyun }
4021*4882a593Smuzhiyun
/*
 * unlinkat(2) entry point.  AT_REMOVEDIR is the only flag accepted; it
 * routes the request to do_rmdir(), anything else goes to do_unlinkat().
 */
SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
{
	struct filename *name;

	if (flag & ~AT_REMOVEDIR)
		return -EINVAL;

	name = getname(pathname);
	if (flag & AT_REMOVEDIR)
		return do_rmdir(dfd, name);

	return do_unlinkat(dfd, name);
}
4031*4882a593Smuzhiyun
/*
 * unlink(2) entry point: same as do_unlinkat() with AT_FDCWD as the
 * directory fd.
 */
SYSCALL_DEFINE1(unlink, const char __user *, pathname)
{
	struct filename *name = getname(pathname);

	return do_unlinkat(AT_FDCWD, name);
}
4036*4882a593Smuzhiyun
vfs_symlink(struct inode * dir,struct dentry * dentry,const char * oldname)4037*4882a593Smuzhiyun int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
4038*4882a593Smuzhiyun {
4039*4882a593Smuzhiyun int error = may_create(dir, dentry);
4040*4882a593Smuzhiyun
4041*4882a593Smuzhiyun if (error)
4042*4882a593Smuzhiyun return error;
4043*4882a593Smuzhiyun
4044*4882a593Smuzhiyun if (!dir->i_op->symlink)
4045*4882a593Smuzhiyun return -EPERM;
4046*4882a593Smuzhiyun
4047*4882a593Smuzhiyun error = security_inode_symlink(dir, dentry, oldname);
4048*4882a593Smuzhiyun if (error)
4049*4882a593Smuzhiyun return error;
4050*4882a593Smuzhiyun
4051*4882a593Smuzhiyun error = dir->i_op->symlink(dir, dentry, oldname);
4052*4882a593Smuzhiyun if (!error)
4053*4882a593Smuzhiyun fsnotify_create(dir, dentry);
4054*4882a593Smuzhiyun return error;
4055*4882a593Smuzhiyun }
4056*4882a593Smuzhiyun EXPORT_SYMBOL(vfs_symlink);
4057*4882a593Smuzhiyun
/*
 * Common implementation of symlink(2) and symlinkat(2).
 *
 * @oldname is the link target text, @newname (looked up relative to
 * @newdfd) is where the link is created.  The creation is attempted again
 * with LOOKUP_REVAL added whenever retry_estale() asks for it.
 *
 * Returns 0 or a negative errno.
 */
static long do_symlinkat(const char __user *oldname, int newdfd,
			 const char __user *newname)
{
	struct filename *from = getname(oldname);
	unsigned int lookup_flags = 0;
	struct dentry *dentry;
	struct path path;
	int error;

	if (IS_ERR(from))
		return PTR_ERR(from);

	for (;;) {
		dentry = user_path_create(newdfd, newname, &path, lookup_flags);
		if (IS_ERR(dentry)) {
			/* A failed path creation is never retried here. */
			error = PTR_ERR(dentry);
			break;
		}

		error = security_path_symlink(&path, dentry, from->name);
		if (!error)
			error = vfs_symlink(path.dentry->d_inode, dentry,
					    from->name);
		done_path_create(&path, dentry);

		if (!retry_estale(error, lookup_flags))
			break;
		lookup_flags |= LOOKUP_REVAL;
	}

	putname(from);
	return error;
}
4088*4882a593Smuzhiyun
/* symlinkat(2) entry point: thin wrapper that forwards to do_symlinkat(). */
SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
		int, newdfd, const char __user *, newname)
{
	long ret = do_symlinkat(oldname, newdfd, newname);

	return ret;
}
4094*4882a593Smuzhiyun
/*
 * symlink(2) entry point: do_symlinkat() with AT_FDCWD as the directory fd
 * for the new name.
 */
SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
{
	long ret = do_symlinkat(oldname, AT_FDCWD, newname);

	return ret;
}
4099*4882a593Smuzhiyun
4100*4882a593Smuzhiyun /**
4101*4882a593Smuzhiyun * vfs_link - create a new link
4102*4882a593Smuzhiyun * @old_dentry: object to be linked
4103*4882a593Smuzhiyun * @dir: new parent
4104*4882a593Smuzhiyun * @new_dentry: where to create the new link
4105*4882a593Smuzhiyun * @delegated_inode: returns inode needing a delegation break
4106*4882a593Smuzhiyun *
4107*4882a593Smuzhiyun * The caller must hold dir->i_mutex
4108*4882a593Smuzhiyun *
4109*4882a593Smuzhiyun * If vfs_link discovers a delegation on the to-be-linked file in need
4110*4882a593Smuzhiyun * of breaking, it will return -EWOULDBLOCK and return a reference to the
4111*4882a593Smuzhiyun * inode in delegated_inode. The caller should then break the delegation
4112*4882a593Smuzhiyun * and retry. Because breaking a delegation may take a long time, the
4113*4882a593Smuzhiyun * caller should drop the i_mutex before doing so.
4114*4882a593Smuzhiyun *
4115*4882a593Smuzhiyun * Alternatively, a caller may pass NULL for delegated_inode. This may
4116*4882a593Smuzhiyun * be appropriate for callers that expect the underlying filesystem not
4117*4882a593Smuzhiyun * to be NFS exported.
4118*4882a593Smuzhiyun */
vfs_link(struct dentry * old_dentry,struct inode * dir,struct dentry * new_dentry,struct inode ** delegated_inode)4119*4882a593Smuzhiyun int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
4120*4882a593Smuzhiyun {
4121*4882a593Smuzhiyun struct inode *inode = old_dentry->d_inode;
4122*4882a593Smuzhiyun unsigned max_links = dir->i_sb->s_max_links;
4123*4882a593Smuzhiyun int error;
4124*4882a593Smuzhiyun
4125*4882a593Smuzhiyun if (!inode)
4126*4882a593Smuzhiyun return -ENOENT;
4127*4882a593Smuzhiyun
4128*4882a593Smuzhiyun error = may_create(dir, new_dentry);
4129*4882a593Smuzhiyun if (error)
4130*4882a593Smuzhiyun return error;
4131*4882a593Smuzhiyun
4132*4882a593Smuzhiyun if (dir->i_sb != inode->i_sb)
4133*4882a593Smuzhiyun return -EXDEV;
4134*4882a593Smuzhiyun
4135*4882a593Smuzhiyun /*
4136*4882a593Smuzhiyun * A link to an append-only or immutable file cannot be created.
4137*4882a593Smuzhiyun */
4138*4882a593Smuzhiyun if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4139*4882a593Smuzhiyun return -EPERM;
4140*4882a593Smuzhiyun /*
4141*4882a593Smuzhiyun * Updating the link count will likely cause i_uid and i_gid to
4142*4882a593Smuzhiyun * be writen back improperly if their true value is unknown to
4143*4882a593Smuzhiyun * the vfs.
4144*4882a593Smuzhiyun */
4145*4882a593Smuzhiyun if (HAS_UNMAPPED_ID(inode))
4146*4882a593Smuzhiyun return -EPERM;
4147*4882a593Smuzhiyun if (!dir->i_op->link)
4148*4882a593Smuzhiyun return -EPERM;
4149*4882a593Smuzhiyun if (S_ISDIR(inode->i_mode))
4150*4882a593Smuzhiyun return -EPERM;
4151*4882a593Smuzhiyun
4152*4882a593Smuzhiyun error = security_inode_link(old_dentry, dir, new_dentry);
4153*4882a593Smuzhiyun if (error)
4154*4882a593Smuzhiyun return error;
4155*4882a593Smuzhiyun
4156*4882a593Smuzhiyun inode_lock(inode);
4157*4882a593Smuzhiyun /* Make sure we don't allow creating hardlink to an unlinked file */
4158*4882a593Smuzhiyun if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
4159*4882a593Smuzhiyun error = -ENOENT;
4160*4882a593Smuzhiyun else if (max_links && inode->i_nlink >= max_links)
4161*4882a593Smuzhiyun error = -EMLINK;
4162*4882a593Smuzhiyun else {
4163*4882a593Smuzhiyun error = try_break_deleg(inode, delegated_inode);
4164*4882a593Smuzhiyun if (!error)
4165*4882a593Smuzhiyun error = dir->i_op->link(old_dentry, dir, new_dentry);
4166*4882a593Smuzhiyun }
4167*4882a593Smuzhiyun
4168*4882a593Smuzhiyun if (!error && (inode->i_state & I_LINKABLE)) {
4169*4882a593Smuzhiyun spin_lock(&inode->i_lock);
4170*4882a593Smuzhiyun inode->i_state &= ~I_LINKABLE;
4171*4882a593Smuzhiyun spin_unlock(&inode->i_lock);
4172*4882a593Smuzhiyun }
4173*4882a593Smuzhiyun inode_unlock(inode);
4174*4882a593Smuzhiyun if (!error)
4175*4882a593Smuzhiyun fsnotify_link(dir, inode, new_dentry);
4176*4882a593Smuzhiyun return error;
4177*4882a593Smuzhiyun }
4178*4882a593Smuzhiyun EXPORT_SYMBOL_NS(vfs_link, ANDROID_GKI_VFS_EXPORT_ONLY);
4179*4882a593Smuzhiyun
4180*4882a593Smuzhiyun /*
4181*4882a593Smuzhiyun * Hardlinks are often used in delicate situations. We avoid
4182*4882a593Smuzhiyun * security-related surprises by not following symlinks on the
4183*4882a593Smuzhiyun * newname. --KAB
4184*4882a593Smuzhiyun *
4185*4882a593Smuzhiyun * We don't follow them on the oldname either to be compatible
4186*4882a593Smuzhiyun * with linux 2.0, and to avoid hard-linking to directories
4187*4882a593Smuzhiyun * and other special files. --ADM
4188*4882a593Smuzhiyun */
do_linkat(int olddfd,const char __user * oldname,int newdfd,const char __user * newname,int flags)4189*4882a593Smuzhiyun static int do_linkat(int olddfd, const char __user *oldname, int newdfd,
4190*4882a593Smuzhiyun const char __user *newname, int flags)
4191*4882a593Smuzhiyun {
4192*4882a593Smuzhiyun struct dentry *new_dentry;
4193*4882a593Smuzhiyun struct path old_path, new_path;
4194*4882a593Smuzhiyun struct inode *delegated_inode = NULL;
4195*4882a593Smuzhiyun int how = 0;
4196*4882a593Smuzhiyun int error;
4197*4882a593Smuzhiyun
4198*4882a593Smuzhiyun if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
4199*4882a593Smuzhiyun return -EINVAL;
4200*4882a593Smuzhiyun /*
4201*4882a593Smuzhiyun * To use null names we require CAP_DAC_READ_SEARCH
4202*4882a593Smuzhiyun * This ensures that not everyone will be able to create
4203*4882a593Smuzhiyun * handlink using the passed filedescriptor.
4204*4882a593Smuzhiyun */
4205*4882a593Smuzhiyun if (flags & AT_EMPTY_PATH) {
4206*4882a593Smuzhiyun if (!capable(CAP_DAC_READ_SEARCH))
4207*4882a593Smuzhiyun return -ENOENT;
4208*4882a593Smuzhiyun how = LOOKUP_EMPTY;
4209*4882a593Smuzhiyun }
4210*4882a593Smuzhiyun
4211*4882a593Smuzhiyun if (flags & AT_SYMLINK_FOLLOW)
4212*4882a593Smuzhiyun how |= LOOKUP_FOLLOW;
4213*4882a593Smuzhiyun retry:
4214*4882a593Smuzhiyun error = user_path_at(olddfd, oldname, how, &old_path);
4215*4882a593Smuzhiyun if (error)
4216*4882a593Smuzhiyun return error;
4217*4882a593Smuzhiyun
4218*4882a593Smuzhiyun new_dentry = user_path_create(newdfd, newname, &new_path,
4219*4882a593Smuzhiyun (how & LOOKUP_REVAL));
4220*4882a593Smuzhiyun error = PTR_ERR(new_dentry);
4221*4882a593Smuzhiyun if (IS_ERR(new_dentry))
4222*4882a593Smuzhiyun goto out;
4223*4882a593Smuzhiyun
4224*4882a593Smuzhiyun error = -EXDEV;
4225*4882a593Smuzhiyun if (old_path.mnt != new_path.mnt)
4226*4882a593Smuzhiyun goto out_dput;
4227*4882a593Smuzhiyun error = may_linkat(&old_path);
4228*4882a593Smuzhiyun if (unlikely(error))
4229*4882a593Smuzhiyun goto out_dput;
4230*4882a593Smuzhiyun error = security_path_link(old_path.dentry, &new_path, new_dentry);
4231*4882a593Smuzhiyun if (error)
4232*4882a593Smuzhiyun goto out_dput;
4233*4882a593Smuzhiyun error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
4234*4882a593Smuzhiyun out_dput:
4235*4882a593Smuzhiyun done_path_create(&new_path, new_dentry);
4236*4882a593Smuzhiyun if (delegated_inode) {
4237*4882a593Smuzhiyun error = break_deleg_wait(&delegated_inode);
4238*4882a593Smuzhiyun if (!error) {
4239*4882a593Smuzhiyun path_put(&old_path);
4240*4882a593Smuzhiyun goto retry;
4241*4882a593Smuzhiyun }
4242*4882a593Smuzhiyun }
4243*4882a593Smuzhiyun if (retry_estale(error, how)) {
4244*4882a593Smuzhiyun path_put(&old_path);
4245*4882a593Smuzhiyun how |= LOOKUP_REVAL;
4246*4882a593Smuzhiyun goto retry;
4247*4882a593Smuzhiyun }
4248*4882a593Smuzhiyun out:
4249*4882a593Smuzhiyun path_put(&old_path);
4250*4882a593Smuzhiyun
4251*4882a593Smuzhiyun return error;
4252*4882a593Smuzhiyun }
4253*4882a593Smuzhiyun
/* linkat(2) entry point: thin wrapper that forwards to do_linkat(). */
SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
		int, newdfd, const char __user *, newname, int, flags)
{
	int ret = do_linkat(olddfd, oldname, newdfd, newname, flags);

	return ret;
}
4259*4882a593Smuzhiyun
/*
 * link(2) entry point: do_linkat() with AT_FDCWD for both names and no
 * flags.
 */
SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
{
	int ret = do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);

	return ret;
}
4264*4882a593Smuzhiyun
4265*4882a593Smuzhiyun /**
4266*4882a593Smuzhiyun * vfs_rename - rename a filesystem object
4267*4882a593Smuzhiyun * @old_dir: parent of source
4268*4882a593Smuzhiyun * @old_dentry: source
4269*4882a593Smuzhiyun * @new_dir: parent of destination
4270*4882a593Smuzhiyun * @new_dentry: destination
4271*4882a593Smuzhiyun * @delegated_inode: returns an inode needing a delegation break
4272*4882a593Smuzhiyun * @flags: rename flags
4273*4882a593Smuzhiyun *
4274*4882a593Smuzhiyun * The caller must hold multiple mutexes--see lock_rename()).
4275*4882a593Smuzhiyun *
4276*4882a593Smuzhiyun * If vfs_rename discovers a delegation in need of breaking at either
4277*4882a593Smuzhiyun * the source or destination, it will return -EWOULDBLOCK and return a
4278*4882a593Smuzhiyun * reference to the inode in delegated_inode. The caller should then
4279*4882a593Smuzhiyun * break the delegation and retry. Because breaking a delegation may
4280*4882a593Smuzhiyun * take a long time, the caller should drop all locks before doing
4281*4882a593Smuzhiyun * so.
4282*4882a593Smuzhiyun *
4283*4882a593Smuzhiyun * Alternatively, a caller may pass NULL for delegated_inode. This may
4284*4882a593Smuzhiyun * be appropriate for callers that expect the underlying filesystem not
4285*4882a593Smuzhiyun * to be NFS exported.
4286*4882a593Smuzhiyun *
4287*4882a593Smuzhiyun * The worst of all namespace operations - renaming directory. "Perverted"
4288*4882a593Smuzhiyun * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
4289*4882a593Smuzhiyun * Problems:
4290*4882a593Smuzhiyun *
4291*4882a593Smuzhiyun * a) we can get into loop creation.
4292*4882a593Smuzhiyun * b) race potential - two innocent renames can create a loop together.
4293*4882a593Smuzhiyun * That's where 4.4 screws up. Current fix: serialization on
4294*4882a593Smuzhiyun * sb->s_vfs_rename_mutex. We might be more accurate, but that's another
4295*4882a593Smuzhiyun * story.
4296*4882a593Smuzhiyun * c) we have to lock _four_ objects - parents and victim (if it exists),
4297*4882a593Smuzhiyun * and source (if it is not a directory).
4298*4882a593Smuzhiyun * And that - after we got ->i_mutex on parents (until then we don't know
4299*4882a593Smuzhiyun * whether the target exists). Solution: try to be smart with locking
4300*4882a593Smuzhiyun * order for inodes. We rely on the fact that tree topology may change
4301*4882a593Smuzhiyun * only under ->s_vfs_rename_mutex _and_ that parent of the object we
4302*4882a593Smuzhiyun * move will be locked. Thus we can rank directories by the tree
4303*4882a593Smuzhiyun * (ancestors first) and rank all non-directories after them.
4304*4882a593Smuzhiyun * That works since everybody except rename does "lock parent, lookup,
4305*4882a593Smuzhiyun * lock child" and rename is under ->s_vfs_rename_mutex.
4306*4882a593Smuzhiyun * HOWEVER, it relies on the assumption that any object with ->lookup()
4307*4882a593Smuzhiyun * has no more than 1 dentry. If "hybrid" objects will ever appear,
4308*4882a593Smuzhiyun * we'd better make sure that there's no link(2) for them.
4309*4882a593Smuzhiyun * d) conversion from fhandle to dentry may come in the wrong moment - when
4310*4882a593Smuzhiyun * we are removing the target. Solution: we will have to grab ->i_mutex
4311*4882a593Smuzhiyun * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
4312*4882a593Smuzhiyun * ->i_mutex on parents, which works but leads to some truly excessive
4313*4882a593Smuzhiyun * locking].
4314*4882a593Smuzhiyun */
vfs_rename(struct inode * old_dir,struct dentry * old_dentry,struct inode * new_dir,struct dentry * new_dentry,struct inode ** delegated_inode,unsigned int flags)4315*4882a593Smuzhiyun int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4316*4882a593Smuzhiyun struct inode *new_dir, struct dentry *new_dentry,
4317*4882a593Smuzhiyun struct inode **delegated_inode, unsigned int flags)
4318*4882a593Smuzhiyun {
4319*4882a593Smuzhiyun int error;
4320*4882a593Smuzhiyun bool is_dir = d_is_dir(old_dentry);
4321*4882a593Smuzhiyun struct inode *source = old_dentry->d_inode;
4322*4882a593Smuzhiyun struct inode *target = new_dentry->d_inode;
4323*4882a593Smuzhiyun bool new_is_dir = false;
4324*4882a593Smuzhiyun unsigned max_links = new_dir->i_sb->s_max_links;
4325*4882a593Smuzhiyun struct name_snapshot old_name;
4326*4882a593Smuzhiyun
4327*4882a593Smuzhiyun if (source == target)
4328*4882a593Smuzhiyun return 0;
4329*4882a593Smuzhiyun
4330*4882a593Smuzhiyun error = may_delete(old_dir, old_dentry, is_dir);
4331*4882a593Smuzhiyun if (error)
4332*4882a593Smuzhiyun return error;
4333*4882a593Smuzhiyun
4334*4882a593Smuzhiyun if (!target) {
4335*4882a593Smuzhiyun error = may_create(new_dir, new_dentry);
4336*4882a593Smuzhiyun } else {
4337*4882a593Smuzhiyun new_is_dir = d_is_dir(new_dentry);
4338*4882a593Smuzhiyun
4339*4882a593Smuzhiyun if (!(flags & RENAME_EXCHANGE))
4340*4882a593Smuzhiyun error = may_delete(new_dir, new_dentry, is_dir);
4341*4882a593Smuzhiyun else
4342*4882a593Smuzhiyun error = may_delete(new_dir, new_dentry, new_is_dir);
4343*4882a593Smuzhiyun }
4344*4882a593Smuzhiyun if (error)
4345*4882a593Smuzhiyun return error;
4346*4882a593Smuzhiyun
4347*4882a593Smuzhiyun if (!old_dir->i_op->rename)
4348*4882a593Smuzhiyun return -EPERM;
4349*4882a593Smuzhiyun
4350*4882a593Smuzhiyun /*
4351*4882a593Smuzhiyun * If we are going to change the parent - check write permissions,
4352*4882a593Smuzhiyun * we'll need to flip '..'.
4353*4882a593Smuzhiyun */
4354*4882a593Smuzhiyun if (new_dir != old_dir) {
4355*4882a593Smuzhiyun if (is_dir) {
4356*4882a593Smuzhiyun error = inode_permission(source, MAY_WRITE);
4357*4882a593Smuzhiyun if (error)
4358*4882a593Smuzhiyun return error;
4359*4882a593Smuzhiyun }
4360*4882a593Smuzhiyun if ((flags & RENAME_EXCHANGE) && new_is_dir) {
4361*4882a593Smuzhiyun error = inode_permission(target, MAY_WRITE);
4362*4882a593Smuzhiyun if (error)
4363*4882a593Smuzhiyun return error;
4364*4882a593Smuzhiyun }
4365*4882a593Smuzhiyun }
4366*4882a593Smuzhiyun
4367*4882a593Smuzhiyun error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry,
4368*4882a593Smuzhiyun flags);
4369*4882a593Smuzhiyun if (error)
4370*4882a593Smuzhiyun return error;
4371*4882a593Smuzhiyun
4372*4882a593Smuzhiyun take_dentry_name_snapshot(&old_name, old_dentry);
4373*4882a593Smuzhiyun dget(new_dentry);
4374*4882a593Smuzhiyun if (!is_dir || (flags & RENAME_EXCHANGE))
4375*4882a593Smuzhiyun lock_two_nondirectories(source, target);
4376*4882a593Smuzhiyun else if (target)
4377*4882a593Smuzhiyun inode_lock(target);
4378*4882a593Smuzhiyun
4379*4882a593Smuzhiyun error = -EBUSY;
4380*4882a593Smuzhiyun if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
4381*4882a593Smuzhiyun goto out;
4382*4882a593Smuzhiyun
4383*4882a593Smuzhiyun if (max_links && new_dir != old_dir) {
4384*4882a593Smuzhiyun error = -EMLINK;
4385*4882a593Smuzhiyun if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links)
4386*4882a593Smuzhiyun goto out;
4387*4882a593Smuzhiyun if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir &&
4388*4882a593Smuzhiyun old_dir->i_nlink >= max_links)
4389*4882a593Smuzhiyun goto out;
4390*4882a593Smuzhiyun }
4391*4882a593Smuzhiyun if (!is_dir) {
4392*4882a593Smuzhiyun error = try_break_deleg(source, delegated_inode);
4393*4882a593Smuzhiyun if (error)
4394*4882a593Smuzhiyun goto out;
4395*4882a593Smuzhiyun }
4396*4882a593Smuzhiyun if (target && !new_is_dir) {
4397*4882a593Smuzhiyun error = try_break_deleg(target, delegated_inode);
4398*4882a593Smuzhiyun if (error)
4399*4882a593Smuzhiyun goto out;
4400*4882a593Smuzhiyun }
4401*4882a593Smuzhiyun error = old_dir->i_op->rename(old_dir, old_dentry,
4402*4882a593Smuzhiyun new_dir, new_dentry, flags);
4403*4882a593Smuzhiyun if (error)
4404*4882a593Smuzhiyun goto out;
4405*4882a593Smuzhiyun
4406*4882a593Smuzhiyun if (!(flags & RENAME_EXCHANGE) && target) {
4407*4882a593Smuzhiyun if (is_dir) {
4408*4882a593Smuzhiyun shrink_dcache_parent(new_dentry);
4409*4882a593Smuzhiyun target->i_flags |= S_DEAD;
4410*4882a593Smuzhiyun }
4411*4882a593Smuzhiyun dont_mount(new_dentry);
4412*4882a593Smuzhiyun detach_mounts(new_dentry);
4413*4882a593Smuzhiyun }
4414*4882a593Smuzhiyun if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
4415*4882a593Smuzhiyun if (!(flags & RENAME_EXCHANGE))
4416*4882a593Smuzhiyun d_move(old_dentry, new_dentry);
4417*4882a593Smuzhiyun else
4418*4882a593Smuzhiyun d_exchange(old_dentry, new_dentry);
4419*4882a593Smuzhiyun }
4420*4882a593Smuzhiyun out:
4421*4882a593Smuzhiyun if (!is_dir || (flags & RENAME_EXCHANGE))
4422*4882a593Smuzhiyun unlock_two_nondirectories(source, target);
4423*4882a593Smuzhiyun else if (target)
4424*4882a593Smuzhiyun inode_unlock(target);
4425*4882a593Smuzhiyun dput(new_dentry);
4426*4882a593Smuzhiyun if (!error) {
4427*4882a593Smuzhiyun fsnotify_move(old_dir, new_dir, &old_name.name, is_dir,
4428*4882a593Smuzhiyun !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
4429*4882a593Smuzhiyun if (flags & RENAME_EXCHANGE) {
4430*4882a593Smuzhiyun fsnotify_move(new_dir, old_dir, &old_dentry->d_name,
4431*4882a593Smuzhiyun new_is_dir, NULL, new_dentry);
4432*4882a593Smuzhiyun }
4433*4882a593Smuzhiyun }
4434*4882a593Smuzhiyun release_dentry_name_snapshot(&old_name);
4435*4882a593Smuzhiyun
4436*4882a593Smuzhiyun return error;
4437*4882a593Smuzhiyun }
4438*4882a593Smuzhiyun EXPORT_SYMBOL_NS(vfs_rename, ANDROID_GKI_VFS_EXPORT_ONLY);
4439*4882a593Smuzhiyun
do_renameat2(int olddfd,struct filename * from,int newdfd,struct filename * to,unsigned int flags)4440*4882a593Smuzhiyun int do_renameat2(int olddfd, struct filename *from, int newdfd,
4441*4882a593Smuzhiyun struct filename *to, unsigned int flags)
4442*4882a593Smuzhiyun {
4443*4882a593Smuzhiyun struct dentry *old_dentry, *new_dentry;
4444*4882a593Smuzhiyun struct dentry *trap;
4445*4882a593Smuzhiyun struct path old_path, new_path;
4446*4882a593Smuzhiyun struct qstr old_last, new_last;
4447*4882a593Smuzhiyun int old_type, new_type;
4448*4882a593Smuzhiyun struct inode *delegated_inode = NULL;
4449*4882a593Smuzhiyun unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
4450*4882a593Smuzhiyun bool should_retry = false;
4451*4882a593Smuzhiyun int error = -EINVAL;
4452*4882a593Smuzhiyun
4453*4882a593Smuzhiyun if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
4454*4882a593Smuzhiyun goto put_both;
4455*4882a593Smuzhiyun
4456*4882a593Smuzhiyun if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
4457*4882a593Smuzhiyun (flags & RENAME_EXCHANGE))
4458*4882a593Smuzhiyun goto put_both;
4459*4882a593Smuzhiyun
4460*4882a593Smuzhiyun if (flags & RENAME_EXCHANGE)
4461*4882a593Smuzhiyun target_flags = 0;
4462*4882a593Smuzhiyun
4463*4882a593Smuzhiyun retry:
4464*4882a593Smuzhiyun from = filename_parentat(olddfd, from, lookup_flags, &old_path,
4465*4882a593Smuzhiyun &old_last, &old_type);
4466*4882a593Smuzhiyun if (IS_ERR(from)) {
4467*4882a593Smuzhiyun error = PTR_ERR(from);
4468*4882a593Smuzhiyun goto put_new;
4469*4882a593Smuzhiyun }
4470*4882a593Smuzhiyun
4471*4882a593Smuzhiyun to = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
4472*4882a593Smuzhiyun &new_type);
4473*4882a593Smuzhiyun if (IS_ERR(to)) {
4474*4882a593Smuzhiyun error = PTR_ERR(to);
4475*4882a593Smuzhiyun goto exit1;
4476*4882a593Smuzhiyun }
4477*4882a593Smuzhiyun
4478*4882a593Smuzhiyun error = -EXDEV;
4479*4882a593Smuzhiyun if (old_path.mnt != new_path.mnt)
4480*4882a593Smuzhiyun goto exit2;
4481*4882a593Smuzhiyun
4482*4882a593Smuzhiyun error = -EBUSY;
4483*4882a593Smuzhiyun if (old_type != LAST_NORM)
4484*4882a593Smuzhiyun goto exit2;
4485*4882a593Smuzhiyun
4486*4882a593Smuzhiyun if (flags & RENAME_NOREPLACE)
4487*4882a593Smuzhiyun error = -EEXIST;
4488*4882a593Smuzhiyun if (new_type != LAST_NORM)
4489*4882a593Smuzhiyun goto exit2;
4490*4882a593Smuzhiyun
4491*4882a593Smuzhiyun error = mnt_want_write(old_path.mnt);
4492*4882a593Smuzhiyun if (error)
4493*4882a593Smuzhiyun goto exit2;
4494*4882a593Smuzhiyun
4495*4882a593Smuzhiyun retry_deleg:
4496*4882a593Smuzhiyun trap = lock_rename(new_path.dentry, old_path.dentry);
4497*4882a593Smuzhiyun
4498*4882a593Smuzhiyun old_dentry = __lookup_hash(&old_last, old_path.dentry, lookup_flags);
4499*4882a593Smuzhiyun error = PTR_ERR(old_dentry);
4500*4882a593Smuzhiyun if (IS_ERR(old_dentry))
4501*4882a593Smuzhiyun goto exit3;
4502*4882a593Smuzhiyun /* source must exist */
4503*4882a593Smuzhiyun error = -ENOENT;
4504*4882a593Smuzhiyun if (d_is_negative(old_dentry))
4505*4882a593Smuzhiyun goto exit4;
4506*4882a593Smuzhiyun new_dentry = __lookup_hash(&new_last, new_path.dentry, lookup_flags | target_flags);
4507*4882a593Smuzhiyun error = PTR_ERR(new_dentry);
4508*4882a593Smuzhiyun if (IS_ERR(new_dentry))
4509*4882a593Smuzhiyun goto exit4;
4510*4882a593Smuzhiyun error = -EEXIST;
4511*4882a593Smuzhiyun if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
4512*4882a593Smuzhiyun goto exit5;
4513*4882a593Smuzhiyun if (flags & RENAME_EXCHANGE) {
4514*4882a593Smuzhiyun error = -ENOENT;
4515*4882a593Smuzhiyun if (d_is_negative(new_dentry))
4516*4882a593Smuzhiyun goto exit5;
4517*4882a593Smuzhiyun
4518*4882a593Smuzhiyun if (!d_is_dir(new_dentry)) {
4519*4882a593Smuzhiyun error = -ENOTDIR;
4520*4882a593Smuzhiyun if (new_last.name[new_last.len])
4521*4882a593Smuzhiyun goto exit5;
4522*4882a593Smuzhiyun }
4523*4882a593Smuzhiyun }
4524*4882a593Smuzhiyun /* unless the source is a directory trailing slashes give -ENOTDIR */
4525*4882a593Smuzhiyun if (!d_is_dir(old_dentry)) {
4526*4882a593Smuzhiyun error = -ENOTDIR;
4527*4882a593Smuzhiyun if (old_last.name[old_last.len])
4528*4882a593Smuzhiyun goto exit5;
4529*4882a593Smuzhiyun if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len])
4530*4882a593Smuzhiyun goto exit5;
4531*4882a593Smuzhiyun }
4532*4882a593Smuzhiyun /* source should not be ancestor of target */
4533*4882a593Smuzhiyun error = -EINVAL;
4534*4882a593Smuzhiyun if (old_dentry == trap)
4535*4882a593Smuzhiyun goto exit5;
4536*4882a593Smuzhiyun /* target should not be an ancestor of source */
4537*4882a593Smuzhiyun if (!(flags & RENAME_EXCHANGE))
4538*4882a593Smuzhiyun error = -ENOTEMPTY;
4539*4882a593Smuzhiyun if (new_dentry == trap)
4540*4882a593Smuzhiyun goto exit5;
4541*4882a593Smuzhiyun
4542*4882a593Smuzhiyun error = security_path_rename(&old_path, old_dentry,
4543*4882a593Smuzhiyun &new_path, new_dentry, flags);
4544*4882a593Smuzhiyun if (error)
4545*4882a593Smuzhiyun goto exit5;
4546*4882a593Smuzhiyun error = vfs_rename(old_path.dentry->d_inode, old_dentry,
4547*4882a593Smuzhiyun new_path.dentry->d_inode, new_dentry,
4548*4882a593Smuzhiyun &delegated_inode, flags);
4549*4882a593Smuzhiyun exit5:
4550*4882a593Smuzhiyun dput(new_dentry);
4551*4882a593Smuzhiyun exit4:
4552*4882a593Smuzhiyun dput(old_dentry);
4553*4882a593Smuzhiyun exit3:
4554*4882a593Smuzhiyun unlock_rename(new_path.dentry, old_path.dentry);
4555*4882a593Smuzhiyun if (delegated_inode) {
4556*4882a593Smuzhiyun error = break_deleg_wait(&delegated_inode);
4557*4882a593Smuzhiyun if (!error)
4558*4882a593Smuzhiyun goto retry_deleg;
4559*4882a593Smuzhiyun }
4560*4882a593Smuzhiyun mnt_drop_write(old_path.mnt);
4561*4882a593Smuzhiyun exit2:
4562*4882a593Smuzhiyun if (retry_estale(error, lookup_flags))
4563*4882a593Smuzhiyun should_retry = true;
4564*4882a593Smuzhiyun path_put(&new_path);
4565*4882a593Smuzhiyun exit1:
4566*4882a593Smuzhiyun path_put(&old_path);
4567*4882a593Smuzhiyun if (should_retry) {
4568*4882a593Smuzhiyun should_retry = false;
4569*4882a593Smuzhiyun lookup_flags |= LOOKUP_REVAL;
4570*4882a593Smuzhiyun goto retry;
4571*4882a593Smuzhiyun }
4572*4882a593Smuzhiyun put_both:
4573*4882a593Smuzhiyun if (!IS_ERR(from))
4574*4882a593Smuzhiyun putname(from);
4575*4882a593Smuzhiyun put_new:
4576*4882a593Smuzhiyun if (!IS_ERR(to))
4577*4882a593Smuzhiyun putname(to);
4578*4882a593Smuzhiyun return error;
4579*4882a593Smuzhiyun }
4580*4882a593Smuzhiyun
/*
 * renameat2 system call entry point.
 *
 * Runs both user-space path pointers through getname() and forwards the
 * results, the two descriptors and @flags unchanged to do_renameat2(),
 * whose return value becomes the syscall result.
 */
SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
		int, newdfd, const char __user *, newname, unsigned int, flags)
{
	struct filename *from = getname(oldname);
	struct filename *to = getname(newname);

	return do_renameat2(olddfd, from, newdfd, to, flags);
}
4587*4882a593Smuzhiyun
/*
 * renameat system call entry point.
 *
 * Same forwarding as renameat2, but this call has no flags argument, so
 * do_renameat2() is always invoked with flags == 0.
 */
SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
		int, newdfd, const char __user *, newname)
{
	struct filename *from = getname(oldname);
	struct filename *to = getname(newname);

	return do_renameat2(olddfd, from, newdfd, to, 0);
}
4594*4882a593Smuzhiyun
/*
 * rename system call entry point.
 *
 * Passes AT_FDCWD for both directory descriptors and 0 for the flags,
 * handing the getname() results for the two paths to do_renameat2().
 */
SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
{
	struct filename *from = getname(oldname);
	struct filename *to = getname(newname);

	return do_renameat2(AT_FDCWD, from, AT_FDCWD, to, 0);
}
4600*4882a593Smuzhiyun
/*
 * Copy a symlink body to a user buffer.
 *
 * @buffer: destination in user space
 * @buflen: capacity of @buffer
 * @link:   NUL-terminated link body, or an ERR_PTR() from the lookup
 *
 * An ERR_PTR() @link is turned straight into its errno.  Otherwise at
 * most @buflen bytes of the body are copied; no terminating NUL is
 * written.  Returns the number of bytes copied, or -EFAULT if the copy
 * to user space fails.
 */
int readlink_copy(char __user *buffer, int buflen, const char *link)
{
	int copied;

	if (IS_ERR(link))
		return PTR_ERR(link);

	copied = strlen(link);
	/* Unsigned compare, exactly as before: truncate to the buffer size. */
	if (copied > (unsigned) buflen)
		copied = buflen;

	return copy_to_user(buffer, link, copied) ? -EFAULT : copied;
}
4615*4882a593Smuzhiyun
4616*4882a593Smuzhiyun /**
4617*4882a593Smuzhiyun * vfs_readlink - copy symlink body into userspace buffer
4618*4882a593Smuzhiyun * @dentry: dentry on which to get symbolic link
4619*4882a593Smuzhiyun * @buffer: user memory pointer
4620*4882a593Smuzhiyun * @buflen: size of buffer
4621*4882a593Smuzhiyun *
4622*4882a593Smuzhiyun * Does not touch atime. That's up to the caller if necessary
4623*4882a593Smuzhiyun *
4624*4882a593Smuzhiyun * Does not call security hook.
4625*4882a593Smuzhiyun */
vfs_readlink(struct dentry * dentry,char __user * buffer,int buflen)4626*4882a593Smuzhiyun int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
4627*4882a593Smuzhiyun {
4628*4882a593Smuzhiyun struct inode *inode = d_inode(dentry);
4629*4882a593Smuzhiyun DEFINE_DELAYED_CALL(done);
4630*4882a593Smuzhiyun const char *link;
4631*4882a593Smuzhiyun int res;
4632*4882a593Smuzhiyun
4633*4882a593Smuzhiyun if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
4634*4882a593Smuzhiyun if (unlikely(inode->i_op->readlink))
4635*4882a593Smuzhiyun return inode->i_op->readlink(dentry, buffer, buflen);
4636*4882a593Smuzhiyun
4637*4882a593Smuzhiyun if (!d_is_symlink(dentry))
4638*4882a593Smuzhiyun return -EINVAL;
4639*4882a593Smuzhiyun
4640*4882a593Smuzhiyun spin_lock(&inode->i_lock);
4641*4882a593Smuzhiyun inode->i_opflags |= IOP_DEFAULT_READLINK;
4642*4882a593Smuzhiyun spin_unlock(&inode->i_lock);
4643*4882a593Smuzhiyun }
4644*4882a593Smuzhiyun
4645*4882a593Smuzhiyun link = READ_ONCE(inode->i_link);
4646*4882a593Smuzhiyun if (!link) {
4647*4882a593Smuzhiyun link = inode->i_op->get_link(dentry, inode, &done);
4648*4882a593Smuzhiyun if (IS_ERR(link))
4649*4882a593Smuzhiyun return PTR_ERR(link);
4650*4882a593Smuzhiyun }
4651*4882a593Smuzhiyun res = readlink_copy(buffer, buflen, link);
4652*4882a593Smuzhiyun do_delayed_call(&done);
4653*4882a593Smuzhiyun return res;
4654*4882a593Smuzhiyun }
4655*4882a593Smuzhiyun EXPORT_SYMBOL(vfs_readlink);
4656*4882a593Smuzhiyun
4657*4882a593Smuzhiyun /**
4658*4882a593Smuzhiyun * vfs_get_link - get symlink body
4659*4882a593Smuzhiyun * @dentry: dentry on which to get symbolic link
4660*4882a593Smuzhiyun * @done: caller needs to free returned data with this
4661*4882a593Smuzhiyun *
4662*4882a593Smuzhiyun * Calls security hook and i_op->get_link() on the supplied inode.
4663*4882a593Smuzhiyun *
4664*4882a593Smuzhiyun * It does not touch atime. That's up to the caller if necessary.
4665*4882a593Smuzhiyun *
4666*4882a593Smuzhiyun * Does not work on "special" symlinks like /proc/$$/fd/N
4667*4882a593Smuzhiyun */
vfs_get_link(struct dentry * dentry,struct delayed_call * done)4668*4882a593Smuzhiyun const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done)
4669*4882a593Smuzhiyun {
4670*4882a593Smuzhiyun const char *res = ERR_PTR(-EINVAL);
4671*4882a593Smuzhiyun struct inode *inode = d_inode(dentry);
4672*4882a593Smuzhiyun
4673*4882a593Smuzhiyun if (d_is_symlink(dentry)) {
4674*4882a593Smuzhiyun res = ERR_PTR(security_inode_readlink(dentry));
4675*4882a593Smuzhiyun if (!res)
4676*4882a593Smuzhiyun res = inode->i_op->get_link(dentry, inode, done);
4677*4882a593Smuzhiyun }
4678*4882a593Smuzhiyun return res;
4679*4882a593Smuzhiyun }
4680*4882a593Smuzhiyun EXPORT_SYMBOL(vfs_get_link);
4681*4882a593Smuzhiyun
4682*4882a593Smuzhiyun /* get the link contents into pagecache */
page_get_link(struct dentry * dentry,struct inode * inode,struct delayed_call * callback)4683*4882a593Smuzhiyun const char *page_get_link(struct dentry *dentry, struct inode *inode,
4684*4882a593Smuzhiyun struct delayed_call *callback)
4685*4882a593Smuzhiyun {
4686*4882a593Smuzhiyun char *kaddr;
4687*4882a593Smuzhiyun struct page *page;
4688*4882a593Smuzhiyun struct address_space *mapping = inode->i_mapping;
4689*4882a593Smuzhiyun
4690*4882a593Smuzhiyun if (!dentry) {
4691*4882a593Smuzhiyun page = find_get_page(mapping, 0);
4692*4882a593Smuzhiyun if (!page)
4693*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
4694*4882a593Smuzhiyun if (!PageUptodate(page)) {
4695*4882a593Smuzhiyun put_page(page);
4696*4882a593Smuzhiyun return ERR_PTR(-ECHILD);
4697*4882a593Smuzhiyun }
4698*4882a593Smuzhiyun } else {
4699*4882a593Smuzhiyun page = read_mapping_page(mapping, 0, NULL);
4700*4882a593Smuzhiyun if (IS_ERR(page))
4701*4882a593Smuzhiyun return (char*)page;
4702*4882a593Smuzhiyun }
4703*4882a593Smuzhiyun set_delayed_call(callback, page_put_link, page);
4704*4882a593Smuzhiyun BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM);
4705*4882a593Smuzhiyun kaddr = page_address(page);
4706*4882a593Smuzhiyun nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1);
4707*4882a593Smuzhiyun return kaddr;
4708*4882a593Smuzhiyun }
4709*4882a593Smuzhiyun
4710*4882a593Smuzhiyun EXPORT_SYMBOL(page_get_link);
4711*4882a593Smuzhiyun
/*
 * Delayed-call counterpart of page_get_link(): @arg is the page that
 * was registered there, and its reference is dropped here.
 */
void page_put_link(void *arg)
{
	struct page *page = arg;

	put_page(page);
}
EXPORT_SYMBOL(page_put_link);
4717*4882a593Smuzhiyun
/*
 * readlink implementation on top of page_get_link(): fetch the link
 * body, copy it out with readlink_copy() (which also handles an
 * ERR_PTR() body), then run the delayed call registered by
 * page_get_link().  Returns readlink_copy()'s result.
 */
int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
	DEFINE_DELAYED_CALL(done);
	const char *body = page_get_link(dentry, d_inode(dentry), &done);
	int res = readlink_copy(buffer, buflen, body);

	do_delayed_call(&done);
	return res;
}
EXPORT_SYMBOL(page_readlink);
4728*4882a593Smuzhiyun
4729*4882a593Smuzhiyun /*
4730*4882a593Smuzhiyun * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
4731*4882a593Smuzhiyun */
__page_symlink(struct inode * inode,const char * symname,int len,int nofs)4732*4882a593Smuzhiyun int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
4733*4882a593Smuzhiyun {
4734*4882a593Smuzhiyun struct address_space *mapping = inode->i_mapping;
4735*4882a593Smuzhiyun struct page *page;
4736*4882a593Smuzhiyun void *fsdata = NULL;
4737*4882a593Smuzhiyun int err;
4738*4882a593Smuzhiyun unsigned int flags = 0;
4739*4882a593Smuzhiyun if (nofs)
4740*4882a593Smuzhiyun flags |= AOP_FLAG_NOFS;
4741*4882a593Smuzhiyun
4742*4882a593Smuzhiyun retry:
4743*4882a593Smuzhiyun err = pagecache_write_begin(NULL, mapping, 0, len-1,
4744*4882a593Smuzhiyun flags, &page, &fsdata);
4745*4882a593Smuzhiyun if (err)
4746*4882a593Smuzhiyun goto fail;
4747*4882a593Smuzhiyun
4748*4882a593Smuzhiyun memcpy(page_address(page), symname, len-1);
4749*4882a593Smuzhiyun
4750*4882a593Smuzhiyun err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
4751*4882a593Smuzhiyun page, fsdata);
4752*4882a593Smuzhiyun if (err < 0)
4753*4882a593Smuzhiyun goto fail;
4754*4882a593Smuzhiyun if (err < len-1)
4755*4882a593Smuzhiyun goto retry;
4756*4882a593Smuzhiyun
4757*4882a593Smuzhiyun mark_inode_dirty(inode);
4758*4882a593Smuzhiyun return 0;
4759*4882a593Smuzhiyun fail:
4760*4882a593Smuzhiyun return err;
4761*4882a593Smuzhiyun }
4762*4882a593Smuzhiyun EXPORT_SYMBOL(__page_symlink);
4763*4882a593Smuzhiyun
page_symlink(struct inode * inode,const char * symname,int len)4764*4882a593Smuzhiyun int page_symlink(struct inode *inode, const char *symname, int len)
4765*4882a593Smuzhiyun {
4766*4882a593Smuzhiyun return __page_symlink(inode, symname, len,
4767*4882a593Smuzhiyun !mapping_gfp_constraint(inode->i_mapping, __GFP_FS));
4768*4882a593Smuzhiyun }
4769*4882a593Smuzhiyun EXPORT_SYMBOL(page_symlink);
4770*4882a593Smuzhiyun
4771*4882a593Smuzhiyun const struct inode_operations page_symlink_inode_operations = {
4772*4882a593Smuzhiyun .get_link = page_get_link,
4773*4882a593Smuzhiyun };
4774*4882a593Smuzhiyun EXPORT_SYMBOL(page_symlink_inode_operations);
4775