1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 *
4 * Copyright (C) 2011 Novell Inc.
5 */
6
7 #include <uapi/linux/magic.h>
8 #include <linux/fs.h>
9 #include <linux/namei.h>
10 #include <linux/xattr.h>
11 #include <linux/mount.h>
12 #include <linux/parser.h>
13 #include <linux/module.h>
14 #include <linux/statfs.h>
15 #include <linux/seq_file.h>
16 #include <linux/posix_acl_xattr.h>
17 #include <linux/exportfs.h>
18 #include "overlayfs.h"
19
/* Module metadata: author, description and license. */
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");
/* Import the symbol namespace ANDROID_GKI_VFS_EXPORT_ONLY. */
MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY);
24
25
/* Forward declaration only; the structure is not defined in this part of the file. */
struct ovl_dir_cache;

/*
 * NOTE(review): presumably the maximum number of stacked lower layers;
 * it is not used in the visible part of this file - confirm at the use site.
 */
#define OVL_MAX_STACK 500
29
/*
 * Module parameter "redirect_dir": its built-in default follows
 * CONFIG_OVERLAY_FS_REDIRECT_DIR.
 */
static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
MODULE_PARM_DESC(redirect_dir,
		 "Default to on or off for the redirect_dir feature");

/*
 * Module parameter "redirect_always_follow": its built-in default follows
 * CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW.  Consulted when the mount option
 * redirect_dir=off is parsed.
 */
static bool ovl_redirect_always_follow =
	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
module_param_named(redirect_always_follow, ovl_redirect_always_follow,
		   bool, 0644);
MODULE_PARM_DESC(redirect_always_follow,
		 "Follow redirects even if redirect_dir feature is turned off");

/* Module parameter "index": built-in default follows CONFIG_OVERLAY_FS_INDEX. */
static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
module_param_named(index, ovl_index_def, bool, 0644);
MODULE_PARM_DESC(index,
		 "Default to on or off for the inodes index feature");

/*
 * Module parameter "nfs_export": built-in default follows
 * CONFIG_OVERLAY_FS_NFS_EXPORT.
 */
static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
MODULE_PARM_DESC(nfs_export,
		 "Default to on or off for the NFS export feature");

/*
 * Module parameter "xino_auto": built-in default follows
 * CONFIG_OVERLAY_FS_XINO_AUTO.  Selects between OVL_XINO_AUTO and
 * OVL_XINO_OFF in ovl_xino_def().
 */
static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
MODULE_PARM_DESC(xino_auto,
		 "Auto enable xino feature");
56
57 static bool __read_mostly ovl_override_creds_def = true;
58 module_param_named(override_creds, ovl_override_creds_def, bool, 0644);
59 MODULE_PARM_DESC(ovl_override_creds_def,
60 "Use mounter's credentials for accesses");
61
ovl_entry_stack_free(struct ovl_entry * oe)62 static void ovl_entry_stack_free(struct ovl_entry *oe)
63 {
64 unsigned int i;
65
66 for (i = 0; i < oe->numlower; i++)
67 dput(oe->lowerstack[i].dentry);
68 }
69
/*
 * Module parameter "metacopy": built-in default follows
 * CONFIG_OVERLAY_FS_METACOPY.
 */
static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
MODULE_PARM_DESC(metacopy,
		 "Default to on or off for the metadata only copy up feature");
74
ovl_dentry_release(struct dentry * dentry)75 static void ovl_dentry_release(struct dentry *dentry)
76 {
77 struct ovl_entry *oe = dentry->d_fsdata;
78
79 if (oe) {
80 ovl_entry_stack_free(oe);
81 kfree_rcu(oe, rcu);
82 }
83 }
84
ovl_d_real(struct dentry * dentry,const struct inode * inode)85 static struct dentry *ovl_d_real(struct dentry *dentry,
86 const struct inode *inode)
87 {
88 struct dentry *real = NULL, *lower;
89
90 /* It's an overlay file */
91 if (inode && d_inode(dentry) == inode)
92 return dentry;
93
94 if (!d_is_reg(dentry)) {
95 if (!inode || inode == d_inode(dentry))
96 return dentry;
97 goto bug;
98 }
99
100 real = ovl_dentry_upper(dentry);
101 if (real && (inode == d_inode(real)))
102 return real;
103
104 if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
105 return real;
106
107 lower = ovl_dentry_lowerdata(dentry);
108 if (!lower)
109 goto bug;
110 real = lower;
111
112 /* Handle recursion */
113 real = d_real(real, inode);
114
115 if (!inode || inode == d_inode(real))
116 return real;
117 bug:
118 WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
119 __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
120 inode ? inode->i_ino : 0, real,
121 real && d_inode(real) ? d_inode(real)->i_ino : 0);
122 return dentry;
123 }
124
/*
 * Revalidate one real dentry @d through its own dentry operations.
 *
 * With @weak set, ->d_weak_revalidate is used when the dentry advertises it.
 * Otherwise ->d_revalidate is used when advertised; a zero result from it is
 * turned into -ESTALE, invalidating @d first unless LOOKUP_RCU is set in
 * @flags.  Returns 1 when the dentry has no matching hook.
 */
static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
{
	int ret;

	if (weak) {
		if (!(d->d_flags & DCACHE_OP_WEAK_REVALIDATE))
			return 1;
		return d->d_op->d_weak_revalidate(d, flags);
	}

	if (!(d->d_flags & DCACHE_OP_REVALIDATE))
		return 1;

	ret = d->d_op->d_revalidate(d, flags);
	if (ret)
		return ret;

	if (!(flags & LOOKUP_RCU))
		d_invalidate(d);
	return -ESTALE;
}
142
/*
 * Revalidate the upper dentry (if any) and then each lower stack dentry of
 * an overlay dentry, stopping at the first result that is not positive.
 * Returns the last result obtained, or 1 if there was nothing to check.
 */
static int ovl_dentry_revalidate_common(struct dentry *dentry,
					unsigned int flags, bool weak)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	struct dentry *upperdentry = ovl_dentry_upper(dentry);
	unsigned int idx = 0;
	int ret = 1;

	if (upperdentry)
		ret = ovl_revalidate_real(upperdentry, flags, weak);

	while (ret > 0 && idx < oe->numlower) {
		ret = ovl_revalidate_real(oe->lowerstack[idx].dentry, flags,
					  weak);
		idx++;
	}
	return ret;
}
161
/* ->d_revalidate: run the common revalidation in non-weak mode. */
static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
	return ovl_dentry_revalidate_common(dentry, flags, false);
}
166
/* ->d_weak_revalidate: run the common revalidation in weak mode. */
static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
{
	return ovl_dentry_revalidate_common(dentry, flags, true);
}
171
/* Dentry operations table wiring up the overlay dentry hooks defined above. */
static const struct dentry_operations ovl_dentry_operations = {
	.d_release = ovl_dentry_release,
	.d_real = ovl_d_real,
	.d_revalidate = ovl_dentry_revalidate,
	.d_weak_revalidate = ovl_dentry_weak_revalidate,
};
178
/*
 * Slab cache for struct ovl_inode; ovl_alloc_inode() allocates from it and
 * ovl_free_inode() returns objects to it.
 */
static struct kmem_cache *ovl_inode_cachep;
180
ovl_alloc_inode(struct super_block * sb)181 static struct inode *ovl_alloc_inode(struct super_block *sb)
182 {
183 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
184
185 if (!oi)
186 return NULL;
187
188 oi->cache = NULL;
189 oi->redirect = NULL;
190 oi->version = 0;
191 oi->flags = 0;
192 oi->__upperdentry = NULL;
193 oi->lower = NULL;
194 oi->lowerdata = NULL;
195 mutex_init(&oi->lock);
196
197 return &oi->vfs_inode;
198 }
199
ovl_free_inode(struct inode * inode)200 static void ovl_free_inode(struct inode *inode)
201 {
202 struct ovl_inode *oi = OVL_I(inode);
203
204 kfree(oi->redirect);
205 mutex_destroy(&oi->lock);
206 kmem_cache_free(ovl_inode_cachep, oi);
207 }
208
ovl_destroy_inode(struct inode * inode)209 static void ovl_destroy_inode(struct inode *inode)
210 {
211 struct ovl_inode *oi = OVL_I(inode);
212
213 dput(oi->__upperdentry);
214 iput(oi->lower);
215 if (S_ISDIR(inode->i_mode))
216 ovl_dir_cache_free(inode);
217 else
218 iput(oi->lowerdata);
219 }
220
ovl_free_fs(struct ovl_fs * ofs)221 static void ovl_free_fs(struct ovl_fs *ofs)
222 {
223 struct vfsmount **mounts;
224 unsigned i;
225
226 iput(ofs->workbasedir_trap);
227 iput(ofs->indexdir_trap);
228 iput(ofs->workdir_trap);
229 dput(ofs->whiteout);
230 dput(ofs->indexdir);
231 dput(ofs->workdir);
232 if (ofs->workdir_locked)
233 ovl_inuse_unlock(ofs->workbasedir);
234 dput(ofs->workbasedir);
235 if (ofs->upperdir_locked)
236 ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
237
238 /* Hack! Reuse ofs->layers as a vfsmount array before freeing it */
239 mounts = (struct vfsmount **) ofs->layers;
240 for (i = 0; i < ofs->numlayer; i++) {
241 iput(ofs->layers[i].trap);
242 mounts[i] = ofs->layers[i].mnt;
243 }
244 kern_unmount_array(mounts, ofs->numlayer);
245 kfree(ofs->layers);
246 for (i = 0; i < ofs->numfs; i++)
247 free_anon_bdev(ofs->fs[i].pseudo_dev);
248 kfree(ofs->fs);
249
250 kfree(ofs->config.lowerdir);
251 kfree(ofs->config.upperdir);
252 kfree(ofs->config.workdir);
253 kfree(ofs->config.redirect_mode);
254 if (ofs->creator_cred)
255 put_cred(ofs->creator_cred);
256 kfree(ofs);
257 }
258
ovl_put_super(struct super_block * sb)259 static void ovl_put_super(struct super_block *sb)
260 {
261 struct ovl_fs *ofs = sb->s_fs_info;
262
263 ovl_free_fs(ofs);
264 }
265
266 /* Sync real dirty inodes in upper filesystem (if it exists) */
ovl_sync_fs(struct super_block * sb,int wait)267 static int ovl_sync_fs(struct super_block *sb, int wait)
268 {
269 struct ovl_fs *ofs = sb->s_fs_info;
270 struct super_block *upper_sb;
271 int ret;
272
273 ret = ovl_sync_status(ofs);
274 /*
275 * We have to always set the err, because the return value isn't
276 * checked in syncfs, and instead indirectly return an error via
277 * the sb's writeback errseq, which VFS inspects after this call.
278 */
279 if (ret < 0) {
280 errseq_set(&sb->s_wb_err, -EIO);
281 return -EIO;
282 }
283
284 if (!ret)
285 return ret;
286
287 /*
288 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
289 * All the super blocks will be iterated, including upper_sb.
290 *
291 * If this is a syncfs(2) call, then we do need to call
292 * sync_filesystem() on upper_sb, but enough if we do it when being
293 * called with wait == 1.
294 */
295 if (!wait)
296 return 0;
297
298 upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
299
300 down_read(&upper_sb->s_umount);
301 ret = sync_filesystem(upper_sb);
302 up_read(&upper_sb->s_umount);
303
304 return ret;
305 }
306
307 /**
308 * ovl_statfs
309 * @sb: The overlayfs super block
310 * @buf: The struct kstatfs to fill in with stats
311 *
312 * Get the filesystem statistics. As writes always target the upper layer
313 * filesystem pass the statfs to the upper filesystem (if it exists)
314 */
ovl_statfs(struct dentry * dentry,struct kstatfs * buf)315 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
316 {
317 struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
318 struct dentry *root_dentry = dentry->d_sb->s_root;
319 struct path path;
320 int err;
321
322 ovl_path_real(root_dentry, &path);
323
324 err = vfs_statfs(&path, buf);
325 if (!err) {
326 buf->f_namelen = ofs->namelen;
327 buf->f_type = OVERLAYFS_SUPER_MAGIC;
328 }
329
330 return err;
331 }
332
333 /* Will this overlay be forced to mount/remount ro? */
ovl_force_readonly(struct ovl_fs * ofs)334 static bool ovl_force_readonly(struct ovl_fs *ofs)
335 {
336 return (!ovl_upper_mnt(ofs) || !ofs->workdir);
337 }
338
ovl_redirect_mode_def(void)339 static const char *ovl_redirect_mode_def(void)
340 {
341 return ovl_redirect_dir_def ? "on" : "off";
342 }
343
/*
 * Option value strings for "xino=", indexed by config.xino in
 * ovl_show_options().
 * NOTE(review): the order presumably matches the OVL_XINO_* enum values -
 * confirm against their definition.
 */
static const char * const ovl_xino_str[] = {
	"off",
	"auto",
	"on",
};
349
ovl_xino_def(void)350 static inline int ovl_xino_def(void)
351 {
352 return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
353 }
354
355 /**
356 * ovl_show_options
357 *
358 * Prints the mount options for a given superblock.
359 * Returns zero; does not fail.
360 */
ovl_show_options(struct seq_file * m,struct dentry * dentry)361 static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
362 {
363 struct super_block *sb = dentry->d_sb;
364 struct ovl_fs *ofs = sb->s_fs_info;
365
366 seq_show_option(m, "lowerdir", ofs->config.lowerdir);
367 if (ofs->config.upperdir) {
368 seq_show_option(m, "upperdir", ofs->config.upperdir);
369 seq_show_option(m, "workdir", ofs->config.workdir);
370 }
371 if (ofs->config.default_permissions)
372 seq_puts(m, ",default_permissions");
373 if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
374 seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
375 if (ofs->config.index != ovl_index_def)
376 seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
377 if (ofs->config.nfs_export != ovl_nfs_export_def)
378 seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
379 "on" : "off");
380 if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb))
381 seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
382 if (ofs->config.metacopy != ovl_metacopy_def)
383 seq_printf(m, ",metacopy=%s",
384 ofs->config.metacopy ? "on" : "off");
385 if (ofs->config.ovl_volatile)
386 seq_puts(m, ",volatile");
387 if (ofs->config.override_creds != ovl_override_creds_def)
388 seq_show_option(m, "override_creds",
389 ofs->config.override_creds ? "on" : "off");
390 return 0;
391 }
392
ovl_remount(struct super_block * sb,int * flags,char * data)393 static int ovl_remount(struct super_block *sb, int *flags, char *data)
394 {
395 struct ovl_fs *ofs = sb->s_fs_info;
396 struct super_block *upper_sb;
397 int ret = 0;
398
399 if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
400 return -EROFS;
401
402 if (*flags & SB_RDONLY && !sb_rdonly(sb)) {
403 upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
404 if (ovl_should_sync(ofs)) {
405 down_read(&upper_sb->s_umount);
406 ret = sync_filesystem(upper_sb);
407 up_read(&upper_sb->s_umount);
408 }
409 }
410
411 return ret;
412 }
413
/*
 * Super block operations table.  All hooks are the overlay functions defined
 * above, except ->drop_inode which uses generic_delete_inode.
 */
static const struct super_operations ovl_super_operations = {
	.alloc_inode = ovl_alloc_inode,
	.free_inode = ovl_free_inode,
	.destroy_inode = ovl_destroy_inode,
	.drop_inode = generic_delete_inode,
	.put_super = ovl_put_super,
	.sync_fs = ovl_sync_fs,
	.statfs = ovl_statfs,
	.show_options = ovl_show_options,
	.remount_fs = ovl_remount,
};
425
/*
 * Mount option tokens.  Each value is paired with a pattern in ovl_tokens
 * and handled by a case in ovl_parse_opt().
 */
enum {
	OPT_LOWERDIR,
	OPT_UPPERDIR,
	OPT_WORKDIR,
	OPT_DEFAULT_PERMISSIONS,
	OPT_REDIRECT_DIR,
	OPT_INDEX_ON,
	OPT_INDEX_OFF,
	OPT_NFS_EXPORT_ON,
	OPT_NFS_EXPORT_OFF,
	OPT_XINO_ON,
	OPT_XINO_OFF,
	OPT_XINO_AUTO,
	OPT_METACOPY_ON,
	OPT_METACOPY_OFF,
	OPT_VOLATILE,
	OPT_OVERRIDE_CREDS_ON,
	OPT_OVERRIDE_CREDS_OFF,
	OPT_ERR,
};
446
/*
 * Pattern table passed to match_token() by ovl_parse_opt().  Patterns with
 * "%s" capture the option value into args[0]; the final {OPT_ERR, NULL}
 * entry ends the table.
 */
static const match_table_t ovl_tokens = {
	{OPT_LOWERDIR, "lowerdir=%s"},
	{OPT_UPPERDIR, "upperdir=%s"},
	{OPT_WORKDIR, "workdir=%s"},
	{OPT_DEFAULT_PERMISSIONS, "default_permissions"},
	{OPT_REDIRECT_DIR, "redirect_dir=%s"},
	{OPT_INDEX_ON, "index=on"},
	{OPT_INDEX_OFF, "index=off"},
	{OPT_NFS_EXPORT_ON, "nfs_export=on"},
	{OPT_NFS_EXPORT_OFF, "nfs_export=off"},
	{OPT_XINO_ON, "xino=on"},
	{OPT_XINO_OFF, "xino=off"},
	{OPT_XINO_AUTO, "xino=auto"},
	{OPT_METACOPY_ON, "metacopy=on"},
	{OPT_METACOPY_OFF, "metacopy=off"},
	{OPT_VOLATILE, "volatile"},
	{OPT_OVERRIDE_CREDS_ON, "override_creds=on"},
	{OPT_OVERRIDE_CREDS_OFF, "override_creds=off"},
	{OPT_ERR, NULL}
};
467
/*
 * Split the next comma-separated option off the string at *@s.
 *
 * A backslash escapes the character that follows it, so "\," does not end
 * an option; escapes are left in place.  The separating comma is overwritten
 * with a NUL and *@s is advanced past it, or set to NULL when the returned
 * option was the last one.
 *
 * Returns the start of the option, or NULL when *@s is already NULL.
 */
static char *ovl_next_opt(char **s)
{
	char *start = *s;
	char *cur;

	if (!start)
		return NULL;

	cur = start;
	while (*cur) {
		if (*cur == ',') {
			*cur = '\0';
			*s = cur + 1;
			return start;
		}
		/* Step over an escaped character; stop on a trailing backslash */
		if (*cur == '\\' && !*++cur)
			break;
		cur++;
	}

	*s = NULL;
	return start;
}
490
ovl_parse_redirect_mode(struct ovl_config * config,const char * mode)491 static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
492 {
493 if (strcmp(mode, "on") == 0) {
494 config->redirect_dir = true;
495 /*
496 * Does not make sense to have redirect creation without
497 * redirect following.
498 */
499 config->redirect_follow = true;
500 } else if (strcmp(mode, "follow") == 0) {
501 config->redirect_follow = true;
502 } else if (strcmp(mode, "off") == 0) {
503 if (ovl_redirect_always_follow)
504 config->redirect_follow = true;
505 } else if (strcmp(mode, "nofollow") != 0) {
506 pr_err("bad mount option \"redirect_dir=%s\"\n",
507 mode);
508 return -EINVAL;
509 }
510
511 return 0;
512 }
513
/*
 * Parse the mount option string @opt into @config.
 *
 * redirect_mode and override_creds are first seeded from the module
 * parameter defaults.  Each option is then split off with ovl_next_opt() and
 * matched against ovl_tokens.  After the loop the dependencies between
 * options are resolved; the *_opt locals record which options were given
 * explicitly so that explicit conflicts fail with -EINVAL while defaults are
 * silently adjusted.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or -EINVAL for an
 * unrecognized option or conflicting explicit options.  Strings already
 * stored in @config are not freed here on an error return.
 * NOTE(review): presumably the caller releases them through ovl_free_fs() -
 * confirm at the call site.
 */
static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
	char *p;
	int err;
	bool metacopy_opt = false, redirect_opt = false;
	bool nfs_export_opt = false, index_opt = false;

	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
	if (!config->redirect_mode)
		return -ENOMEM;
	config->override_creds = ovl_override_creds_def;

	while ((p = ovl_next_opt(&opt)) != NULL) {
		int token;
		substring_t args[MAX_OPT_ARGS];

		/* Skip empty options such as those produced by ",," */
		if (!*p)
			continue;

		token = match_token(p, ovl_tokens, args);
		switch (token) {
		case OPT_UPPERDIR:
			/* A repeated option replaces the earlier value */
			kfree(config->upperdir);
			config->upperdir = match_strdup(&args[0]);
			if (!config->upperdir)
				return -ENOMEM;
			break;

		case OPT_LOWERDIR:
			kfree(config->lowerdir);
			config->lowerdir = match_strdup(&args[0]);
			if (!config->lowerdir)
				return -ENOMEM;
			break;

		case OPT_WORKDIR:
			kfree(config->workdir);
			config->workdir = match_strdup(&args[0]);
			if (!config->workdir)
				return -ENOMEM;
			break;

		case OPT_DEFAULT_PERMISSIONS:
			config->default_permissions = true;
			break;

		case OPT_REDIRECT_DIR:
			/* The mode string is validated after the loop */
			kfree(config->redirect_mode);
			config->redirect_mode = match_strdup(&args[0]);
			if (!config->redirect_mode)
				return -ENOMEM;
			redirect_opt = true;
			break;

		case OPT_INDEX_ON:
			config->index = true;
			index_opt = true;
			break;

		case OPT_INDEX_OFF:
			config->index = false;
			index_opt = true;
			break;

		case OPT_NFS_EXPORT_ON:
			config->nfs_export = true;
			nfs_export_opt = true;
			break;

		case OPT_NFS_EXPORT_OFF:
			config->nfs_export = false;
			nfs_export_opt = true;
			break;

		case OPT_XINO_ON:
			config->xino = OVL_XINO_ON;
			break;

		case OPT_XINO_OFF:
			config->xino = OVL_XINO_OFF;
			break;

		case OPT_XINO_AUTO:
			config->xino = OVL_XINO_AUTO;
			break;

		case OPT_METACOPY_ON:
			config->metacopy = true;
			metacopy_opt = true;
			break;

		case OPT_METACOPY_OFF:
			config->metacopy = false;
			metacopy_opt = true;
			break;

		case OPT_VOLATILE:
			config->ovl_volatile = true;
			break;

		case OPT_OVERRIDE_CREDS_ON:
			config->override_creds = true;
			break;

		case OPT_OVERRIDE_CREDS_OFF:
			config->override_creds = false;
			break;

		default:
			pr_err("unrecognized mount option \"%s\" or missing value\n",
					p);
			return -EINVAL;
		}
	}

	/* Workdir/index are useless in non-upper mount */
	if (!config->upperdir) {
		if (config->workdir) {
			pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
				config->workdir);
			kfree(config->workdir);
			config->workdir = NULL;
		}
		if (config->index && index_opt) {
			pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
			/* Treat index as not explicitly given from here on */
			index_opt = false;
		}
		config->index = false;
	}

	if (!config->upperdir && config->ovl_volatile) {
		pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
		config->ovl_volatile = false;
	}

	err = ovl_parse_redirect_mode(config, config->redirect_mode);
	if (err)
		return err;

	/*
	 * This is to make the logic below simpler.  It doesn't make any other
	 * difference, since config->redirect_dir is only used for upper.
	 */
	if (!config->upperdir && config->redirect_follow)
		config->redirect_dir = true;

	/* Resolve metacopy -> redirect_dir dependency */
	if (config->metacopy && !config->redirect_dir) {
		if (metacopy_opt && redirect_opt) {
			pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
			       config->redirect_mode);
			return -EINVAL;
		}
		if (redirect_opt) {
			/*
			 * There was an explicit redirect_dir=... that resulted
			 * in this conflict.
			 */
			pr_info("disabling metacopy due to redirect_dir=%s\n",
				config->redirect_mode);
			config->metacopy = false;
		} else {
			/* Automatically enable redirect otherwise. */
			config->redirect_follow = config->redirect_dir = true;
		}
	}

	/* Resolve nfs_export -> index dependency */
	if (config->nfs_export && !config->index) {
		if (!config->upperdir && config->redirect_follow) {
			pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
			config->nfs_export = false;
		} else if (nfs_export_opt && index_opt) {
			pr_err("conflicting options: nfs_export=on,index=off\n");
			return -EINVAL;
		} else if (index_opt) {
			/*
			 * There was an explicit index=off that resulted
			 * in this conflict.
			 */
			pr_info("disabling nfs_export due to index=off\n");
			config->nfs_export = false;
		} else {
			/* Automatically enable index otherwise. */
			config->index = true;
		}
	}

	/* Resolve nfs_export -> !metacopy dependency */
	if (config->nfs_export && config->metacopy) {
		if (nfs_export_opt && metacopy_opt) {
			pr_err("conflicting options: nfs_export=on,metacopy=on\n");
			return -EINVAL;
		}
		if (metacopy_opt) {
			/*
			 * There was an explicit metacopy=on that resulted
			 * in this conflict.
			 */
			pr_info("disabling nfs_export due to metacopy=on\n");
			config->nfs_export = false;
		} else {
			/*
			 * There was an explicit nfs_export=on that resulted
			 * in this conflict.
			 */
			pr_info("disabling metacopy due to nfs_export=on\n");
			config->metacopy = false;
		}
	}

	return 0;
}
727
/*
 * Directory names for the work and index directories.
 * NOTE(review): presumably passed as @name to ovl_workdir_create(); the
 * call sites are outside the visible part of this file - confirm.
 */
#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"
730
/*
 * Look up, and if necessary create, the directory @name under
 * ofs->workbasedir.  The parent directory inode is locked for the duration.
 *
 * @ofs:     overlay filesystem instance
 * @name:    name of the directory below workbasedir
 * @persist: when true, an already existing directory is returned as is;
 *           when false, an existing one is cleaned up once and the lookup
 *           is retried
 *
 * A freshly created directory has its POSIX ACL xattrs removed and its mode
 * reset to S_IFDIR | 0.
 *
 * Returns the directory dentry on success, ERR_PTR(-EINVAL) when the cleanup
 * of an existing directory fails with -EINVAL, or NULL on any other failure
 * (a warning is logged in that case).
 */
static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
					 const char *name, bool persist)
{
	struct inode *dir = ofs->workbasedir->d_inode;
	struct vfsmount *mnt = ovl_upper_mnt(ofs);
	struct dentry *work;
	int err;
	bool retried = false;

	inode_lock_nested(dir, I_MUTEX_PARENT);
retry:
	work = lookup_one_len(name, ofs->workbasedir, strlen(name));

	if (!IS_ERR(work)) {
		struct iattr attr = {
			.ia_valid = ATTR_MODE,
			.ia_mode = S_IFDIR | 0,
		};

		if (work->d_inode) {
			/* Still present after one cleanup attempt: give up */
			err = -EEXIST;
			if (retried)
				goto out_dput;

			/* A persistent directory is reused as found */
			if (persist)
				goto out_unlock;

			retried = true;
			err = ovl_workdir_cleanup(dir, mnt, work, 0);
			dput(work);
			if (err == -EINVAL) {
				work = ERR_PTR(err);
				goto out_unlock;
			}
			goto retry;
		}

		err = ovl_mkdir_real(dir, &work, attr.ia_mode);
		if (err)
			goto out_dput;

		/* Weird filesystem returning with hashed negative (kernfs)? */
		err = -EINVAL;
		if (d_really_is_negative(work))
			goto out_dput;

		/*
		 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
		 *
		 * a) success (there was a POSIX ACL xattr and was removed)
		 * b) -ENODATA (there was no POSIX ACL xattr)
		 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
		 *
		 * There are various other error values that could effectively
		 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
		 * if the xattr name is too long), but the set of filesystems
		 * allowed as upper are limited to "normal" ones, where checking
		 * for the above two errors is sufficient.
		 */
		err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
		if (err && err != -ENODATA && err != -EOPNOTSUPP)
			goto out_dput;

		err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
		if (err && err != -ENODATA && err != -EOPNOTSUPP)
			goto out_dput;

		/* Clear any inherited mode bits */
		inode_lock(work->d_inode);
		err = notify_change(work, &attr, NULL);
		inode_unlock(work->d_inode);
		if (err)
			goto out_dput;
	} else {
		err = PTR_ERR(work);
		goto out_err;
	}
out_unlock:
	inode_unlock(dir);
	return work;

out_dput:
	dput(work);
out_err:
	/* Failure is reported as NULL so the caller can carry on */
	pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
		ofs->config.workdir, name, -err);
	work = NULL;
	goto out_unlock;
}
820
/*
 * Remove backslash escapes from @s in place: each backslash is dropped and
 * the character following it is kept literally.  A trailing lone backslash
 * is dropped as well.
 */
static void ovl_unescape(char *s)
{
	char *out = s;

	do {
		if (*s == '\\')
			s++;
		*out++ = *s;
	} while (*s++);
}
833
ovl_mount_dir_noesc(const char * name,struct path * path)834 static int ovl_mount_dir_noesc(const char *name, struct path *path)
835 {
836 int err = -EINVAL;
837
838 if (!*name) {
839 pr_err("empty lowerdir\n");
840 goto out;
841 }
842 err = kern_path(name, LOOKUP_FOLLOW, path);
843 if (err) {
844 pr_err("failed to resolve '%s': %i\n", name, err);
845 goto out;
846 }
847 err = -EINVAL;
848 if (ovl_dentry_weird(path->dentry)) {
849 pr_err("filesystem on '%s' not supported\n", name);
850 goto out_put;
851 }
852 if (!d_is_dir(path->dentry)) {
853 pr_err("'%s' not a directory\n", name);
854 goto out_put;
855 }
856 return 0;
857
858 out_put:
859 path_put_init(path);
860 out:
861 return err;
862 }
863
ovl_mount_dir(const char * name,struct path * path)864 static int ovl_mount_dir(const char *name, struct path *path)
865 {
866 int err = -ENOMEM;
867 char *tmp = kstrdup(name, GFP_KERNEL);
868
869 if (tmp) {
870 ovl_unescape(tmp);
871 err = ovl_mount_dir_noesc(tmp, path);
872
873 if (!err && path->dentry->d_flags & DCACHE_OP_REAL) {
874 pr_err("filesystem on '%s' not supported as upperdir\n",
875 tmp);
876 path_put_init(path);
877 err = -EINVAL;
878 }
879 kfree(tmp);
880 }
881 return err;
882 }
883
ovl_check_namelen(struct path * path,struct ovl_fs * ofs,const char * name)884 static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
885 const char *name)
886 {
887 struct kstatfs statfs;
888 int err = vfs_statfs(path, &statfs);
889
890 if (err)
891 pr_err("statfs failed on '%s'\n", name);
892 else
893 ofs->namelen = max(ofs->namelen, statfs.f_namelen);
894
895 return err;
896 }
897
ovl_lower_dir(const char * name,struct path * path,struct ovl_fs * ofs,int * stack_depth)898 static int ovl_lower_dir(const char *name, struct path *path,
899 struct ovl_fs *ofs, int *stack_depth)
900 {
901 int fh_type;
902 int err;
903
904 err = ovl_mount_dir_noesc(name, path);
905 if (err)
906 return err;
907
908 err = ovl_check_namelen(path, ofs, name);
909 if (err)
910 return err;
911
912 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
913
914 /*
915 * The inodes index feature and NFS export need to encode and decode
916 * file handles, so they require that all layers support them.
917 */
918 fh_type = ovl_can_decode_fh(path->dentry->d_sb);
919 if ((ofs->config.nfs_export ||
920 (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
921 ofs->config.index = false;
922 ofs->config.nfs_export = false;
923 pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
924 name);
925 }
926
927 /* Check if lower fs has 32bit inode numbers */
928 if (fh_type != FILEID_INO32_GEN)
929 ofs->xino_mode = -1;
930
931 return 0;
932 }
933
934 /* Workdir should not be subdir of upperdir and vice versa */
ovl_workdir_ok(struct dentry * workdir,struct dentry * upperdir)935 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
936 {
937 bool ok = false;
938
939 if (workdir != upperdir) {
940 ok = (lock_rename(workdir, upperdir) == NULL);
941 unlock_rename(workdir, upperdir);
942 }
943 return ok;
944 }
945
/*
 * Split a colon-separated lowerdir list in place into consecutive
 * NUL-terminated strings and unescape it in the same pass: an unescaped ':'
 * becomes a NUL, while a backslash is dropped and the character after it is
 * kept literally.
 *
 * Returns the number of resulting strings (always at least 1).
 */
static unsigned int ovl_split_lowerdirs(char *str)
{
	unsigned int count = 1;
	char *src = str;
	char *dst = str;

	for (;;) {
		char c = *src;

		if (c == ':') {
			*dst = '\0';
			count++;
		} else {
			if (c == '\\')
				c = *++src;
			*dst = c;
			if (!c)
				break;
		}
		src++;
		dst++;
	}
	return count;
}
965
/*
 * xattr ->get handler for the POSIX ACL xattrs.  The lookup uses the
 * handler's full xattr name (handler->name); the @name argument is ignored.
 */
static int __maybe_unused
ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, void *buffer, size_t size, int flags)
{
	return ovl_xattr_get(dentry, inode, handler->name, buffer, size, flags);
}
973
/*
 * xattr ->set handler for the POSIX ACL xattrs.
 *
 * The value is validated and the permission checks are done before anything
 * is modified.  Returns:
 *   - the posix_acl_from_xattr() error for a malformed @value
 *   - -EOPNOTSUPP if the workdir inode is not IS_POSIXACL or the real inode
 *     has no ->set_acl
 *   - for a default ACL on a non-directory: -EACCES when a value is given,
 *     0 when it is being removed
 *   - -EPERM if the caller is neither owner nor capable
 *   - otherwise the result of ovl_setattr()/ovl_xattr_set()
 */
static int __maybe_unused
ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, const void *value,
			size_t size, int flags)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *realinode = ovl_inode_real(inode);
	struct posix_acl *acl = NULL;
	int err;

	/* Check that everything is OK before copy-up */
	if (value) {
		acl = posix_acl_from_xattr(&init_user_ns, value, size);
		if (IS_ERR(acl))
			return PTR_ERR(acl);
	}
	err = -EOPNOTSUPP;
	if (!IS_POSIXACL(d_inode(workdir)))
		goto out_acl_release;
	if (!realinode->i_op->set_acl)
		goto out_acl_release;
	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
		err = acl ? -EACCES : 0;
		goto out_acl_release;
	}
	err = -EPERM;
	if (!inode_owner_or_capable(inode))
		goto out_acl_release;

	/* The parsed ACL was only needed for validation */
	posix_acl_release(acl);

	/*
	 * Check if sgid bit needs to be cleared (actual setacl operation will
	 * be done with mounter's capabilities and so that won't do it for us).
	 */
	if (unlikely(inode->i_mode & S_ISGID) &&
	    handler->flags == ACL_TYPE_ACCESS &&
	    !in_group_p(inode->i_gid) &&
	    !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
		struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };

		err = ovl_setattr(dentry, &iattr);
		if (err)
			return err;
	}

	/* As in the get handler, the full xattr name comes from the handler */
	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
	if (!err)
		ovl_copyattr(ovl_inode_real(inode), inode);

	return err;

out_acl_release:
	posix_acl_release(acl);
	return err;
}
1031
/*
 * xattr ->get handler for names under OVL_XATTR_PREFIX: always fails with
 * -EOPNOTSUPP.
 */
static int ovl_own_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *buffer, size_t size,
			     int flags)
{
	return -EOPNOTSUPP;
}
1039
/*
 * xattr ->set handler for names under OVL_XATTR_PREFIX: always fails with
 * -EOPNOTSUPP.
 */
static int ovl_own_xattr_set(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, const void *value,
			     size_t size, int flags)
{
	return -EOPNOTSUPP;
}
1047
/*
 * xattr ->get handler of the catch-all handler: passes @name through to
 * ovl_xattr_get() unchanged.
 */
static int ovl_other_xattr_get(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, void *buffer, size_t size,
			       int flags)
{
	return ovl_xattr_get(dentry, inode, name, buffer, size, flags);
}
1055
/*
 * xattr ->set handler of the catch-all handler: passes @name and @value
 * through to ovl_xattr_set() unchanged.
 */
static int ovl_other_xattr_set(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, const void *value,
			       size_t size, int flags)
{
	return ovl_xattr_set(dentry, inode, name, value, size, flags);
}
1063
/*
 * Handler for the POSIX access ACL xattr, matched by exact name.  The
 * .flags value tells the shared get/set functions which ACL type is meant.
 */
static const struct xattr_handler __maybe_unused
ovl_posix_acl_access_xattr_handler = {
	.name = XATTR_NAME_POSIX_ACL_ACCESS,
	.flags = ACL_TYPE_ACCESS,
	.get = ovl_posix_acl_xattr_get,
	.set = ovl_posix_acl_xattr_set,
};
1071
/*
 * Handler for the POSIX default ACL xattr, matched by exact name.  The
 * .flags value tells the shared get/set functions which ACL type is meant.
 */
static const struct xattr_handler __maybe_unused
ovl_posix_acl_default_xattr_handler = {
	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
	.flags = ACL_TYPE_DEFAULT,
	.get = ovl_posix_acl_xattr_get,
	.set = ovl_posix_acl_xattr_set,
};
1079
/*
 * Handler for xattrs under OVL_XATTR_PREFIX; both operations return
 * -EOPNOTSUPP.
 */
static const struct xattr_handler ovl_own_xattr_handler = {
	.prefix	= OVL_XATTR_PREFIX,
	.get = ovl_own_xattr_get,
	.set = ovl_own_xattr_set,
};
1085
/* Handler with an empty prefix, forwarding to ovl_xattr_get()/ovl_xattr_set(). */
static const struct xattr_handler ovl_other_xattr_handler = {
	.prefix	= "", /* catch all */
	.get = ovl_other_xattr_get,
	.set = ovl_other_xattr_set,
};
1091
/*
 * NULL-terminated list of xattr handlers.  The POSIX ACL handlers are only
 * included with CONFIG_FS_POSIX_ACL; the catch-all handler comes last.
 * NOTE(review): presumably handlers are tried in list order, so the
 * catch-all must stay last - confirm against the xattr core.
 */
static const struct xattr_handler *ovl_xattr_handlers[] = {
#ifdef CONFIG_FS_POSIX_ACL
	&ovl_posix_acl_access_xattr_handler,
	&ovl_posix_acl_default_xattr_handler,
#endif
	&ovl_own_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};
1101
ovl_setup_trap(struct super_block * sb,struct dentry * dir,struct inode ** ptrap,const char * name)1102 static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
1103 struct inode **ptrap, const char *name)
1104 {
1105 struct inode *trap;
1106 int err;
1107
1108 trap = ovl_get_trap_inode(sb, dir);
1109 err = PTR_ERR_OR_ZERO(trap);
1110 if (err) {
1111 if (err == -ELOOP)
1112 pr_err("conflicting %s path\n", name);
1113 return err;
1114 }
1115
1116 *ptrap = trap;
1117 return 0;
1118 }
1119
1120 /*
1121 * Determine how we treat concurrent use of upperdir/workdir based on the
1122 * index feature. This is papering over mount leaks of container runtimes,
1123 * for example, an old overlay mount is leaked and now its upperdir is
1124 * attempted to be used as a lower layer in a new overlay mount.
1125 */
ovl_report_in_use(struct ovl_fs * ofs,const char * name)1126 static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
1127 {
1128 if (ofs->config.index) {
1129 pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
1130 name);
1131 return -EBUSY;
1132 } else {
1133 pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
1134 name);
1135 return 0;
1136 }
1137 }
1138
ovl_get_upper(struct super_block * sb,struct ovl_fs * ofs,struct ovl_layer * upper_layer,struct path * upperpath)1139 static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
1140 struct ovl_layer *upper_layer, struct path *upperpath)
1141 {
1142 struct vfsmount *upper_mnt;
1143 int err;
1144
1145 err = ovl_mount_dir(ofs->config.upperdir, upperpath);
1146 if (err)
1147 goto out;
1148
1149 /* Upper fs should not be r/o */
1150 if (sb_rdonly(upperpath->mnt->mnt_sb)) {
1151 pr_err("upper fs is r/o, try multi-lower layers mount\n");
1152 err = -EINVAL;
1153 goto out;
1154 }
1155
1156 err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
1157 if (err)
1158 goto out;
1159
1160 err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
1161 "upperdir");
1162 if (err)
1163 goto out;
1164
1165 upper_mnt = clone_private_mount(upperpath);
1166 err = PTR_ERR(upper_mnt);
1167 if (IS_ERR(upper_mnt)) {
1168 pr_err("failed to clone upperpath\n");
1169 goto out;
1170 }
1171
1172 /* Don't inherit atime flags */
1173 upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
1174 upper_layer->mnt = upper_mnt;
1175 upper_layer->idx = 0;
1176 upper_layer->fsid = 0;
1177
1178 /*
1179 * Inherit SB_NOSEC flag from upperdir.
1180 *
1181 * This optimization changes behavior when a security related attribute
1182 * (suid/sgid/security.*) is changed on an underlying layer. This is
1183 * okay because we don't yet have guarantees in that case, but it will
1184 * need careful treatment once we want to honour changes to underlying
1185 * filesystems.
1186 */
1187 if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
1188 sb->s_flags |= SB_NOSEC;
1189
1190 if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
1191 ofs->upperdir_locked = true;
1192 } else {
1193 err = ovl_report_in_use(ofs, "upperdir");
1194 if (err)
1195 goto out;
1196 }
1197
1198 err = 0;
1199 out:
1200 return err;
1201 }
1202
1203 /*
1204 * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and
1205 * negative values if error is encountered.
1206 */
ovl_check_rename_whiteout(struct dentry * workdir)1207 static int ovl_check_rename_whiteout(struct dentry *workdir)
1208 {
1209 struct inode *dir = d_inode(workdir);
1210 struct dentry *temp;
1211 struct dentry *dest;
1212 struct dentry *whiteout;
1213 struct name_snapshot name;
1214 int err;
1215
1216 inode_lock_nested(dir, I_MUTEX_PARENT);
1217
1218 temp = ovl_create_temp(workdir, OVL_CATTR(S_IFREG | 0));
1219 err = PTR_ERR(temp);
1220 if (IS_ERR(temp))
1221 goto out_unlock;
1222
1223 dest = ovl_lookup_temp(workdir);
1224 err = PTR_ERR(dest);
1225 if (IS_ERR(dest)) {
1226 dput(temp);
1227 goto out_unlock;
1228 }
1229
1230 /* Name is inline and stable - using snapshot as a copy helper */
1231 take_dentry_name_snapshot(&name, temp);
1232 err = ovl_do_rename(dir, temp, dir, dest, RENAME_WHITEOUT);
1233 if (err) {
1234 if (err == -EINVAL)
1235 err = 0;
1236 goto cleanup_temp;
1237 }
1238
1239 whiteout = lookup_one_len(name.name.name, workdir, name.name.len);
1240 err = PTR_ERR(whiteout);
1241 if (IS_ERR(whiteout))
1242 goto cleanup_temp;
1243
1244 err = ovl_is_whiteout(whiteout);
1245
1246 /* Best effort cleanup of whiteout and temp file */
1247 if (err)
1248 ovl_cleanup(dir, whiteout);
1249 dput(whiteout);
1250
1251 cleanup_temp:
1252 ovl_cleanup(dir, temp);
1253 release_dentry_name_snapshot(&name);
1254 dput(temp);
1255 dput(dest);
1256
1257 out_unlock:
1258 inode_unlock(dir);
1259
1260 return err;
1261 }
1262
/*
 * Look up @name under @parent and, if the lookup yields a negative dentry,
 * create it with ovl_create_real() using @mode.
 *
 * The caller's reference on @parent is always consumed.  Returns the child
 * dentry, or the ERR_PTR produced by the lookup or the creation.
 */
static struct dentry *ovl_lookup_or_create(struct dentry *parent,
					   const char *name, umode_t mode)
{
	struct inode *dir = parent->d_inode;
	struct dentry *dentry;

	inode_lock_nested(dir, I_MUTEX_PARENT);

	dentry = lookup_one_len(name, parent, strlen(name));
	if (!IS_ERR(dentry) && !dentry->d_inode)
		dentry = ovl_create_real(dir, dentry, OVL_CATTR(mode));

	inode_unlock(dir);
	dput(parent);

	return dentry;
}
1279
1280 /*
1281 * Creates $workdir/work/incompat/volatile/dirty file if it is not already
1282 * present.
1283 */
ovl_create_volatile_dirty(struct ovl_fs * ofs)1284 static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
1285 {
1286 unsigned int ctr;
1287 struct dentry *d = dget(ofs->workbasedir);
1288 static const char *const volatile_path[] = {
1289 OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
1290 };
1291 const char *const *name = volatile_path;
1292
1293 for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
1294 d = ovl_lookup_or_create(d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
1295 if (IS_ERR(d))
1296 return PTR_ERR(d);
1297 }
1298 dput(d);
1299 return 0;
1300 }
1301
ovl_make_workdir(struct super_block * sb,struct ovl_fs * ofs,struct path * workpath)1302 static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
1303 struct path *workpath)
1304 {
1305 struct vfsmount *mnt = ovl_upper_mnt(ofs);
1306 struct dentry *temp, *workdir;
1307 bool rename_whiteout;
1308 bool d_type;
1309 int fh_type;
1310 int err;
1311
1312 err = mnt_want_write(mnt);
1313 if (err)
1314 return err;
1315
1316 workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
1317 err = PTR_ERR(workdir);
1318 if (IS_ERR_OR_NULL(workdir))
1319 goto out;
1320
1321 ofs->workdir = workdir;
1322
1323 err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
1324 if (err)
1325 goto out;
1326
1327 /*
1328 * Upper should support d_type, else whiteouts are visible. Given
1329 * workdir and upper are on same fs, we can do iterate_dir() on
1330 * workdir. This check requires successful creation of workdir in
1331 * previous step.
1332 */
1333 err = ovl_check_d_type_supported(workpath);
1334 if (err < 0)
1335 goto out;
1336
1337 d_type = err;
1338 if (!d_type)
1339 pr_warn("upper fs needs to support d_type.\n");
1340
1341 /* Check if upper/work fs supports O_TMPFILE */
1342 temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
1343 ofs->tmpfile = !IS_ERR(temp);
1344 if (ofs->tmpfile)
1345 dput(temp);
1346 else
1347 pr_warn("upper fs does not support tmpfile.\n");
1348
1349
1350 /* Check if upper/work fs supports RENAME_WHITEOUT */
1351 err = ovl_check_rename_whiteout(ofs->workdir);
1352 if (err < 0)
1353 goto out;
1354
1355 rename_whiteout = err;
1356 if (!rename_whiteout)
1357 pr_warn("upper fs does not support RENAME_WHITEOUT.\n");
1358
1359 /*
1360 * Check if upper/work fs supports trusted.overlay.* xattr
1361 */
1362 err = ovl_do_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
1363 if (err) {
1364 ofs->noxattr = true;
1365 ofs->config.index = false;
1366 ofs->config.metacopy = false;
1367 pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
1368 err = 0;
1369 } else {
1370 ovl_do_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
1371 }
1372
1373 /*
1374 * We allowed sub-optimal upper fs configuration and don't want to break
1375 * users over kernel upgrade, but we never allowed remote upper fs, so
1376 * we can enforce strict requirements for remote upper fs.
1377 */
1378 if (ovl_dentry_remote(ofs->workdir) &&
1379 (!d_type || !rename_whiteout || ofs->noxattr)) {
1380 pr_err("upper fs missing required features.\n");
1381 err = -EINVAL;
1382 goto out;
1383 }
1384
1385 /*
1386 * For volatile mount, create a incompat/volatile/dirty file to keep
1387 * track of it.
1388 */
1389 if (ofs->config.ovl_volatile) {
1390 err = ovl_create_volatile_dirty(ofs);
1391 if (err < 0) {
1392 pr_err("Failed to create volatile/dirty file.\n");
1393 goto out;
1394 }
1395 }
1396
1397 /* Check if upper/work fs supports file handles */
1398 fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
1399 if (ofs->config.index && !fh_type) {
1400 ofs->config.index = false;
1401 pr_warn("upper fs does not support file handles, falling back to index=off.\n");
1402 }
1403
1404 /* Check if upper fs has 32bit inode numbers */
1405 if (fh_type != FILEID_INO32_GEN)
1406 ofs->xino_mode = -1;
1407
1408 /* NFS export of r/w mount depends on index */
1409 if (ofs->config.nfs_export && !ofs->config.index) {
1410 pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
1411 ofs->config.nfs_export = false;
1412 }
1413 out:
1414 mnt_drop_write(mnt);
1415 return err;
1416 }
1417
ovl_get_workdir(struct super_block * sb,struct ovl_fs * ofs,struct path * upperpath)1418 static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
1419 struct path *upperpath)
1420 {
1421 int err;
1422 struct path workpath = { };
1423
1424 err = ovl_mount_dir(ofs->config.workdir, &workpath);
1425 if (err)
1426 goto out;
1427
1428 err = -EINVAL;
1429 if (upperpath->mnt != workpath.mnt) {
1430 pr_err("workdir and upperdir must reside under the same mount\n");
1431 goto out;
1432 }
1433 if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
1434 pr_err("workdir and upperdir must be separate subtrees\n");
1435 goto out;
1436 }
1437
1438 ofs->workbasedir = dget(workpath.dentry);
1439
1440 if (ovl_inuse_trylock(ofs->workbasedir)) {
1441 ofs->workdir_locked = true;
1442 } else {
1443 err = ovl_report_in_use(ofs, "workdir");
1444 if (err)
1445 goto out;
1446 }
1447
1448 err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
1449 "workdir");
1450 if (err)
1451 goto out;
1452
1453 err = ovl_make_workdir(sb, ofs, &workpath);
1454
1455 out:
1456 path_put(&workpath);
1457
1458 return err;
1459 }
1460
ovl_get_indexdir(struct super_block * sb,struct ovl_fs * ofs,struct ovl_entry * oe,struct path * upperpath)1461 static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
1462 struct ovl_entry *oe, struct path *upperpath)
1463 {
1464 struct vfsmount *mnt = ovl_upper_mnt(ofs);
1465 struct dentry *indexdir;
1466 int err;
1467
1468 err = mnt_want_write(mnt);
1469 if (err)
1470 return err;
1471
1472 /* Verify lower root is upper root origin */
1473 err = ovl_verify_origin(ofs, upperpath->dentry,
1474 oe->lowerstack[0].dentry, true);
1475 if (err) {
1476 pr_err("failed to verify upper root origin\n");
1477 goto out;
1478 }
1479
1480 /* index dir will act also as workdir */
1481 iput(ofs->workdir_trap);
1482 ofs->workdir_trap = NULL;
1483 dput(ofs->workdir);
1484 ofs->workdir = NULL;
1485 indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
1486 if (IS_ERR(indexdir)) {
1487 err = PTR_ERR(indexdir);
1488 } else if (indexdir) {
1489 ofs->indexdir = indexdir;
1490 ofs->workdir = dget(indexdir);
1491
1492 err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
1493 "indexdir");
1494 if (err)
1495 goto out;
1496
1497 /*
1498 * Verify upper root is exclusively associated with index dir.
1499 * Older kernels stored upper fh in "trusted.overlay.origin"
1500 * xattr. If that xattr exists, verify that it is a match to
1501 * upper dir file handle. In any case, verify or set xattr
1502 * "trusted.overlay.upper" to indicate that index may have
1503 * directory entries.
1504 */
1505 if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
1506 err = ovl_verify_set_fh(ofs, ofs->indexdir,
1507 OVL_XATTR_ORIGIN,
1508 upperpath->dentry, true, false);
1509 if (err)
1510 pr_err("failed to verify index dir 'origin' xattr\n");
1511 }
1512 err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry,
1513 true);
1514 if (err)
1515 pr_err("failed to verify index dir 'upper' xattr\n");
1516
1517 /* Cleanup bad/stale/orphan index entries */
1518 if (!err)
1519 err = ovl_indexdir_cleanup(ofs);
1520 }
1521 if (err || !ofs->indexdir)
1522 pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
1523
1524 out:
1525 mnt_drop_write(mnt);
1526 return err;
1527 }
1528
ovl_lower_uuid_ok(struct ovl_fs * ofs,const uuid_t * uuid)1529 static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
1530 {
1531 unsigned int i;
1532
1533 if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
1534 return true;
1535
1536 /*
1537 * We allow using single lower with null uuid for index and nfs_export
1538 * for example to support those features with single lower squashfs.
1539 * To avoid regressions in setups of overlay with re-formatted lower
1540 * squashfs, do not allow decoding origin with lower null uuid unless
1541 * user opted-in to one of the new features that require following the
1542 * lower inode of non-dir upper.
1543 */
1544 if (!ofs->config.index && !ofs->config.metacopy && !ofs->config.xino &&
1545 uuid_is_null(uuid))
1546 return false;
1547
1548 for (i = 0; i < ofs->numfs; i++) {
1549 /*
1550 * We use uuid to associate an overlay lower file handle with a
1551 * lower layer, so we can accept lower fs with null uuid as long
1552 * as all lower layers with null uuid are on the same fs.
1553 * if we detect multiple lower fs with the same uuid, we
1554 * disable lower file handle decoding on all of them.
1555 */
1556 if (ofs->fs[i].is_lower &&
1557 uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
1558 ofs->fs[i].bad_uuid = true;
1559 return false;
1560 }
1561 }
1562 return true;
1563 }
1564
1565 /* Get a unique fsid for the layer */
ovl_get_fsid(struct ovl_fs * ofs,const struct path * path)1566 static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
1567 {
1568 struct super_block *sb = path->mnt->mnt_sb;
1569 unsigned int i;
1570 dev_t dev;
1571 int err;
1572 bool bad_uuid = false;
1573
1574 for (i = 0; i < ofs->numfs; i++) {
1575 if (ofs->fs[i].sb == sb)
1576 return i;
1577 }
1578
1579 if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
1580 bad_uuid = true;
1581 if (ofs->config.index || ofs->config.nfs_export) {
1582 ofs->config.index = false;
1583 ofs->config.nfs_export = false;
1584 pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
1585 uuid_is_null(&sb->s_uuid) ? "null" :
1586 "conflicting",
1587 path->dentry);
1588 }
1589 }
1590
1591 err = get_anon_bdev(&dev);
1592 if (err) {
1593 pr_err("failed to get anonymous bdev for lowerpath\n");
1594 return err;
1595 }
1596
1597 ofs->fs[ofs->numfs].sb = sb;
1598 ofs->fs[ofs->numfs].pseudo_dev = dev;
1599 ofs->fs[ofs->numfs].bad_uuid = bad_uuid;
1600
1601 return ofs->numfs++;
1602 }
1603
ovl_get_layers(struct super_block * sb,struct ovl_fs * ofs,struct path * stack,unsigned int numlower,struct ovl_layer * layers)1604 static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
1605 struct path *stack, unsigned int numlower,
1606 struct ovl_layer *layers)
1607 {
1608 int err;
1609 unsigned int i;
1610
1611 err = -ENOMEM;
1612 ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
1613 if (ofs->fs == NULL)
1614 goto out;
1615
1616 /* idx/fsid 0 are reserved for upper fs even with lower only overlay */
1617 ofs->numfs++;
1618
1619 /*
1620 * All lower layers that share the same fs as upper layer, use the same
1621 * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower
1622 * only overlay to simplify ovl_fs_free().
1623 * is_lower will be set if upper fs is shared with a lower layer.
1624 */
1625 err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
1626 if (err) {
1627 pr_err("failed to get anonymous bdev for upper fs\n");
1628 goto out;
1629 }
1630
1631 if (ovl_upper_mnt(ofs)) {
1632 ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
1633 ofs->fs[0].is_lower = false;
1634 }
1635
1636 for (i = 0; i < numlower; i++) {
1637 struct vfsmount *mnt;
1638 struct inode *trap;
1639 int fsid;
1640
1641 err = fsid = ovl_get_fsid(ofs, &stack[i]);
1642 if (err < 0)
1643 goto out;
1644
1645 /*
1646 * Check if lower root conflicts with this overlay layers before
1647 * checking if it is in-use as upperdir/workdir of "another"
1648 * mount, because we do not bother to check in ovl_is_inuse() if
1649 * the upperdir/workdir is in fact in-use by our
1650 * upperdir/workdir.
1651 */
1652 err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
1653 if (err)
1654 goto out;
1655
1656 if (ovl_is_inuse(stack[i].dentry)) {
1657 err = ovl_report_in_use(ofs, "lowerdir");
1658 if (err) {
1659 iput(trap);
1660 goto out;
1661 }
1662 }
1663
1664 mnt = clone_private_mount(&stack[i]);
1665 err = PTR_ERR(mnt);
1666 if (IS_ERR(mnt)) {
1667 pr_err("failed to clone lowerpath\n");
1668 iput(trap);
1669 goto out;
1670 }
1671
1672 /*
1673 * Make lower layers R/O. That way fchmod/fchown on lower file
1674 * will fail instead of modifying lower fs.
1675 */
1676 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
1677
1678 layers[ofs->numlayer].trap = trap;
1679 layers[ofs->numlayer].mnt = mnt;
1680 layers[ofs->numlayer].idx = ofs->numlayer;
1681 layers[ofs->numlayer].fsid = fsid;
1682 layers[ofs->numlayer].fs = &ofs->fs[fsid];
1683 ofs->numlayer++;
1684 ofs->fs[fsid].is_lower = true;
1685 }
1686
1687 /*
1688 * When all layers on same fs, overlay can use real inode numbers.
1689 * With mount option "xino=<on|auto>", mounter declares that there are
1690 * enough free high bits in underlying fs to hold the unique fsid.
1691 * If overlayfs does encounter underlying inodes using the high xino
1692 * bits reserved for fsid, it emits a warning and uses the original
1693 * inode number or a non persistent inode number allocated from a
1694 * dedicated range.
1695 */
1696 if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) {
1697 if (ofs->config.xino == OVL_XINO_ON)
1698 pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
1699 ofs->xino_mode = 0;
1700 } else if (ofs->config.xino == OVL_XINO_OFF) {
1701 ofs->xino_mode = -1;
1702 } else if (ofs->xino_mode < 0) {
1703 /*
1704 * This is a roundup of number of bits needed for encoding
1705 * fsid, where fsid 0 is reserved for upper fs (even with
1706 * lower only overlay) +1 extra bit is reserved for the non
1707 * persistent inode number range that is used for resolving
1708 * xino lower bits overflow.
1709 */
1710 BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
1711 ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
1712 }
1713
1714 if (ofs->xino_mode > 0) {
1715 pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
1716 ofs->xino_mode);
1717 }
1718
1719 err = 0;
1720 out:
1721 return err;
1722 }
1723
ovl_get_lowerstack(struct super_block * sb,const char * lower,unsigned int numlower,struct ovl_fs * ofs,struct ovl_layer * layers)1724 static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
1725 const char *lower, unsigned int numlower,
1726 struct ovl_fs *ofs, struct ovl_layer *layers)
1727 {
1728 int err;
1729 struct path *stack = NULL;
1730 unsigned int i;
1731 struct ovl_entry *oe;
1732
1733 if (!ofs->config.upperdir && numlower == 1) {
1734 pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
1735 return ERR_PTR(-EINVAL);
1736 }
1737
1738 stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL);
1739 if (!stack)
1740 return ERR_PTR(-ENOMEM);
1741
1742 err = -EINVAL;
1743 for (i = 0; i < numlower; i++) {
1744 err = ovl_lower_dir(lower, &stack[i], ofs, &sb->s_stack_depth);
1745 if (err)
1746 goto out_err;
1747
1748 lower = strchr(lower, '\0') + 1;
1749 }
1750
1751 err = -EINVAL;
1752 sb->s_stack_depth++;
1753 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1754 pr_err("maximum fs stacking depth exceeded\n");
1755 goto out_err;
1756 }
1757
1758 err = ovl_get_layers(sb, ofs, stack, numlower, layers);
1759 if (err)
1760 goto out_err;
1761
1762 err = -ENOMEM;
1763 oe = ovl_alloc_entry(numlower);
1764 if (!oe)
1765 goto out_err;
1766
1767 for (i = 0; i < numlower; i++) {
1768 oe->lowerstack[i].dentry = dget(stack[i].dentry);
1769 oe->lowerstack[i].layer = &ofs->layers[i+1];
1770 }
1771
1772 out:
1773 for (i = 0; i < numlower; i++)
1774 path_put(&stack[i]);
1775 kfree(stack);
1776
1777 return oe;
1778
1779 out_err:
1780 oe = ERR_PTR(err);
1781 goto out;
1782 }
1783
1784 /*
1785 * Check if this layer root is a descendant of:
1786 * - another layer of this overlayfs instance
1787 * - upper/work dir of any overlayfs instance
1788 */
ovl_check_layer(struct super_block * sb,struct ovl_fs * ofs,struct dentry * dentry,const char * name,bool is_lower)1789 static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
1790 struct dentry *dentry, const char *name,
1791 bool is_lower)
1792 {
1793 struct dentry *next = dentry, *parent;
1794 int err = 0;
1795
1796 if (!dentry)
1797 return 0;
1798
1799 parent = dget_parent(next);
1800
1801 /* Walk back ancestors to root (inclusive) looking for traps */
1802 while (!err && parent != next) {
1803 if (is_lower && ovl_lookup_trap_inode(sb, parent)) {
1804 err = -ELOOP;
1805 pr_err("overlapping %s path\n", name);
1806 } else if (ovl_is_inuse(parent)) {
1807 err = ovl_report_in_use(ofs, name);
1808 }
1809 next = parent;
1810 parent = dget_parent(next);
1811 dput(next);
1812 }
1813
1814 dput(parent);
1815
1816 return err;
1817 }
1818
1819 /*
1820 * Check if any of the layers or work dirs overlap.
1821 */
ovl_check_overlapping_layers(struct super_block * sb,struct ovl_fs * ofs)1822 static int ovl_check_overlapping_layers(struct super_block *sb,
1823 struct ovl_fs *ofs)
1824 {
1825 int i, err;
1826
1827 if (ovl_upper_mnt(ofs)) {
1828 err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root,
1829 "upperdir", false);
1830 if (err)
1831 return err;
1832
1833 /*
1834 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
1835 * this instance and covers overlapping work and index dirs,
1836 * unless work or index dir have been moved since created inside
1837 * workbasedir. In that case, we already have their traps in
1838 * inode cache and we will catch that case on lookup.
1839 */
1840 err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir",
1841 false);
1842 if (err)
1843 return err;
1844 }
1845
1846 for (i = 1; i < ofs->numlayer; i++) {
1847 err = ovl_check_layer(sb, ofs,
1848 ofs->layers[i].mnt->mnt_root,
1849 "lowerdir", true);
1850 if (err)
1851 return err;
1852 }
1853
1854 return 0;
1855 }
1856
ovl_get_root(struct super_block * sb,struct dentry * upperdentry,struct ovl_entry * oe)1857 static struct dentry *ovl_get_root(struct super_block *sb,
1858 struct dentry *upperdentry,
1859 struct ovl_entry *oe)
1860 {
1861 struct dentry *root;
1862 struct ovl_path *lowerpath = &oe->lowerstack[0];
1863 unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
1864 int fsid = lowerpath->layer->fsid;
1865 struct ovl_inode_params oip = {
1866 .upperdentry = upperdentry,
1867 .lowerpath = lowerpath,
1868 };
1869
1870 root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
1871 if (!root)
1872 return NULL;
1873
1874 root->d_fsdata = oe;
1875
1876 if (upperdentry) {
1877 /* Root inode uses upper st_ino/i_ino */
1878 ino = d_inode(upperdentry)->i_ino;
1879 fsid = 0;
1880 ovl_dentry_set_upper_alias(root);
1881 if (ovl_is_impuredir(sb, upperdentry))
1882 ovl_set_flag(OVL_IMPURE, d_inode(root));
1883 }
1884
1885 /* Root is always merge -> can have whiteouts */
1886 ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
1887 ovl_dentry_set_flag(OVL_E_CONNECTED, root);
1888 ovl_set_upperdata(d_inode(root));
1889 ovl_inode_init(d_inode(root), &oip, ino, fsid);
1890 ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE);
1891
1892 return root;
1893 }
1894
ovl_fill_super(struct super_block * sb,void * data,int silent)1895 static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1896 {
1897 struct path upperpath = { };
1898 struct dentry *root_dentry;
1899 struct ovl_entry *oe;
1900 struct ovl_fs *ofs;
1901 struct ovl_layer *layers;
1902 struct cred *cred;
1903 char *splitlower = NULL;
1904 unsigned int numlower;
1905 int err;
1906
1907 sb->s_d_op = &ovl_dentry_operations;
1908
1909 err = -ENOMEM;
1910 ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
1911 if (!ofs)
1912 goto out;
1913
1914 ofs->creator_cred = cred = prepare_creds();
1915 if (!cred)
1916 goto out_err;
1917
1918 /* Is there a reason anyone would want not to share whiteouts? */
1919 ofs->share_whiteout = true;
1920
1921 ofs->config.index = ovl_index_def;
1922 ofs->config.nfs_export = ovl_nfs_export_def;
1923 ofs->config.xino = ovl_xino_def();
1924 ofs->config.metacopy = ovl_metacopy_def;
1925 err = ovl_parse_opt((char *) data, &ofs->config);
1926 if (err)
1927 goto out_err;
1928
1929 err = -EINVAL;
1930 if (!ofs->config.lowerdir) {
1931 if (!silent)
1932 pr_err("missing 'lowerdir'\n");
1933 goto out_err;
1934 }
1935
1936 err = -ENOMEM;
1937 splitlower = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
1938 if (!splitlower)
1939 goto out_err;
1940
1941 numlower = ovl_split_lowerdirs(splitlower);
1942 if (numlower > OVL_MAX_STACK) {
1943 pr_err("too many lower directories, limit is %d\n",
1944 OVL_MAX_STACK);
1945 goto out_err;
1946 }
1947
1948 layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
1949 if (!layers)
1950 goto out_err;
1951
1952 ofs->layers = layers;
1953 /* Layer 0 is reserved for upper even if there's no upper */
1954 ofs->numlayer = 1;
1955
1956 sb->s_stack_depth = 0;
1957 sb->s_maxbytes = MAX_LFS_FILESIZE;
1958 atomic_long_set(&ofs->last_ino, 1);
1959 /* Assume underlaying fs uses 32bit inodes unless proven otherwise */
1960 if (ofs->config.xino != OVL_XINO_OFF) {
1961 ofs->xino_mode = BITS_PER_LONG - 32;
1962 if (!ofs->xino_mode) {
1963 pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
1964 ofs->config.xino = OVL_XINO_OFF;
1965 }
1966 }
1967
1968 /* alloc/destroy_inode needed for setting up traps in inode cache */
1969 sb->s_op = &ovl_super_operations;
1970
1971 if (ofs->config.upperdir) {
1972 struct super_block *upper_sb;
1973
1974 if (!ofs->config.workdir) {
1975 pr_err("missing 'workdir'\n");
1976 goto out_err;
1977 }
1978
1979 err = ovl_get_upper(sb, ofs, &layers[0], &upperpath);
1980 if (err)
1981 goto out_err;
1982
1983 upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
1984 if (!ovl_should_sync(ofs)) {
1985 ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
1986 if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
1987 err = -EIO;
1988 pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
1989 goto out_err;
1990 }
1991 }
1992
1993 err = ovl_get_workdir(sb, ofs, &upperpath);
1994 if (err)
1995 goto out_err;
1996
1997 if (!ofs->workdir)
1998 sb->s_flags |= SB_RDONLY;
1999
2000 sb->s_stack_depth = upper_sb->s_stack_depth;
2001 sb->s_time_gran = upper_sb->s_time_gran;
2002 }
2003 oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
2004 err = PTR_ERR(oe);
2005 if (IS_ERR(oe))
2006 goto out_err;
2007
2008 /* If the upper fs is nonexistent, we mark overlayfs r/o too */
2009 if (!ovl_upper_mnt(ofs))
2010 sb->s_flags |= SB_RDONLY;
2011
2012 if (!ovl_force_readonly(ofs) && ofs->config.index) {
2013 err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
2014 if (err)
2015 goto out_free_oe;
2016
2017 /* Force r/o mount with no index dir */
2018 if (!ofs->indexdir)
2019 sb->s_flags |= SB_RDONLY;
2020 }
2021
2022 err = ovl_check_overlapping_layers(sb, ofs);
2023 if (err)
2024 goto out_free_oe;
2025
2026 /* Show index=off in /proc/mounts for forced r/o mount */
2027 if (!ofs->indexdir) {
2028 ofs->config.index = false;
2029 if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) {
2030 pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
2031 ofs->config.nfs_export = false;
2032 }
2033 }
2034
2035 if (ofs->config.metacopy && ofs->config.nfs_export) {
2036 pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
2037 ofs->config.nfs_export = false;
2038 }
2039
2040 if (ofs->config.nfs_export)
2041 sb->s_export_op = &ovl_export_operations;
2042
2043 /* Never override disk quota limits or use reserved space */
2044 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
2045
2046 sb->s_magic = OVERLAYFS_SUPER_MAGIC;
2047 sb->s_xattr = ovl_xattr_handlers;
2048 sb->s_fs_info = ofs;
2049 sb->s_flags |= SB_POSIXACL;
2050 sb->s_iflags |= SB_I_SKIP_SYNC;
2051
2052 err = -ENOMEM;
2053 root_dentry = ovl_get_root(sb, upperpath.dentry, oe);
2054 if (!root_dentry)
2055 goto out_free_oe;
2056
2057 mntput(upperpath.mnt);
2058 kfree(splitlower);
2059
2060 sb->s_root = root_dentry;
2061 return 0;
2062
2063 out_free_oe:
2064 ovl_entry_stack_free(oe);
2065 kfree(oe);
2066 out_err:
2067 kfree(splitlower);
2068 path_put(&upperpath);
2069 ovl_free_fs(ofs);
2070 out:
2071 return err;
2072 }
2073
ovl_mount(struct file_system_type * fs_type,int flags,const char * dev_name,void * raw_data)2074 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
2075 const char *dev_name, void *raw_data)
2076 {
2077 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
2078 }
2079
2080 static struct file_system_type ovl_fs_type = {
2081 .owner = THIS_MODULE,
2082 .name = "overlay",
2083 .mount = ovl_mount,
2084 .kill_sb = kill_anon_super,
2085 };
2086 MODULE_ALIAS_FS("overlay");
2087
ovl_inode_init_once(void * foo)2088 static void ovl_inode_init_once(void *foo)
2089 {
2090 struct ovl_inode *oi = foo;
2091
2092 inode_init_once(&oi->vfs_inode);
2093 }
2094
ovl_init(void)2095 static int __init ovl_init(void)
2096 {
2097 int err;
2098
2099 ovl_inode_cachep = kmem_cache_create("ovl_inode",
2100 sizeof(struct ovl_inode), 0,
2101 (SLAB_RECLAIM_ACCOUNT|
2102 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
2103 ovl_inode_init_once);
2104 if (ovl_inode_cachep == NULL)
2105 return -ENOMEM;
2106
2107 err = ovl_aio_request_cache_init();
2108 if (!err) {
2109 err = register_filesystem(&ovl_fs_type);
2110 if (!err)
2111 return 0;
2112
2113 ovl_aio_request_cache_destroy();
2114 }
2115 kmem_cache_destroy(ovl_inode_cachep);
2116
2117 return err;
2118 }
2119
/*
 * Module exit: unregister the filesystem type first, then release the
 * caches created by ovl_init().
 */
static void __exit ovl_exit(void)
{
	unregister_filesystem(&ovl_fs_type);

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ovl_inode_cachep);
	ovl_aio_request_cache_destroy();
}
2132
/* Hook ovl_init()/ovl_exit() up as the module's entry and exit points. */
module_init(ovl_init);
module_exit(ovl_exit);
2135