3 * Copyright (C) 2011 Novell Inc.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
11 #include <linux/namei.h>
12 #include <linux/xattr.h>
13 #include <linux/security.h>
14 #include <linux/cred.h>
15 #include <linux/module.h>
16 #include <linux/posix_acl.h>
17 #include <linux/posix_acl_xattr.h>
18 #include <linux/atomic.h>
19 #include <linux/ratelimit.h>
20 #include "overlayfs.h"
/*
 * Upper bound on the length of an absolute redirect xattr value.  Exposed
 * as the module parameter redirect_max with permissions 0644.
 */
22 static unsigned short ovl_redirect_max = 256;
23 module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
/*
 * The description has to be keyed by the exposed parameter name
 * (redirect_max), not by the name of the backing variable, otherwise it is
 * attached to a parameter that does not exist.
 */
24 MODULE_PARM_DESC(redirect_max,
25 "Maximum length of absolute redirect xattr value");
/*
 * Remove wdentry from the directory wdir, using rmdir for directories and
 * unlink for everything else.  The function returns void, so a failure is
 * only visible through the pr_err() message below.
 */
27 void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
32 if (d_is_dir(wdentry))
33 err = ovl_do_rmdir(wdir, wdentry);
35 err = ovl_do_unlink(wdir, wdentry);
39 pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
/*
 * Look up a temporary name under workdir.  Names have the form #<hex>,
 * where <hex> comes from a function-local atomic counter that is bumped on
 * every call.  Finding a positive dentry under the generated name is
 * logged as an error.
 * NOTE(review): the dentry argument is not referenced by any statement
 * visible in this listing.
 */
44 struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
48 static atomic_t temp_id = ATOMIC_INIT(0);
50 /* counter is allowed to wrap, since temp dentries are ephemeral */
51 snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
53 temp = lookup_one_len(name, workdir, strlen(name));
/* A successful lookup that yields an inode means the name is taken. */
54 if (!IS_ERR(temp) && temp->d_inode) {
55 pr_err("overlayfs: workdir/%s already exists\n", name);
/*
 * Create a whiteout under a temporary name in workdir.  If
 * ovl_do_whiteout() fails, the dentry pointer is replaced by ERR_PTR(err);
 * the caller in this file tests the result with IS_ERR().
 * NOTE(review): the return statement and the error check on the temp
 * lookup are on lines omitted from this listing.
 */
63 /* caller holds i_mutex on workdir */
64 static struct dentry *ovl_whiteout(struct dentry *workdir,
65 struct dentry *dentry)
68 struct dentry *whiteout;
69 struct inode *wdir = workdir->d_inode;
71 whiteout = ovl_lookup_temp(workdir, dentry);
75 err = ovl_do_whiteout(wdir, whiteout);
78 whiteout = ERR_PTR(err);
/*
 * Create the real filesystem object for newdentry inside dir.
 *
 * A hard link is made with ovl_do_link() from hardlink; otherwise the file
 * type bits of attr->mode pick one of create, mkdir, mknod or symlink.  The
 * debug flag is forwarded unchanged to every ovl_do_*() helper.
 * NOTE(review): the branch choosing between the link call and the switch
 * is on a line omitted from this listing - presumably a test of hardlink.
 */
84 int ovl_create_real(struct inode *dir, struct dentry *newdentry,
85 struct cattr *attr, struct dentry *hardlink, bool debug)
/* newdentry is expected to be negative here; the handling line is omitted. */
89 if (newdentry->d_inode)
93 err = ovl_do_link(hardlink, dir, newdentry, debug);
95 switch (attr->mode & S_IFMT) {
97 err = ovl_do_create(dir, newdentry, attr->mode, debug);
101 err = ovl_do_mkdir(dir, newdentry, attr->mode, debug);
108 err = ovl_do_mknod(dir, newdentry,
109 attr->mode, attr->rdev, debug);
113 err = ovl_do_symlink(dir, newdentry, attr->link, debug);
/* A successful creation must leave newdentry with an inode; warn if not. */
120 if (!err && WARN_ON(!newdentry->d_inode)) {
122 * Not quite sure if non-instantiated dentry is legal or not.
123 * VFS doesn't seem to care so check and warn here.
/*
 * Flag upperdentry as opaque by writing the one-byte value y to the
 * OVL_XATTR_OPAQUE xattr, and mirror that state on the overlay dentry via
 * ovl_dentry_set_opaque().
 * NOTE(review): whether the second step is conditional on the xattr write
 * succeeding cannot be seen here (line omitted) - confirm.
 */
130 static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
134 err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
136 ovl_dentry_set_opaque(dentry);
/*
 * getattr for overlay directories.  The real path is resolved with
 * ovl_path_real() and queried through vfs_getattr() while the credentials
 * from ovl_override_creds() are in effect; afterwards the device and inode
 * number in stat are overwritten with those of the overlay superblock and
 * the overlay inode.  Merge-type directories get extra treatment at the
 * end (see the nlink remark in the existing comment).
 */
141 static int ovl_dir_getattr(const struct path *path, struct kstat *stat,
142 u32 request_mask, unsigned int flags)
144 struct dentry *dentry = path->dentry;
146 enum ovl_path_type type;
147 struct path realpath;
148 const struct cred *old_cred;
150 type = ovl_path_real(dentry, &realpath);
151 old_cred = ovl_override_creds(dentry->d_sb);
152 err = vfs_getattr(&realpath, stat, request_mask, flags);
153 revert_creds(old_cred);
157 stat->dev = dentry->d_sb->s_dev;
158 stat->ino = dentry->d_inode->i_ino;
161 * It's probably not worth it to count subdirs to get the
162 * correct link count. nlink=1 seems to pacify 'find' and
165 if (OVL_TYPE_MERGE(type))
/*
 * Arguments: dentry is the overlay dentry being created, inode its overlay
 * inode and newdentry the freshly created upper dentry.  Callers in this
 * file pass !!hardlink, i.e. true when an existing inode is being linked.
 * NOTE(review): the if/else that consumes hardlink is omitted from this
 * listing; presumably the inode update runs for new objects and the
 * WARN_ON check for hard links - confirm.
 */
171 /* Common operations required to be done after creation of file on upper */
172 static void ovl_instantiate(struct dentry *dentry, struct inode *inode,
173 struct dentry *newdentry, bool hardlink)
/* The parent directory changed, so bump its version. */
175 ovl_dentry_version_inc(dentry->d_parent);
176 ovl_dentry_update(dentry, newdentry);
178 ovl_inode_update(inode, d_inode(newdentry));
179 ovl_copyattr(newdentry->d_inode, inode);
181 WARN_ON(ovl_inode_real(inode, NULL) != d_inode(newdentry));
184 d_instantiate(dentry, inode);
/* Return true when the path type of dentry satisfies OVL_TYPE_MERGE(). */
187 static bool ovl_type_merge(struct dentry *dentry)
189 return OVL_TYPE_MERGE(ovl_path_type(dentry));
/*
 * Create a new object, or a hard link to hardlink, directly in the upper
 * directory of the parent of dentry.
 *
 * Visible steps: apply the umask where needed, lock the upper directory
 * inode, look up the target name, create the real object through
 * ovl_create_real() and finally instantiate the overlay dentry.
 * NOTE(review): unlock, dput and error labels are on omitted lines.
 */
192 static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
193 struct cattr *attr, struct dentry *hardlink)
195 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
196 struct inode *udir = upperdir->d_inode;
197 struct dentry *newdentry;
/* For new objects on an upper dir without POSIX ACLs, mask the mode here. */
200 if (!hardlink && !IS_POSIXACL(udir))
201 attr->mode &= ~current_umask();
203 inode_lock_nested(udir, I_MUTEX_PARENT);
204 newdentry = lookup_one_len(dentry->d_name.name, upperdir,
206 err = PTR_ERR(newdentry);
207 if (IS_ERR(newdentry))
209 err = ovl_create_real(udir, newdentry, attr, hardlink, false);
/* A new directory inside a merge directory is marked opaque. */
213 if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) {
214 /* Setting opaque here is just an optimization, allow to fail */
215 ovl_set_opaque(dentry, newdentry);
218 ovl_instantiate(dentry, inode, newdentry, !!hardlink);
/*
 * Take the rename locks on workdir and upperdir.  Identical directories
 * are rejected up front, and a non-NULL result from lock_rename() is
 * treated as failure as well.  On the failure path visible here the locks
 * are dropped again and an error is logged.
 * NOTE(review): the returned error codes are on omitted lines.
 */
227 static int ovl_lock_rename_workdir(struct dentry *workdir,
228 struct dentry *upperdir)
230 /* Workdir should not be the same as upperdir */
231 if (workdir == upperdir)
234 /* Workdir should not be subdir of upperdir and vice versa */
235 if (lock_rename(workdir, upperdir) != NULL)
241 unlock_rename(workdir, upperdir);
243 pr_err("overlayfs: failed to lock workdir+upperdir\n");
/*
 * Replace the upper directory of dentry with a fresh opaque directory.
 *
 * Visible steps: lock workdir and upperdir for rename, stat the current
 * upper dir, create a temporary directory with the same mode in workdir,
 * copy xattrs onto it, mark it opaque, copy the attributes from stat,
 * exchange it with the upper dir (RENAME_EXCHANGE) and then remove the old
 * directory, which now lives in workdir, after cleaning up the whiteouts
 * recorded in list.
 * NOTE(review): error labels and the return statements are on omitted
 * lines; the early exit below shows that errors come back as ERR_PTR().
 */
247 static struct dentry *ovl_clear_empty(struct dentry *dentry,
248 struct list_head *list)
250 struct dentry *workdir = ovl_workdir(dentry);
/*
 * NOTE(review): workdir is dereferenced here, before the WARN_ON(!workdir)
 * test further down, so that test cannot protect this access.
 */
251 struct inode *wdir = workdir->d_inode;
252 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
253 struct inode *udir = upperdir->d_inode;
254 struct path upperpath;
255 struct dentry *upper;
256 struct dentry *opaquedir;
260 if (WARN_ON(!workdir))
261 return ERR_PTR(-EROFS);
263 err = ovl_lock_rename_workdir(workdir, upperdir);
267 ovl_path_upper(dentry, &upperpath);
268 err = vfs_getattr(&upperpath, &stat,
269 STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
274 if (!S_ISDIR(stat.mode))
276 upper = upperpath.dentry;
/* The upper dentry must still be a child of the locked upper directory. */
277 if (upper->d_parent->d_inode != udir)
280 opaquedir = ovl_lookup_temp(workdir, dentry);
281 err = PTR_ERR(opaquedir);
282 if (IS_ERR(opaquedir))
285 err = ovl_create_real(wdir, opaquedir,
286 &(struct cattr){.mode = stat.mode}, NULL, true);
290 err = ovl_copy_xattr(upper, opaquedir);
294 err = ovl_set_opaque(dentry, opaquedir);
298 inode_lock(opaquedir->d_inode);
299 err = ovl_set_attr(opaquedir, &stat);
300 inode_unlock(opaquedir->d_inode);
/* Swap the prepared directory with the current upper directory. */
304 err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
308 ovl_cleanup_whiteouts(upper, list);
309 ovl_cleanup(wdir, upper);
310 unlock_rename(workdir, upperdir);
312 /* dentry's upper doesn't match now, get rid of it */
318 ovl_cleanup(wdir, opaquedir);
322 unlock_rename(workdir, upperdir);
/*
 * Run ovl_check_empty_dir() on dentry, which fills a local list, and, when
 * the path type is both upper and merge, replace the upper directory
 * through ovl_clear_empty().  The list is released with ovl_cache_free().
 * ret starts out as NULL, and callers in this file test the result with
 * IS_ERR() and also handle a NULL result.
 * NOTE(review): the error-return lines are omitted from this listing.
 */
327 static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
330 struct dentry *ret = NULL;
331 enum ovl_path_type type = ovl_path_type(dentry);
334 err = ovl_check_empty_dir(dentry, &list);
341 * When removing an empty opaque directory, then it makes no sense to
342 * replace it with an exact replica of itself.
344 * If no upperdentry then skip clearing whiteouts.
346 * Can race with copy-up, since we don't hold the upperdir mutex.
347 * Doesn't matter, since copy-up can't create a non-empty directory
350 if (OVL_TYPE_UPPER(type) && OVL_TYPE_MERGE(type))
351 ret = ovl_clear_empty(dentry, &list);
354 ovl_cache_free(&list);
/*
 * Store acl on upperdentry as the xattr called name.
 *
 * The ACL is converted with posix_acl_to_xattr() into a kmalloc buffer
 * and written with vfs_setxattr() using XATTR_CREATE.  Nothing is written
 * when POSIX ACL support is compiled out or acl is NULL.
 * NOTE(review): the return values and the buffer release are on omitted
 * lines.
 */
359 static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
360 const struct posix_acl *acl)
366 if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
/* First call with a NULL buffer only computes the required size. */
369 size = posix_acl_to_xattr(NULL, acl, NULL, 0);
370 buffer = kmalloc(size, GFP_KERNEL);
374 size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
379 err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
/*
 * Create a new object, or hard link, at a name for which the caller found
 * ovl_dentry_is_whiteout() to be true.
 *
 * Visible steps: compute the ACLs inherited from the parent, lock workdir
 * and upperdir for rename, build the object under a temporary name in
 * workdir, fix up its mode and ACLs, mark a new directory opaque, rename
 * it onto the upper name and instantiate the overlay dentry.  Both ACL
 * references are released at the end.
 * NOTE(review): one parameter line, several guards and all labels are
 * omitted from this listing; the cattr parameter is presumably declared on
 * the missing signature line.
 */
385 static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
387 struct dentry *hardlink)
389 struct dentry *workdir = ovl_workdir(dentry);
/*
 * NOTE(review): workdir is dereferenced here, before the WARN_ON(!workdir)
 * test further down, so that test cannot protect this access.
 */
390 struct inode *wdir = workdir->d_inode;
391 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
392 struct inode *udir = upperdir->d_inode;
393 struct dentry *upper;
394 struct dentry *newdentry;
396 struct posix_acl *acl, *default_acl;
398 if (WARN_ON(!workdir))
402 err = posix_acl_create(dentry->d_parent->d_inode,
403 &cattr->mode, &default_acl, &acl);
408 err = ovl_lock_rename_workdir(workdir, upperdir);
412 newdentry = ovl_lookup_temp(workdir, dentry);
413 err = PTR_ERR(newdentry);
414 if (IS_ERR(newdentry))
417 upper = lookup_one_len(dentry->d_name.name, upperdir,
419 err = PTR_ERR(upper);
423 err = ovl_create_real(wdir, newdentry, cattr, hardlink, true);
428 * mode could have been mutilated due to umask (e.g. sgid directory)
/* Restore the requested mode if creation ended up with a different one. */
431 !S_ISLNK(cattr->mode) &&
432 newdentry->d_inode->i_mode != cattr->mode) {
433 struct iattr attr = {
434 .ia_valid = ATTR_MODE,
435 .ia_mode = cattr->mode,
437 inode_lock(newdentry->d_inode);
438 err = notify_change(newdentry, &attr, NULL);
439 inode_unlock(newdentry->d_inode);
444 err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
449 err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
/* New directories are marked opaque before the rename. */
455 if (!hardlink && S_ISDIR(cattr->mode)) {
456 err = ovl_set_opaque(dentry, newdentry);
460 err = ovl_do_rename(wdir, newdentry, udir, upper,
/* After the directory rename, upper is removed from workdir. */
465 ovl_cleanup(wdir, upper);
467 err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
471 ovl_instantiate(dentry, inode, newdentry, !!hardlink);
478 unlock_rename(workdir, upperdir);
481 posix_acl_release(acl);
482 posix_acl_release(default_acl);
487 ovl_cleanup(wdir, newdentry);
/*
 * Common path for creating a new object or a hard link at dentry.
 *
 * Visible steps: copy up the parent, switch to the credentials from
 * ovl_override_creds(), prepare a second set of credentials whose fsuid
 * and fsgid are those of inode, consult
 * security_dentry_create_files_as(), then create the object with
 * ovl_create_upper() or, if dentry is a whiteout, with
 * ovl_create_over_whiteout().  After reverting credentials the overlay
 * inode is cross-checked against the real upper inode.
 * NOTE(review): guards around the credential block and the final checks
 * are on omitted lines.
 */
491 static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
492 struct cattr *attr, struct dentry *hardlink)
495 const struct cred *old_cred;
496 struct cred *override_cred;
498 err = ovl_copy_up(dentry->d_parent);
502 old_cred = ovl_override_creds(dentry->d_sb);
504 override_cred = prepare_creds();
/* The new object is created with the owner and group of the new inode. */
506 override_cred->fsuid = inode->i_uid;
507 override_cred->fsgid = inode->i_gid;
509 err = security_dentry_create_files_as(dentry,
510 attr->mode, &dentry->d_name, old_cred,
513 put_cred(override_cred);
514 goto out_revert_creds;
/*
 * Install override_cred; the creds returned by override_creds() are put,
 * and so is the local reference obtained from prepare_creds().
 */
517 put_cred(override_creds(override_cred));
518 put_cred(override_cred);
520 if (!ovl_dentry_is_whiteout(dentry))
521 err = ovl_create_upper(dentry, inode, attr,
524 err = ovl_create_over_whiteout(dentry, inode, attr,
528 revert_creds(old_cred);
530 struct inode *realinode = d_inode(ovl_dentry_upper(dentry));
/* Mode, owner and group of the overlay inode must match the real inode. */
532 WARN_ON(inode->i_mode != realinode->i_mode);
533 WARN_ON(!uid_eq(inode->i_uid, realinode->i_uid));
534 WARN_ON(!gid_eq(inode->i_gid, realinode->i_gid));
/*
 * Shared implementation behind create, mkdir, mknod and symlink.
 *
 * Visible steps: take write access, allocate an overlay inode for mode
 * and rdev, initialise its ownership from the parent directory, copy the
 * resulting i_mode into attr, create the object through
 * ovl_create_or_link() without a hard link source, and drop write access.
 * NOTE(review): the last parameter and the attr initialiser fields are on
 * omitted lines; the symlink caller passes its link target as the fourth
 * argument.
 */
539 static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
544 struct cattr attr = {
549 err = ovl_want_write(dentry);
554 inode = ovl_new_inode(dentry->d_sb, mode, rdev);
558 inode_init_owner(inode, dentry->d_parent->d_inode, mode);
/* inode_init_owner() may have adjusted the mode; use the final value. */
559 attr.mode = inode->i_mode;
561 err = ovl_create_or_link(dentry, inode, &attr, NULL);
566 ovl_drop_write(dentry);
/*
 * Create a regular file: keep only the permission bits (07777) of mode and
 * force the type to S_IFREG.  No device number and no link target.
 */
571 static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
574 return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
/*
 * Create a directory: keep only the permission bits (07777) of mode and
 * force the type to S_IFDIR.  No device number and no link target.
 */
577 static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
579 return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
/*
 * Create a device node or other special file with the given mode and rdev.
 * A character device whose rdev equals WHITEOUT_DEV is refused.
 * NOTE(review): the error code returned for that case is on an omitted
 * line.
 */
582 static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
585 /* Don't allow creation of "whiteout" on overlay */
586 if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
589 return ovl_create_object(dentry, mode, rdev, NULL);
/*
 * Create a symbolic link: the mode is plain S_IFLNK, there is no device
 * number, and link is handed on as the link target.
 */
592 static int ovl_symlink(struct inode *dir, struct dentry *dentry,
595 return ovl_create_object(dentry, S_IFLNK, 0, link);
/*
 * Create a hard link named new to old.
 *
 * Visible steps: take write access, copy up old, then call
 * ovl_create_or_link() with the inode of old, a NULL attr and the upper
 * dentry of old as the hard link source.
 * NOTE(review): inode reference handling and the release of write access
 * are on omitted lines.
 */
598 static int ovl_link(struct dentry *old, struct inode *newdir,
604 err = ovl_want_write(old);
608 err = ovl_copy_up(old);
612 inode = d_inode(old);
615 err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old));
/*
 * Remove dentry and leave a whiteout in its place on the upper layer.
 *
 * Visible steps: optionally empty-check and replace the directory via
 * ovl_check_empty_and_clear(), lock workdir and upperdir for rename, look
 * up the upper name again and compare it with the expected dentry, create
 * a whiteout in workdir, rename it onto the upper name, clean up what
 * ends up in workdir and bump the version of the parent.
 * NOTE(review): the is_dir guards, labels and returns are on omitted
 * lines.
 */
625 static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
627 struct dentry *workdir = ovl_workdir(dentry);
/*
 * NOTE(review): workdir is dereferenced here, before the WARN_ON(!workdir)
 * test further down, so that test cannot protect this access.
 */
628 struct inode *wdir = workdir->d_inode;
629 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
630 struct inode *udir = upperdir->d_inode;
631 struct dentry *whiteout;
632 struct dentry *upper;
633 struct dentry *opaquedir = NULL;
637 if (WARN_ON(!workdir))
641 opaquedir = ovl_check_empty_and_clear(dentry);
642 err = PTR_ERR(opaquedir);
643 if (IS_ERR(opaquedir))
647 err = ovl_lock_rename_workdir(workdir, upperdir);
651 upper = lookup_one_len(dentry->d_name.name, upperdir,
653 err = PTR_ERR(upper);
/*
 * The freshly looked-up dentry must be the replacement directory, or else
 * the cached upper dentry; a mismatch is presumably an error - confirm.
 */
658 if ((opaquedir && upper != opaquedir) ||
659 (!opaquedir && ovl_dentry_upper(dentry) &&
660 upper != ovl_dentry_upper(dentry))) {
664 whiteout = ovl_whiteout(workdir, dentry);
665 err = PTR_ERR(whiteout);
666 if (IS_ERR(whiteout))
670 flags = RENAME_EXCHANGE;
672 err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
676 ovl_cleanup(wdir, upper);
678 ovl_dentry_version_inc(dentry->d_parent);
685 unlock_rename(workdir, upperdir);
692 ovl_cleanup(wdir, whiteout);
/*
 * Remove dentry from the upper layer only; the caller uses this when
 * ovl_lower_positive() is false for dentry.
 *
 * Visible steps: for a directory carrying a redirect, empty-check and
 * replace it first; then lock the upper parent, look the name up again,
 * compare it with the expected dentry, and call vfs_rmdir() or
 * vfs_unlink().  The version of the parent is bumped afterwards.
 * NOTE(review): the is_dir branch around rmdir/unlink, the unhash step
 * hinted at by the trailing comment, and all labels are on omitted lines.
 */
696 static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
698 struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
699 struct inode *dir = upperdir->d_inode;
700 struct dentry *upper;
701 struct dentry *opaquedir = NULL;
704 /* Redirect dir can be !ovl_lower_positive && OVL_TYPE_MERGE */
705 if (is_dir && ovl_dentry_get_redirect(dentry)) {
706 opaquedir = ovl_check_empty_and_clear(dentry);
707 err = PTR_ERR(opaquedir);
708 if (IS_ERR(opaquedir))
712 inode_lock_nested(dir, I_MUTEX_PARENT);
713 upper = lookup_one_len(dentry->d_name.name, upperdir,
715 err = PTR_ERR(upper);
/* The looked-up dentry must be the one this function expects to remove. */
720 if ((opaquedir && upper != opaquedir) ||
721 (!opaquedir && upper != ovl_dentry_upper(dentry)))
725 err = vfs_rmdir(dir, upper);
727 err = vfs_unlink(dir, upper, NULL);
728 ovl_dentry_version_inc(dentry->d_parent);
731 * Keeping this dentry hashed would mean having to release
732 * upperpath/lowerpath, which could only be done if we are the
733 * sole user of this dentry. Too tricky... Just unhash for
/*
 * Shared implementation behind unlink (is_dir false) and rmdir (is_dir
 * true).
 *
 * Visible steps: take write access, copy up the parent, switch to the
 * credentials from ovl_override_creds(), remove through
 * ovl_remove_upper() when ovl_lower_positive() is false and through
 * ovl_remove_and_whiteout() otherwise, revert credentials, adjust the
 * link count of the overlay inode and drop write access.
 * NOTE(review): the condition choosing between clear_nlink() and
 * drop_nlink() is on an omitted line - presumably is_dir; confirm.
 */
747 static int ovl_do_remove(struct dentry *dentry, bool is_dir)
749 enum ovl_path_type type;
751 const struct cred *old_cred;
753 err = ovl_want_write(dentry);
757 err = ovl_copy_up(dentry->d_parent);
761 type = ovl_path_type(dentry);
763 old_cred = ovl_override_creds(dentry->d_sb);
764 if (!ovl_lower_positive(dentry))
765 err = ovl_remove_upper(dentry, is_dir);
767 err = ovl_remove_and_whiteout(dentry, is_dir);
768 revert_creds(old_cred);
771 clear_nlink(dentry->d_inode);
773 drop_nlink(dentry->d_inode);
776 ovl_drop_write(dentry);
/* unlink handler: removes a non-directory via ovl_do_remove(); dir is unused. */
781 static int ovl_unlink(struct inode *dir, struct dentry *dentry)
783 return ovl_do_remove(dentry, false);
/* rmdir handler: removes a directory via ovl_do_remove(); dir is unused. */
786 static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
788 return ovl_do_remove(dentry, true);
/*
 * Return true when the path type of dentry is a merge type, or when it is
 * not an upper type at all.
 */
791 static bool ovl_type_merge_or_lower(struct dentry *dentry)
793 enum ovl_path_type type = ovl_path_type(dentry);
795 return OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type);
/*
 * Decide whether rename may move dentry.  That is the case when
 * ovl_redirect_dir() is true for the superblock, when dentry is not a
 * directory, or when it is a directory that is neither merge nor lower.
 */
798 static bool ovl_can_move(struct dentry *dentry)
800 return ovl_redirect_dir(dentry->d_sb) ||
801 !d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
/*
 * Build the redirect string for dentry and return it as an allocated
 * string, or an ERR_PTR() on failure (a NULL result is mapped to -ENOMEM
 * by the final return).
 *
 * One visible path duplicates just the name of dentry with kstrndup().
 * The other allocates a buffer of ovl_redirect_max + 1 bytes and walks
 * from dentry towards the root; each step contributes either the stored
 * redirect of the current dentry or its name.  A component that no longer
 * fits yields -EXDEV, and the walk ends early once the assembled string
 * starts with a slash.  The assembled string is then copied with
 * kstrdup().
 * NOTE(review): the samedir test, the buflen bookkeeping, the separator
 * handling and the buffer release are on omitted lines; the indexing below
 * suggests the buffer is filled from its end - confirm.
 */
804 static char *ovl_get_redirect(struct dentry *dentry, bool samedir)
807 struct dentry *d, *tmp;
808 int buflen = ovl_redirect_max + 1;
811 ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
816 buf = ret = kmalloc(buflen, GFP_TEMPORARY);
822 for (d = dget(dentry); !IS_ROOT(d);) {
/* d_lock is held while the redirect or name of d is read and copied. */
826 spin_lock(&d->d_lock);
827 name = ovl_dentry_get_redirect(d);
829 thislen = strlen(name);
831 name = d->d_name.name;
832 thislen = d->d_name.len;
835 /* If path is too long, fall back to userspace move */
836 if (thislen + (name[0] != '/') > buflen) {
837 ret = ERR_PTR(-EXDEV);
838 spin_unlock(&d->d_lock);
843 memcpy(&buf[buflen], name, thislen);
/* Take a reference on the parent while d_lock is still held. */
844 tmp = dget_dlock(d->d_parent);
845 spin_unlock(&d->d_lock);
850 /* Absolute redirect: finished */
851 if (buf[buflen] == '/')
856 ret = kstrdup(&buf[buflen], GFP_KERNEL);
861 return ret ? ret : ERR_PTR(-ENOMEM);
/*
 * Make sure dentry carries a redirect that is usable for the pending move.
 *
 * An existing redirect is kept when the move stays in the same directory
 * or the redirect is already absolute.  Otherwise a new string is built
 * with ovl_get_redirect(), written to the OVL_XATTR_REDIRECT xattr of the
 * upper dentry, and stored on the overlay dentry under d_lock.  On
 * -EOPNOTSUPP, ovl_clear_redirect_dir() is called for the superblock.  A
 * rate-limited warning is logged on the failure path.
 * NOTE(review): the success/failure branching, the release of the string
 * and the returned codes are on omitted lines.
 * NOTE(review): the warning below uses the log prefix overlay: whereas
 * other messages in this file use overlayfs: - confirm which is intended.
 */
864 static int ovl_set_redirect(struct dentry *dentry, bool samedir)
867 const char *redirect = ovl_dentry_get_redirect(dentry);
869 if (redirect && (samedir || redirect[0] == '/'))
872 redirect = ovl_get_redirect(dentry, samedir);
873 if (IS_ERR(redirect))
874 return PTR_ERR(redirect);
876 err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT,
877 redirect, strlen(redirect), 0);
879 spin_lock(&dentry->d_lock);
880 ovl_dentry_set_redirect(dentry, redirect);
881 spin_unlock(&dentry->d_lock);
884 if (err == -EOPNOTSUPP)
885 ovl_clear_redirect_dir(dentry->d_sb);
887 pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
888 /* Fall back to userspace copy-up */
/*
 * rename handler for overlay directories.
 *
 * Visible steps, in order:
 *  - check flags against RENAME_EXCHANGE and RENAME_NOREPLACE, then strip
 *    RENAME_NOREPLACE;
 *  - consult ovl_can_move() for old and, for an exchange, for new;
 *  - take write access and copy up old, the parent of new, and new;
 *  - switch credentials with ovl_override_creds();
 *  - when overwriting a merge or lower directory, empty-check it and swap
 *    in a replacement via ovl_check_empty_and_clear();
 *  - add rename flags depending on the lower and whiteout state;
 *  - lock both upper parents, look both names up again and compare them
 *    with the cached upper dentries and with the trap dentry;
 *  - set redirect or opaque xattrs on directories where needed;
 *  - rename on the upper layer, optionally remove a leftover whiteout,
 *    and bump the version of both parents;
 *  - unlock, revert credentials and drop write access.
 * NOTE(review): many guards, labels and error returns are omitted from
 * this listing, so the exact conditions of several steps need confirming
 * against the full source.
 */
894 static int ovl_rename(struct inode *olddir, struct dentry *old,
895 struct inode *newdir, struct dentry *new,
899 struct dentry *old_upperdir;
900 struct dentry *new_upperdir;
901 struct dentry *olddentry;
902 struct dentry *newdentry;
906 bool cleanup_whiteout = false;
/* Any rename that is not an exchange may replace the target. */
907 bool overwrite = !(flags & RENAME_EXCHANGE);
908 bool is_dir = d_is_dir(old);
909 bool new_is_dir = d_is_dir(new);
910 bool samedir = olddir == newdir;
911 struct dentry *opaquedir = NULL;
912 const struct cred *old_cred = NULL;
915 if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
918 flags &= ~RENAME_NOREPLACE;
920 /* Don't copy up directory trees */
922 if (!ovl_can_move(old))
924 if (!overwrite && !ovl_can_move(new))
927 err = ovl_want_write(old);
931 err = ovl_copy_up(old);
935 err = ovl_copy_up(new->d_parent);
939 err = ovl_copy_up(new);
944 old_cred = ovl_override_creds(old->d_sb);
946 if (overwrite && new_is_dir && ovl_type_merge_or_lower(new)) {
947 opaquedir = ovl_check_empty_and_clear(new);
948 err = PTR_ERR(opaquedir);
949 if (IS_ERR(opaquedir)) {
951 goto out_revert_creds;
956 if (ovl_lower_positive(old)) {
957 if (!ovl_dentry_is_whiteout(new)) {
958 /* Whiteout source */
959 flags |= RENAME_WHITEOUT;
961 /* Switch whiteouts */
962 flags |= RENAME_EXCHANGE;
/* A directory moved onto a whiteout: exchange, then remove the whiteout. */
964 } else if (is_dir && ovl_dentry_is_whiteout(new)) {
965 flags |= RENAME_EXCHANGE;
966 cleanup_whiteout = true;
970 old_upperdir = ovl_dentry_upper(old->d_parent);
971 new_upperdir = ovl_dentry_upper(new->d_parent);
/* trap is compared with both looked-up dentries further down. */
973 trap = lock_rename(new_upperdir, old_upperdir);
975 olddentry = lookup_one_len(old->d_name.name, old_upperdir,
977 err = PTR_ERR(olddentry);
978 if (IS_ERR(olddentry))
/* The source on the upper layer must still be the cached upper dentry. */
982 if (olddentry != ovl_dentry_upper(old))
985 newdentry = lookup_one_len(new->d_name.name, new_upperdir,
987 err = PTR_ERR(newdentry);
988 if (IS_ERR(newdentry))
991 old_opaque = ovl_dentry_is_opaque(old);
992 new_opaque = ovl_dentry_is_opaque(new);
995 if (ovl_dentry_upper(new)) {
997 if (newdentry != opaquedir)
1000 if (newdentry != ovl_dentry_upper(new))
1004 if (!d_is_negative(newdentry) &&
1005 (!new_opaque || !ovl_is_whiteout(newdentry)))
1009 if (olddentry == trap)
1011 if (newdentry == trap)
1014 if (WARN_ON(olddentry->d_inode == newdentry->d_inode))
/* Merge or lower directories get a redirect, others may get opaque. */
1019 if (ovl_type_merge_or_lower(old))
1020 err = ovl_set_redirect(old, samedir);
1021 else if (!old_opaque && ovl_type_merge(new->d_parent))
1022 err = ovl_set_opaque(old, olddentry);
/* For an exchange the same treatment applies to a directory target. */
1026 if (!overwrite && new_is_dir) {
1027 if (ovl_type_merge_or_lower(new))
1028 err = ovl_set_redirect(new, samedir);
1029 else if (!new_opaque && ovl_type_merge(old->d_parent))
1030 err = ovl_set_opaque(new, newdentry);
1035 err = ovl_do_rename(old_upperdir->d_inode, olddentry,
1036 new_upperdir->d_inode, newdentry, flags);
1040 if (cleanup_whiteout)
1041 ovl_cleanup(old_upperdir->d_inode, newdentry);
1043 ovl_dentry_version_inc(old->d_parent);
1044 ovl_dentry_version_inc(new->d_parent);
1051 unlock_rename(new_upperdir, old_upperdir);
1053 revert_creds(old_cred);
1055 ovl_drop_write(old);
/*
 * Inode operation table for overlay directories (going by its name).  It
 * wires up handlers defined in this file (symlink, unlink, rename, create,
 * getattr) together with handlers that are not defined in this listing
 * (lookup, setattr, permission, listxattr, get_acl, update_time).
 * NOTE(review): some entries and the closing brace are on omitted lines.
 */
1061 const struct inode_operations ovl_dir_inode_operations = {
1062 .lookup = ovl_lookup,
1064 .symlink = ovl_symlink,
1065 .unlink = ovl_unlink,
1067 .rename = ovl_rename,
1069 .setattr = ovl_setattr,
1070 .create = ovl_create,
1072 .permission = ovl_permission,
1073 .getattr = ovl_dir_getattr,
1074 .listxattr = ovl_listxattr,
1075 .get_acl = ovl_get_acl,
1076 .update_time = ovl_update_time,