Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm...

author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 17 Dec 2012 23:44:47 +0000 (15:44 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 17 Dec 2012 23:44:47 +0000 (15:44 -0800)
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c

index 965d381abd75c100bcb88c71ff6fb1f9b4e3ad6b..25db92a8e1cf919db8a919232805fb71ad480b7f 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -1094,7 +1094,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private)
                 LOAD_INT(c), LOAD_FRAC(c),
                 count_active_contexts(),
                 atomic_read(&nr_spu_contexts),
-               current->nsproxy->pid_ns->last_pid);
+               task_active_pid_ns(current)->last_pid);
         return 0;
  }
  
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c

index 49e3b49e552f7f81dea63e708bbb0abf1e32a3f4..4bd82ac0210f27c8ef7c755480399ad9dce2f15b 100644 (file)
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -123,7 +123,7 @@ void mconsole_log(struct mc_request *req)
  
  void mconsole_proc(struct mc_request *req)
  {
-       struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
+       struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
         char *buf;
         int len;
         struct file *file;
diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c

index 4a36e9ab8cf7d5ffa66723ca3fd7be255c13aa72..2d12e8a1f82ee06b89f8be1127e644c16ead8994 100644 (file)
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -35,6 +35,7 @@
  #include <linux/uaccess.h>
  #include <linux/vmalloc.h>
  #include <linux/slab.h>
+#include <linux/pid_namespace.h>
  
  #include "binder.h"
  #include "binder_trace.h"
@@ -2320,7 +2321,7 @@ retry:
                 if (t->from) {
                         struct task_struct *sender = t->from->proc->tsk;
                         tr.sender_pid = task_tgid_nr_ns(sender,
-                                                       current->nsproxy->pid_ns);
+                                                       task_active_pid_ns(current));
                 } else {
                         tr.sender_pid = 0;
                 }
diff --git a/fs/attr.c b/fs/attr.c

index cce7df53b694373b6288795f14b07eb91c0894fc..1449adb14ef6a468b3d97865499df941a2c28312 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -49,14 +49,15 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
         /* Make sure a caller can chown. */
         if ((ia_valid & ATTR_UID) &&
             (!uid_eq(current_fsuid(), inode->i_uid) ||
-            !uid_eq(attr->ia_uid, inode->i_uid)) && !capable(CAP_CHOWN))
+            !uid_eq(attr->ia_uid, inode->i_uid)) &&
+           !inode_capable(inode, CAP_CHOWN))
                 return -EPERM;
  
         /* Make sure caller can chgrp. */
         if ((ia_valid & ATTR_GID) &&
             (!uid_eq(current_fsuid(), inode->i_uid) ||
             (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
-           !capable(CAP_CHOWN))
+           !inode_capable(inode, CAP_CHOWN))
                 return -EPERM;
  
         /* Make sure a caller can chmod. */
@@ -65,7 +66,8 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
                         return -EPERM;
                 /* Also check the setgid bit! */
                 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
-                               inode->i_gid) && !capable(CAP_FSETID))
+                               inode->i_gid) &&
+                   !inode_capable(inode, CAP_FSETID))
                         attr->ia_mode &= ~S_ISGID;
         }
  
@@ -157,7 +159,8 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
         if (ia_valid & ATTR_MODE) {
                 umode_t mode = attr->ia_mode;
  
-               if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+               if (!in_group_p(inode->i_gid) &&
+                   !inode_capable(inode, CAP_FSETID))
                         mode &= ~S_ISGID;
                 inode->i_mode = mode;
         }
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h

index 908e18455413fc2e49a4d845c8020007dce95252..b785e77079595d81bec518138e730297d3ee33d6 100644 (file)
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -74,8 +74,8 @@ struct autofs_info {
         unsigned long last_used;
         atomic_t count;
  
-       uid_t uid;
-       gid_t gid;
+       kuid_t uid;
+       kgid_t gid;
  };
  
  #define AUTOFS_INF_EXPIRING    (1<<0) /* dentry is in the process of expiring */
@@ -89,8 +89,8 @@ struct autofs_wait_queue {
         struct qstr name;
         u32 dev;
         u64 ino;
-       uid_t uid;
-       gid_t gid;
+       kuid_t uid;
+       kgid_t gid;
         pid_t pid;
         pid_t tgid;
         /* This is for status reporting upon return */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c

index a16214109d31ef8c7f0897eb6389b16872a2392d..9f68a37bb2b201044e14cd8ebf356d9b14d2e899 100644 (file)
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -437,8 +437,8 @@ static int autofs_dev_ioctl_requester(struct file *fp,
                 err = 0;
                 autofs4_expire_wait(path.dentry);
                 spin_lock(&sbi->fs_lock);
-               param->requester.uid = ino->uid;
-               param->requester.gid = ino->gid;
+               param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid);
+               param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid);
                 spin_unlock(&sbi->fs_lock);
         }
         path_put(&path);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c

index 8a4fed8ead30a5a051fded49a2cbfd07a1ff9630..b104726e2d0a7dbdb998e3daa574ec5e0326138c 100644 (file)
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -36,8 +36,8 @@ struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi)
  
  void autofs4_clean_ino(struct autofs_info *ino)
  {
-       ino->uid = 0;
-       ino->gid = 0;
+       ino->uid = GLOBAL_ROOT_UID;
+       ino->gid = GLOBAL_ROOT_GID;
         ino->last_used = jiffies;
  }
  
@@ -79,10 +79,12 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
                 return 0;
  
         seq_printf(m, ",fd=%d", sbi->pipefd);
-       if (root_inode->i_uid != 0)
-               seq_printf(m, ",uid=%u", root_inode->i_uid);
-       if (root_inode->i_gid != 0)
-               seq_printf(m, ",gid=%u", root_inode->i_gid);
+       if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID))
+               seq_printf(m, ",uid=%u",
+                       from_kuid_munged(&init_user_ns, root_inode->i_uid));
+       if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID))
+               seq_printf(m, ",gid=%u",
+                       from_kgid_munged(&init_user_ns, root_inode->i_gid));
         seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
         seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
         seq_printf(m, ",minproto=%d", sbi->min_proto);
@@ -126,7 +128,7 @@ static const match_table_t tokens = {
         {Opt_err, NULL}
  };
  
-static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
+static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
                 pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto)
  {
         char *p;
@@ -159,12 +161,16 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
                 case Opt_uid:
                         if (match_int(args, &option))
                                 return 1;
-                       *uid = option;
+                       *uid = make_kuid(current_user_ns(), option);
+                       if (!uid_valid(*uid))
+                               return 1;
                         break;
                 case Opt_gid:
                         if (match_int(args, &option))
                                 return 1;
-                       *gid = option;
+                       *gid = make_kgid(current_user_ns(), option);
+                       if (!gid_valid(*gid))
+                               return 1;
                         break;
                 case Opt_pgrp:
                         if (match_int(args, &option))
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c

index dce436e595c19275cc7f84c79596b41605ab5c59..03bc1d347d8e58f41ae2d1facda91e3deee470ee 100644 (file)
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -154,6 +154,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
         case autofs_ptype_expire_direct:
         {
                 struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
+               struct user_namespace *user_ns = sbi->pipe->f_cred->user_ns;
  
                 pktsz = sizeof(*packet);
  
@@ -163,8 +164,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
                 packet->name[wq->name.len] = '\0';
                 packet->dev = wq->dev;
                 packet->ino = wq->ino;
-               packet->uid = wq->uid;
-               packet->gid = wq->gid;
+               packet->uid = from_kuid_munged(user_ns, wq->uid);
+               packet->gid = from_kgid_munged(user_ns, wq->gid);
                 packet->pid = wq->pid;
                 packet->tgid = wq->tgid;
                 break;
diff --git a/fs/exec.c b/fs/exec.c

index 721a299295117f92d271f17afd224db1787712a1..b71b08ce71204824c7c66c8d101a7ebecdbb4db0 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1266,14 +1266,13 @@ int prepare_binprm(struct linux_binprm *bprm)
         bprm->cred->egid = current_egid();
  
         if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
-           !current->no_new_privs) {
+           !current->no_new_privs &&
+           kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
+           kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
                 /* Set-uid? */
                 if (mode & S_ISUID) {
-                       if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid))
-                               return -EPERM;
                         bprm->per_clear |= PER_CLEAR_ON_SETID;
                         bprm->cred->euid = inode->i_uid;
-
                 }
  
                 /* Set-gid? */
@@ -1283,8 +1282,6 @@ int prepare_binprm(struct linux_binprm *bprm)
                  * executable.
                  */
                 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
-                       if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid))
-                               return -EPERM;
                         bprm->per_clear |= PER_CLEAR_ON_SETID;
                         bprm->cred->egid = inode->i_gid;
                 }
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c

index 8c23fa7a91e65cb46ad3907432e4418c7f96ad2d..c16335315e5da8843a41dc9de6ea5f27cae6df12 100644 (file)
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -92,8 +92,8 @@ static void __fuse_put_request(struct fuse_req *req)
  
  static void fuse_req_init_context(struct fuse_req *req)
  {
-       req->in.h.uid = current_fsuid();
-       req->in.h.gid = current_fsgid();
+       req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
+       req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
         req->in.h.pid = current->pid;
  }
  
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c

index 324bc085053447665eccaacdc8fcf169cb418413..b7c09f9eb40cd7ac26f626a5bcadd26aeaf06ec5 100644 (file)
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -818,8 +818,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
         stat->ino = attr->ino;
         stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
         stat->nlink = attr->nlink;
-       stat->uid = attr->uid;
-       stat->gid = attr->gid;
+       stat->uid = make_kuid(&init_user_ns, attr->uid);
+       stat->gid = make_kgid(&init_user_ns, attr->gid);
         stat->rdev = inode->i_rdev;
         stat->atime.tv_sec = attr->atime;
         stat->atime.tv_nsec = attr->atimensec;
@@ -1007,12 +1007,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
         rcu_read_lock();
         ret = 0;
         cred = __task_cred(task);
-       if (cred->euid == fc->user_id &&
-           cred->suid == fc->user_id &&
-           cred->uid  == fc->user_id &&
-           cred->egid == fc->group_id &&
-           cred->sgid == fc->group_id &&
-           cred->gid  == fc->group_id)
+       if (uid_eq(cred->euid, fc->user_id) &&
+           uid_eq(cred->suid, fc->user_id) &&
+           uid_eq(cred->uid,  fc->user_id) &&
+           gid_eq(cred->egid, fc->group_id) &&
+           gid_eq(cred->sgid, fc->group_id) &&
+           gid_eq(cred->gid,  fc->group_id))
                 ret = 1;
         rcu_read_unlock();
  
@@ -1306,9 +1306,9 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
         if (ivalid & ATTR_MODE)
                 arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
         if (ivalid & ATTR_UID)
-               arg->valid |= FATTR_UID,    arg->uid = iattr->ia_uid;
+               arg->valid |= FATTR_UID,    arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
         if (ivalid & ATTR_GID)
-               arg->valid |= FATTR_GID,    arg->gid = iattr->ia_gid;
+               arg->valid |= FATTR_GID,    arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
         if (ivalid & ATTR_SIZE)
                 arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
         if (ivalid & ATTR_ATIME) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h

index e24dd74e3068d130545ee58f918519d4ca2ee620..e105a53fc72df6c08c116bec56d82b7eda9ce56b 100644 (file)
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -333,10 +333,10 @@ struct fuse_conn {
         atomic_t count;
  
         /** The user id for this mount */
-       uid_t user_id;
+       kuid_t user_id;
  
         /** The group id for this mount */
-       gid_t group_id;
+       kgid_t group_id;
  
         /** The fuse mount flags for this mount */
         unsigned flags;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c

index f0eda124cffb7714daf51c475dcabc8c955056e0..73ca6b72beafa0d19f5997bf7627b5046459c492 100644 (file)
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -60,8 +60,8 @@ MODULE_PARM_DESC(max_user_congthresh,
  struct fuse_mount_data {
         int fd;
         unsigned rootmode;
-       unsigned user_id;
-       unsigned group_id;
+       kuid_t user_id;
+       kgid_t group_id;
         unsigned fd_present:1;
         unsigned rootmode_present:1;
         unsigned user_id_present:1;
@@ -164,8 +164,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
         inode->i_ino     = fuse_squash_ino(attr->ino);
         inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
         set_nlink(inode, attr->nlink);
-       inode->i_uid     = attr->uid;
-       inode->i_gid     = attr->gid;
+       inode->i_uid     = make_kuid(&init_user_ns, attr->uid);
+       inode->i_gid     = make_kgid(&init_user_ns, attr->gid);
         inode->i_blocks  = attr->blocks;
         inode->i_atime.tv_sec   = attr->atime;
         inode->i_atime.tv_nsec  = attr->atimensec;
@@ -492,14 +492,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
                 case OPT_USER_ID:
                         if (match_int(&args[0], &value))
                                 return 0;
-                       d->user_id = value;
+                       d->user_id = make_kuid(current_user_ns(), value);
+                       if (!uid_valid(d->user_id))
+                               return 0;
                         d->user_id_present = 1;
                         break;
  
                 case OPT_GROUP_ID:
                         if (match_int(&args[0], &value))
                                 return 0;
-                       d->group_id = value;
+                       d->group_id = make_kgid(current_user_ns(), value);
+                       if (!gid_valid(d->group_id))
+                               return 0;
                         d->group_id_present = 1;
                         break;
  
@@ -540,8 +544,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
         struct super_block *sb = root->d_sb;
         struct fuse_conn *fc = get_fuse_conn_super(sb);
  
-       seq_printf(m, ",user_id=%u", fc->user_id);
-       seq_printf(m, ",group_id=%u", fc->group_id);
+       seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
+       seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
         if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
                 seq_puts(m, ",default_permissions");
         if (fc->flags & FUSE_ALLOW_OTHER)
@@ -989,7 +993,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
         if (!file)
                 goto err;
  
-       if (file->f_op != &fuse_dev_operations)
+       if ((file->f_op != &fuse_dev_operations) ||
+           (file->f_cred->user_ns != &init_user_ns))
                 goto err_fput;
  
         fc = kmalloc(sizeof(*fc), GFP_KERNEL);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c

index 78f21f8dc2ecf4caf8427bdab5e1deb0c3470952..43b315f2002bd7f1e6515d0c978d532561357a75 100644 (file)
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -710,7 +710,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
         struct vfsmount *proc_mnt;
         int err = -ENOENT;
  
-       proc_mnt = mntget(current->nsproxy->pid_ns->proc_mnt);
+       proc_mnt = mntget(task_active_pid_ns(current)->proc_mnt);
         if (IS_ERR(proc_mnt))
                 goto out;
  
diff --git a/fs/mount.h b/fs/mount.h

index 4f291f9de641ea2aca94f609daa7e865ae71fd58..cd50079804003ece9325c5338089952f9fa0166b 100644 (file)
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -4,8 +4,11 @@
  
  struct mnt_namespace {
         atomic_t                count;
+       unsigned int            proc_inum;
         struct mount *  root;
         struct list_head        list;
+       struct user_namespace   *user_ns;
+       u64                     seq;    /* Sequence number to prevent loops */
         wait_queue_head_t poll;
         int event;
  };
diff --git a/fs/namespace.c b/fs/namespace.c

index 24960626bb6bfc7b9eff78631876d4eb7ca7a40e..c1bbe86f4920057d0cee12b70512d318740ad08d 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -12,6 +12,7 @@
  #include <linux/export.h>
  #include <linux/capability.h>
  #include <linux/mnt_namespace.h>
+#include <linux/user_namespace.h>
  #include <linux/namei.h>
  #include <linux/security.h>
  #include <linux/idr.h>
@@ -20,6 +21,7 @@
  #include <linux/fs_struct.h>   /* get_fs_root et.al. */
  #include <linux/fsnotify.h>    /* fsnotify_vfsmount_delete */
  #include <linux/uaccess.h>
+#include <linux/proc_fs.h>
  #include "pnode.h"
  #include "internal.h"
  
@@ -784,7 +786,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
         if (!mnt)
                 return ERR_PTR(-ENOMEM);
  
-       if (flag & (CL_SLAVE | CL_PRIVATE))
+       if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
                 mnt->mnt_group_id = 0; /* not a peer of original */
         else
                 mnt->mnt_group_id = old->mnt_group_id;
@@ -805,7 +807,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
         list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
         br_write_unlock(&vfsmount_lock);
  
-       if (flag & CL_SLAVE) {
+       if ((flag & CL_SLAVE) ||
+           ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
                 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
                 mnt->mnt_master = old;
                 CLEAR_MNT_SHARED(mnt);
@@ -1266,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
                 goto dput_and_out;
  
         retval = -EPERM;
-       if (!capable(CAP_SYS_ADMIN))
+       if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
                 goto dput_and_out;
  
         retval = do_umount(mnt, flags);
@@ -1292,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
  
  static int mount_is_safe(struct path *path)
  {
-       if (capable(CAP_SYS_ADMIN))
+       if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
                 return 0;
         return -EPERM;
  #ifdef notyet
@@ -1308,6 +1311,26 @@ static int mount_is_safe(struct path *path)
  #endif
  }
  
+static bool mnt_ns_loop(struct path *path)
+{
+       /* Could bind mounting the mount namespace inode cause a
+        * mount namespace loop?
+        */
+       struct inode *inode = path->dentry->d_inode;
+       struct proc_inode *ei;
+       struct mnt_namespace *mnt_ns;
+
+       if (!proc_ns_inode(inode))
+               return false;
+
+       ei = PROC_I(inode);
+       if (ei->ns_ops != &mntns_operations)
+               return false;
+
+       mnt_ns = ei->ns;
+       return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
+}
+
  struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
                                         int flag)
  {
@@ -1610,7 +1633,7 @@ static int do_change_type(struct path *path, int flag)
         int type;
         int err = 0;
  
-       if (!capable(CAP_SYS_ADMIN))
+       if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
         if (path->dentry != path->mnt->mnt_root)
@@ -1655,6 +1678,10 @@ static int do_loopback(struct path *path, const char *old_name,
         if (err)
                 return err;
  
+       err = -EINVAL;
+       if (mnt_ns_loop(&old_path))
+               goto out; 
+
         err = lock_mount(path);
         if (err)
                 goto out;
@@ -1770,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name)
         struct mount *p;
         struct mount *old;
         int err = 0;
-       if (!capable(CAP_SYS_ADMIN))
+       if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
         if (!old_name || !*old_name)
                 return -EINVAL;
@@ -1857,21 +1884,6 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
         return ERR_PTR(err);
  }
  
-static struct vfsmount *
-do_kern_mount(const char *fstype, int flags, const char *name, void *data)
-{
-       struct file_system_type *type = get_fs_type(fstype);
-       struct vfsmount *mnt;
-       if (!type)
-               return ERR_PTR(-ENODEV);
-       mnt = vfs_kern_mount(type, flags, name, data);
-       if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
-           !mnt->mnt_sb->s_subtype)
-               mnt = fs_set_subtype(mnt, fstype);
-       put_filesystem(type);
-       return mnt;
-}
-
  /*
   * add a mount into a namespace's mount tree
   */
@@ -1917,20 +1929,46 @@ unlock:
   * create a new mount for userspace and request it to be added into the
   * namespace's tree
   */
-static int do_new_mount(struct path *path, const char *type, int flags,
+static int do_new_mount(struct path *path, const char *fstype, int flags,
                         int mnt_flags, const char *name, void *data)
  {
+       struct file_system_type *type;
+       struct user_namespace *user_ns;
         struct vfsmount *mnt;
         int err;
  
-       if (!type)
+       if (!fstype)
                 return -EINVAL;
  
         /* we need capabilities... */
-       if (!capable(CAP_SYS_ADMIN))
+       user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
+       if (!ns_capable(user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
-       mnt = do_kern_mount(type, flags, name, data);
+       type = get_fs_type(fstype);
+       if (!type)
+               return -ENODEV;
+
+       if (user_ns != &init_user_ns) {
+               if (!(type->fs_flags & FS_USERNS_MOUNT)) {
+                       put_filesystem(type);
+                       return -EPERM;
+               }
+               /* Only in special cases allow devices from mounts
+                * created outside the initial user namespace.
+                */
+               if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+                       flags |= MS_NODEV;
+                       mnt_flags |= MNT_NODEV;
+               }
+       }
+
+       mnt = vfs_kern_mount(type, flags, name, data);
+       if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
+           !mnt->mnt_sb->s_subtype)
+               mnt = fs_set_subtype(mnt, fstype);
+
+       put_filesystem(type);
         if (IS_ERR(mnt))
                 return PTR_ERR(mnt);
  
@@ -2261,18 +2299,42 @@ dput_out:
         return retval;
  }
  
-static struct mnt_namespace *alloc_mnt_ns(void)
+static void free_mnt_ns(struct mnt_namespace *ns)
+{
+       proc_free_inum(ns->proc_inum);
+       put_user_ns(ns->user_ns);
+       kfree(ns);
+}
+
+/*
+ * Assign a sequence number so we can detect when we attempt to bind
+ * mount a reference to an older mount namespace into the current
+ * mount namespace, preventing reference counting loops.  A 64bit
+ * number incrementing at 10GHz would take roughly 58 years to wrap, and
+ * namespaces are created far more slowly, so we can ignore the possibility.
+ */
+static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
+
+static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
  {
         struct mnt_namespace *new_ns;
+       int ret;
  
         new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
         if (!new_ns)
                 return ERR_PTR(-ENOMEM);
+       ret = proc_alloc_inum(&new_ns->proc_inum);
+       if (ret) {
+               kfree(new_ns);
+               return ERR_PTR(ret);
+       }
+       new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
         atomic_set(&new_ns->count, 1);
         new_ns->root = NULL;
         INIT_LIST_HEAD(&new_ns->list);
         init_waitqueue_head(&new_ns->poll);
         new_ns->event = 0;
+       new_ns->user_ns = get_user_ns(user_ns);
         return new_ns;
  }
  
@@ -2281,24 +2343,28 @@ static struct mnt_namespace *alloc_mnt_ns(void)
   * copied from the namespace of the passed in task structure.
   */
  static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
-               struct fs_struct *fs)
+               struct user_namespace *user_ns, struct fs_struct *fs)
  {
         struct mnt_namespace *new_ns;
         struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
         struct mount *p, *q;
         struct mount *old = mnt_ns->root;
         struct mount *new;
+       int copy_flags;
  
-       new_ns = alloc_mnt_ns();
+       new_ns = alloc_mnt_ns(user_ns);
         if (IS_ERR(new_ns))
                 return new_ns;
  
         down_write(&namespace_sem);
         /* First pass: copy the tree topology */
-       new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
+       copy_flags = CL_COPY_ALL | CL_EXPIRE;
+       if (user_ns != mnt_ns->user_ns)
+               copy_flags |= CL_SHARED_TO_SLAVE;
+       new = copy_tree(old, old->mnt.mnt_root, copy_flags);
         if (IS_ERR(new)) {
                 up_write(&namespace_sem);
-               kfree(new_ns);
+               free_mnt_ns(new_ns);
                 return ERR_CAST(new);
         }
         new_ns->root = new;
@@ -2339,7 +2405,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
  }
  
  struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
-               struct fs_struct *new_fs)
+               struct user_namespace *user_ns, struct fs_struct *new_fs)
  {
         struct mnt_namespace *new_ns;
  
@@ -2349,7 +2415,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
         if (!(flags & CLONE_NEWNS))
                 return ns;
  
-       new_ns = dup_mnt_ns(ns, new_fs);
+       new_ns = dup_mnt_ns(ns, user_ns, new_fs);
  
         put_mnt_ns(ns);
         return new_ns;
@@ -2361,7 +2427,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
   */
  static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
  {
-       struct mnt_namespace *new_ns = alloc_mnt_ns();
+       struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
         if (!IS_ERR(new_ns)) {
                 struct mount *mnt = real_mount(m);
                 mnt->mnt_ns = new_ns;
@@ -2501,7 +2567,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
         struct mount *new_mnt, *root_mnt;
         int error;
  
-       if (!capable(CAP_SYS_ADMIN))
+       if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
         error = user_path_dir(new_root, &new);
@@ -2583,8 +2649,13 @@ static void __init init_mount_tree(void)
         struct vfsmount *mnt;
         struct mnt_namespace *ns;
         struct path root;
+       struct file_system_type *type;
  
-       mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
+       type = get_fs_type("rootfs");
+       if (!type)
+               panic("Can't find rootfs type");
+       mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
+       put_filesystem(type);
         if (IS_ERR(mnt))
                 panic("Can't create rootfs");
  
@@ -2647,7 +2718,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
         br_write_unlock(&vfsmount_lock);
         up_write(&namespace_sem);
         release_mounts(&umount_list);
-       kfree(ns);
+       free_mnt_ns(ns);
  }
  
  struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
@@ -2681,3 +2752,71 @@ bool our_mnt(struct vfsmount *mnt)
  {
         return check_mnt(real_mount(mnt));
  }
+
+static void *mntns_get(struct task_struct *task)
+{
+       struct mnt_namespace *ns = NULL;
+       struct nsproxy *nsproxy;
+
+       rcu_read_lock();
+       nsproxy = task_nsproxy(task);
+       if (nsproxy) {
+               ns = nsproxy->mnt_ns;
+               get_mnt_ns(ns);
+       }
+       rcu_read_unlock();
+
+       return ns;
+}
+
+static void mntns_put(void *ns)
+{
+       put_mnt_ns(ns);
+}
+
+static int mntns_install(struct nsproxy *nsproxy, void *ns)
+{
+       struct fs_struct *fs = current->fs;
+       struct mnt_namespace *mnt_ns = ns;
+       struct path root;
+
+       if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
+           !nsown_capable(CAP_SYS_CHROOT))
+               return -EPERM;
+
+       if (fs->users != 1)
+               return -EINVAL;
+
+       get_mnt_ns(mnt_ns);
+       put_mnt_ns(nsproxy->mnt_ns);
+       nsproxy->mnt_ns = mnt_ns;
+
+       /* Find the root */
+       root.mnt    = &mnt_ns->root->mnt;
+       root.dentry = mnt_ns->root->mnt.mnt_root;
+       path_get(&root);
+       while(d_mountpoint(root.dentry) && follow_down_one(&root))
+               ;
+
+       /* Update the pwd and root */
+       set_fs_pwd(fs, &root);
+       set_fs_root(fs, &root);
+
+       path_put(&root);
+       return 0;
+}
+
+static unsigned int mntns_inum(void *ns)
+{
+       struct mnt_namespace *mnt_ns = ns;
+       return mnt_ns->proc_inum;
+}
+
+const struct proc_ns_operations mntns_operations = {
+       .name           = "mnt",
+       .type           = CLONE_NEWNS,
+       .get            = mntns_get,
+       .put            = mntns_put,
+       .install        = mntns_install,
+       .inum           = mntns_inum,
+};
diff --git a/fs/open.c b/fs/open.c

index 59071f55bf7fe97545cc691e5dc845a05cbe8f64..182d8667b7bd57f5c3c6f738dcdeed0b18c0cbe6 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -435,7 +435,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
                 goto dput_and_out;
  
         error = -EPERM;
-       if (!capable(CAP_SYS_CHROOT))
+       if (!nsown_capable(CAP_SYS_CHROOT))
                 goto dput_and_out;
         error = security_path_chroot(&path);
         if (error)
diff --git a/fs/pnode.h b/fs/pnode.h

index 65c60979d5410f9b3aae1248e9503683c2397358..19b853a3445cb907665b4403484984a0c525af68 100644 (file)
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -22,6 +22,7 @@
  #define CL_COPY_ALL            0x04
  #define CL_MAKE_SHARED                 0x08
  #define CL_PRIVATE             0x10
+#define CL_SHARED_TO_SLAVE     0x20
  
  static inline void set_mnt_shared(struct mount *mnt)
  {
diff --git a/fs/proc/Makefile b/fs/proc/Makefile

index 99349efbbc2b53781afd3e321a9a52cd92441882..981b05601931c2036aa36f54839134766f002dfc 100644 (file)
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -21,6 +21,7 @@ proc-y        += uptime.o
  proc-y += version.o
  proc-y += softirqs.o
  proc-y += namespaces.o
+proc-y += self.o
  proc-$(CONFIG_PROC_SYSCTL)     += proc_sysctl.o
  proc-$(CONFIG_NET)             += proc_net.o
  proc-$(CONFIG_PROC_KCORE)      += kcore.o
diff --git a/fs/proc/array.c b/fs/proc/array.c

index d3696708fc1ae4bff76a1d5a253103e792629ae4..d66248a1919b3a3028d07ae1d4dc59f36ce57aea 100644 (file)
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -162,7 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk)
  static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
                                 struct pid *pid, struct task_struct *p)
  {
-       struct user_namespace *user_ns = current_user_ns();
+       struct user_namespace *user_ns = seq_user_ns(m);
         struct group_info *group_info;
         int g;
         struct fdtable *fdt = NULL;
diff --git a/fs/proc/base.c b/fs/proc/base.c

index aa63d25157b8d396a9a7d0f1728fe673fa577e31..5a5a0be40e405f4693bad85fcc7d04703b362523 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2345,146 +2345,6 @@ static const struct file_operations proc_coredump_filter_operations = {
  };
  #endif
  
-/*
- * /proc/self:
- */
-static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
-                             int buflen)
-{
-       struct pid_namespace *ns = dentry->d_sb->s_fs_info;
-       pid_t tgid = task_tgid_nr_ns(current, ns);
-       char tmp[PROC_NUMBUF];
-       if (!tgid)
-               return -ENOENT;
-       sprintf(tmp, "%d", tgid);
-       return vfs_readlink(dentry,buffer,buflen,tmp);
-}
-
-static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       struct pid_namespace *ns = dentry->d_sb->s_fs_info;
-       pid_t tgid = task_tgid_nr_ns(current, ns);
-       char *name = ERR_PTR(-ENOENT);
-       if (tgid) {
-               /* 11 for max length of signed int in decimal + NULL term */
-               name = kmalloc(12, GFP_KERNEL);
-               if (!name)
-                       name = ERR_PTR(-ENOMEM);
-               else
-                       sprintf(name, "%d", tgid);
-       }
-       nd_set_link(nd, name);
-       return NULL;
-}
-
-static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
-                               void *cookie)
-{
-       char *s = nd_get_link(nd);
-       if (!IS_ERR(s))
-               kfree(s);
-}
-
-static const struct inode_operations proc_self_inode_operations = {
-       .readlink       = proc_self_readlink,
-       .follow_link    = proc_self_follow_link,
-       .put_link       = proc_self_put_link,
-};
-
-/*
- * proc base
- *
- * These are the directory entries in the root directory of /proc
- * that properly belong to the /proc filesystem, as they describe
- * describe something that is process related.
- */
-static const struct pid_entry proc_base_stuff[] = {
-       NOD("self", S_IFLNK|S_IRWXUGO,
-               &proc_self_inode_operations, NULL, {}),
-};
-
-static struct dentry *proc_base_instantiate(struct inode *dir,
-       struct dentry *dentry, struct task_struct *task, const void *ptr)
-{
-       const struct pid_entry *p = ptr;
-       struct inode *inode;
-       struct proc_inode *ei;
-       struct dentry *error;
-
-       /* Allocate the inode */
-       error = ERR_PTR(-ENOMEM);
-       inode = new_inode(dir->i_sb);
-       if (!inode)
-               goto out;
-
-       /* Initialize the inode */
-       ei = PROC_I(inode);
-       inode->i_ino = get_next_ino();
-       inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-
-       /*
-        * grab the reference to the task.
-        */
-       ei->pid = get_task_pid(task, PIDTYPE_PID);
-       if (!ei->pid)
-               goto out_iput;
-
-       inode->i_mode = p->mode;
-       if (S_ISDIR(inode->i_mode))
-               set_nlink(inode, 2);
-       if (S_ISLNK(inode->i_mode))
-               inode->i_size = 64;
-       if (p->iop)
-               inode->i_op = p->iop;
-       if (p->fop)
-               inode->i_fop = p->fop;
-       ei->op = p->op;
-       d_add(dentry, inode);
-       error = NULL;
-out:
-       return error;
-out_iput:
-       iput(inode);
-       goto out;
-}
-
-static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
-{
-       struct dentry *error;
-       struct task_struct *task = get_proc_task(dir);
-       const struct pid_entry *p, *last;
-
-       error = ERR_PTR(-ENOENT);
-
-       if (!task)
-               goto out_no_task;
-
-       /* Lookup the directory entry */
-       last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
-       for (p = proc_base_stuff; p <= last; p++) {
-               if (p->len != dentry->d_name.len)
-                       continue;
-               if (!memcmp(dentry->d_name.name, p->name, p->len))
-                       break;
-       }
-       if (p > last)
-               goto out;
-
-       error = proc_base_instantiate(dir, dentry, task, p);
-
-out:
-       put_task_struct(task);
-out_no_task:
-       return error;
-}
-
-static int proc_base_fill_cache(struct file *filp, void *dirent,
-       filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
-{
-       return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
-                               proc_base_instantiate, task, p);
-}
-
  #ifdef CONFIG_TASK_IO_ACCOUNTING
  static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
  {
@@ -2839,10 +2699,6 @@ void proc_flush_task(struct task_struct *task)
                 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
                                         tgid->numbers[i].nr);
         }
-
-       upid = &pid->numbers[pid->level];
-       if (upid->nr == 1)
-               pid_ns_release_proc(upid->ns);
  }
  
  static struct dentry *proc_pid_instantiate(struct inode *dir,
@@ -2876,15 +2732,11 @@ out:
  
  struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
  {
-       struct dentry *result;
+       struct dentry *result = NULL;
         struct task_struct *task;
         unsigned tgid;
         struct pid_namespace *ns;
  
-       result = proc_base_lookup(dir, dentry);
-       if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
-               goto out;
-
         tgid = name_to_int(dentry);
         if (tgid == ~0U)
                 goto out;
@@ -2947,7 +2799,7 @@ retry:
         return iter;
  }
  
-#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
+#define TGID_OFFSET (FIRST_PROCESS_ENTRY)
  
  static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
         struct tgid_iter iter)
@@ -2967,25 +2819,12 @@ static int fake_filldir(void *buf, const char *name, int namelen,
  /* for the /proc/ directory itself, after non-process stuff has been done */
  int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
  {
-       unsigned int nr;
-       struct task_struct *reaper;
         struct tgid_iter iter;
         struct pid_namespace *ns;
         filldir_t __filldir;
  
         if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
-               goto out_no_task;
-       nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-
-       reaper = get_proc_task(filp->f_path.dentry->d_inode);
-       if (!reaper)
-               goto out_no_task;
-
-       for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
-               const struct pid_entry *p = &proc_base_stuff[nr];
-               if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
-                       goto out;
-       }
+               goto out;
  
         ns = filp->f_dentry->d_sb->s_fs_info;
         iter.task = NULL;
@@ -3006,8 +2845,6 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
         }
         filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
  out:
-       put_task_struct(reaper);
-out_no_task:
         return 0;
  }
  
diff --git a/fs/proc/generic.c b/fs/proc/generic.c

index 0d80cef4cfb93ea5bbd423b6cf887039f15b093a..7b3ae3cc0ef9ae34da5ece6dd63d550042e4b1b5 100644 (file)
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
   * Return an inode number between PROC_DYNAMIC_FIRST and
   * 0xffffffff, or zero on failure.
   */
-static unsigned int get_inode_number(void)
+int proc_alloc_inum(unsigned int *inum)
  {
         unsigned int i;
         int error;
  
  retry:
-       if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
-               return 0;
+       if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
+               return -ENOMEM;
  
         spin_lock(&proc_inum_lock);
         error = ida_get_new(&proc_inum_ida, &i);
@@ -365,18 +365,19 @@ retry:
         if (error == -EAGAIN)
                 goto retry;
         else if (error)
-               return 0;
+               return error;
  
         if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
                 spin_lock(&proc_inum_lock);
                 ida_remove(&proc_inum_ida, i);
                 spin_unlock(&proc_inum_lock);
-               return 0;
+               return -ENOSPC;
         }
-       return PROC_DYNAMIC_FIRST + i;
+       *inum = PROC_DYNAMIC_FIRST + i;
+       return 0;
  }
  
-static void release_inode_number(unsigned int inum)
+void proc_free_inum(unsigned int inum)
  {
         spin_lock(&proc_inum_lock);
         ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
@@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = {
  
  static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
  {
-       unsigned int i;
         struct proc_dir_entry *tmp;
+       int ret;
         
-       i = get_inode_number();
-       if (i == 0)
-               return -EAGAIN;
-       dp->low_ino = i;
+       ret = proc_alloc_inum(&dp->low_ino);
+       if (ret)
+               return ret;
  
         if (S_ISDIR(dp->mode)) {
                 if (dp->proc_iops == NULL) {
@@ -764,7 +764,7 @@ EXPORT_SYMBOL(proc_create_data);
  
  static void free_proc_entry(struct proc_dir_entry *de)
  {
-       release_inode_number(de->low_ino);
+       proc_free_inum(de->low_ino);
  
         if (S_ISLNK(de->mode))
                 kfree(de->data);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c

index 3b22bbdee9ec6e8bb9a8b6f19d19bc508e9d2879..439ae688650739f173499a635d578c99f104cebf 100644 (file)
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -31,6 +31,7 @@ static void proc_evict_inode(struct inode *inode)
         struct proc_dir_entry *de;
         struct ctl_table_header *head;
         const struct proc_ns_operations *ns_ops;
+       void *ns;
  
         truncate_inode_pages(&inode->i_data, 0);
         clear_inode(inode);
@@ -49,8 +50,9 @@ static void proc_evict_inode(struct inode *inode)
         }
         /* Release any associated namespace */
         ns_ops = PROC_I(inode)->ns_ops;
-       if (ns_ops && ns_ops->put)
-               ns_ops->put(PROC_I(inode)->ns);
+       ns = PROC_I(inode)->ns;
+       if (ns_ops && ns)
+               ns_ops->put(ns);
  }
  
  static struct kmem_cache * proc_inode_cachep;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h

index 43973b084abf25649fdb12e16ad3d169c0e29c9e..252544c05207903559e9382b6603925a2b7db102 100644 (file)
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -15,6 +15,7 @@ struct  ctl_table_header;
  struct  mempolicy;
  
  extern struct proc_dir_entry proc_root;
+extern void proc_self_init(void);
  #ifdef CONFIG_PROC_SYSCTL
  extern int proc_sys_init(void);
  extern void sysctl_head_put(struct ctl_table_header *head);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c

index b178ed733c3698a0ad2fcf49def0727b9fc135f6..b7a47196c8c3577e9cda96125a0157444128a7be 100644 (file)
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -11,6 +11,7 @@
  #include <net/net_namespace.h>
  #include <linux/ipc_namespace.h>
  #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
  #include "internal.h"
  
  
@@ -24,12 +25,168 @@ static const struct proc_ns_operations *ns_entries[] = {
  #ifdef CONFIG_IPC_NS
         &ipcns_operations,
  #endif
+#ifdef CONFIG_PID_NS
+       &pidns_operations,
+#endif
+#ifdef CONFIG_USER_NS
+       &userns_operations,
+#endif
+       &mntns_operations,
  };
  
  static const struct file_operations ns_file_operations = {
         .llseek         = no_llseek,
  };
  
+static const struct inode_operations ns_inode_operations = {
+       .setattr        = proc_setattr, /* installed on new namespace inodes in proc_ns_get_dentry() */
+};
+
+static int ns_delete_dentry(const struct dentry *dentry)
+{
+       /* ->d_delete hook: always 1, so namespace dentries/inodes are not cached when not in use */
+       return 1;
+}
+
+static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+       struct inode *ino = dentry->d_inode;
+       const char *ns_name = PROC_I(ino)->ns_ops->name;
+
+       /* Render the dentry name as "<ns name>:[<inode number>]". */
+       return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", ns_name, ino->i_ino);
+}
+
+const struct dentry_operations ns_dentry_operations =
+{
+       .d_delete       = ns_delete_dentry,     /* always returns 1 */
+       .d_dname        = ns_dname,             /* builds "<name>:[<ino>]" via dynamic_dname() */
+};
+
+static struct dentry *proc_ns_get_dentry(struct super_block *sb,
+       struct task_struct *task, const struct proc_ns_operations *ns_ops)
+{
+       struct dentry *dentry, *result;
+       struct inode *inode;
+       struct proc_inode *ei;
+       struct qstr qname = { .name = "", };
+       void *ns;
+       /* Take a reference on @task's namespace; every failure path below puts it again */
+       ns = ns_ops->get(task);
+       if (!ns)
+               return ERR_PTR(-ENOENT);
+       /* Pseudo dentry with an empty name; ns_dentry_operations (set below) supplies ->d_dname */
+       dentry = d_alloc_pseudo(sb, &qname);
+       if (!dentry) {
+               ns_ops->put(ns);
+               return ERR_PTR(-ENOMEM);
+       }
+       /* The inode is looked up by the namespace's own inode number */
+       inode = iget_locked(sb, ns_ops->inum(ns));
+       if (!inode) {
+               dput(dentry);
+               ns_ops->put(ns);
+               return ERR_PTR(-ENOMEM);
+       }
+       /* A new inode keeps our ns reference in ei->ns; for an existing inode the reference is dropped */
+       ei = PROC_I(inode);
+       if (inode->i_state & I_NEW) {
+               inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+               inode->i_op = &ns_inode_operations;
+               inode->i_mode = S_IFREG | S_IRUGO;
+               inode->i_fop = &ns_file_operations;
+               ei->ns_ops = ns_ops;
+               ei->ns = ns;
+               unlock_new_inode(inode);
+       } else {
+               ns_ops->put(ns);
+       }
+       /* If d_instantiate_unique() hands back a dentry, return that one and drop ours */
+       d_set_d_op(dentry, &ns_dentry_operations);
+       result = d_instantiate_unique(dentry, inode);
+       if (result) {
+               dput(dentry);
+               dentry = result;
+       }
+
+       return dentry;
+}
+
+static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+       struct inode *link = dentry->d_inode;
+       struct task_struct *tsk = get_proc_task(link);
+       struct dentry *target;
+       void *ret;
+
+       /* get_proc_task() found no task for this entry: report -EACCES. */
+       if (!tsk)
+               return ERR_PTR(-EACCES);
+
+       /* Resolving the link requires ptrace read access to the task. */
+       if (!ptrace_may_access(tsk, PTRACE_MODE_READ)) {
+               ret = ERR_PTR(-EACCES);
+               goto drop_task;
+       }
+
+       target = proc_ns_get_dentry(link->i_sb, tsk, PROC_I(link)->ns_ops);
+       if (IS_ERR(target)) {
+               ret = ERR_CAST(target);
+               goto drop_task;
+       }
+
+       /* Swap the namespace dentry into the walk in place of the current one. */
+       dput(nd->path.dentry);
+       nd->path.dentry = target;
+       ret = NULL;
+drop_task:
+       put_task_struct(tsk);
+       return ret;
+}
+
+static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+       struct inode *inode = dentry->d_inode;
+       const struct proc_ns_operations *ops = PROC_I(inode)->ns_ops;
+       struct task_struct *tsk;
+       char text[50];
+       void *ns;
+       int ret;
+
+       tsk = get_proc_task(inode);
+       if (!tsk)
+               return -EACCES;
+
+       /* Reading the link text requires ptrace read access to the task. */
+       ret = -EACCES;
+       if (!ptrace_may_access(tsk, PTRACE_MODE_READ))
+               goto put_task;
+
+       ret = -ENOENT;
+       ns = ops->get(tsk);
+       if (!ns)
+               goto put_task;
+
+       /* Link text is "<name>:[<inum>]", cut to the caller's buffer length. */
+       snprintf(text, sizeof(text), "%s:[%u]", ops->name, ops->inum(ns));
+       ret = strlen(text);
+       if (ret > buflen)
+               ret = buflen;
+       if (copy_to_user(buffer, text, ret))
+               ret = -EFAULT;
+
+       ops->put(ns);
+put_task:
+       put_task_struct(tsk);
+       return ret;
+}
+
+static const struct inode_operations proc_ns_link_inode_operations = {
+       .readlink       = proc_ns_readlink,     /* yields "<name>:[<inum>]" */
+       .follow_link    = proc_ns_follow_link,  /* redirects the walk to the namespace dentry */
+       .setattr        = proc_setattr,
+};
+
  static struct dentry *proc_ns_instantiate(struct inode *dir,
         struct dentry *dentry, struct task_struct *task, const void *ptr)
  {
@@ -37,21 +194,15 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
         struct inode *inode;
         struct proc_inode *ei;
         struct dentry *error = ERR_PTR(-ENOENT);
-       void *ns;
  
         inode = proc_pid_make_inode(dir->i_sb, task);
         if (!inode)
                 goto out;
  
-       ns = ns_ops->get(task);
-       if (!ns)
-               goto out_iput;
-
         ei = PROC_I(inode);
-       inode->i_mode = S_IFREG|S_IRUSR;
-       inode->i_fop  = &ns_file_operations;
-       ei->ns_ops    = ns_ops;
-       ei->ns        = ns;
+       inode->i_mode = S_IFLNK|S_IRWXUGO;
+       inode->i_op = &proc_ns_link_inode_operations;
+       ei->ns_ops = ns_ops;
  
         d_set_d_op(dentry, &pid_dentry_operations);
         d_add(dentry, inode);
@@ -60,9 +211,6 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
                 error = NULL;
  out:
         return error;
-out_iput:
-       iput(inode);
-       goto out;
  }
  
  static int proc_ns_fill_cache(struct file *filp, void *dirent,
@@ -89,10 +237,6 @@ static int proc_ns_dir_readdir(struct file *filp, void *dirent,
         if (!task)
                 goto out_no_task;
  
-       ret = -EPERM;
-       if (!ptrace_may_access(task, PTRACE_MODE_READ))
-               goto out;
-
         ret = 0;
         i = filp->f_pos;
         switch (i) {
@@ -152,10 +296,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
         if (!task)
                 goto out_no_task;
  
-       error = ERR_PTR(-EPERM);
-       if (!ptrace_may_access(task, PTRACE_MODE_READ))
-               goto out;
-
         last = &ns_entries[ARRAY_SIZE(ns_entries)];
         for (entry = ns_entries; entry < last; entry++) {
                 if (strlen((*entry)->name) != len)
@@ -163,7 +303,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
                 if (!memcmp(dentry->d_name.name, (*entry)->name, len))
                         break;
         }
-       error = ERR_PTR(-ENOENT);
         if (entry == last)
                 goto out;
  
@@ -198,3 +337,7 @@ out_invalid:
         return ERR_PTR(-EINVAL);
  }
  
+bool proc_ns_inode(struct inode *inode)
+{
+       return inode->i_fop == &ns_file_operations;     /* the fops that proc_ns_get_dentry() installs */
+}
diff --git a/fs/proc/root.c b/fs/proc/root.c

index 9889a92d2e01773113a5c7db29975cb47d7dcb1d..c6e9fac26bace4e9b63bd57dce624589dc67dfd7 100644 (file)
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -100,14 +100,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
         int err;
         struct super_block *sb;
         struct pid_namespace *ns;
-       struct proc_inode *ei;
         char *options;
  
         if (flags & MS_KERNMOUNT) {
                 ns = (struct pid_namespace *)data;
                 options = NULL;
         } else {
-               ns = current->nsproxy->pid_ns;
+               ns = task_active_pid_ns(current);
                 options = data;
         }
  
@@ -130,13 +129,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
                 sb->s_flags |= MS_ACTIVE;
         }
  
-       ei = PROC_I(sb->s_root->d_inode);
-       if (!ei->pid) {
-               rcu_read_lock();
-               ei->pid = get_pid(find_pid_ns(1, ns));
-               rcu_read_unlock();
-       }
-
         return dget(sb->s_root);
  }
  
@@ -153,6 +145,7 @@ static struct file_system_type proc_fs_type = {
         .name           = "proc",
         .mount          = proc_mount,
         .kill_sb        = proc_kill_sb,
+       .fs_flags       = FS_USERNS_MOUNT,
  };
  
  void __init proc_root_init(void)
@@ -163,12 +156,8 @@ void __init proc_root_init(void)
         err = register_filesystem(&proc_fs_type);
         if (err)
                 return;
-       err = pid_ns_prepare_proc(&init_pid_ns);
-       if (err) {
-               unregister_filesystem(&proc_fs_type);
-               return;
-       }
  
+       proc_self_init();
         proc_symlink("mounts", NULL, "self/mounts");
  
         proc_net_init();
diff --git a/fs/proc/self.c b/fs/proc/self.c

new file mode 100644 (file)

index 0000000..aa5cc3b
--- /dev/null
+++ b/fs/proc/self.c
@@ -0,0 +1,59 @@
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+
+/*
+ * /proc/self:
+ */
+static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
+                             int buflen)
+{
+       /* Link text: current's tgid in the pid namespace kept in the superblock's s_fs_info. */
+       pid_t self_tgid = task_tgid_nr_ns(current, dentry->d_sb->s_fs_info);
+       char link[PROC_NUMBUF];
+       if (self_tgid == 0)
+               return -ENOENT;
+       sprintf(link, "%d", self_tgid);
+       return vfs_readlink(dentry, buffer, buflen, link);
+}
+
+static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+       struct pid_namespace *pid_ns = dentry->d_sb->s_fs_info;
+       pid_t self_tgid = task_tgid_nr_ns(current, pid_ns);
+       char *target;
+
+       /* 12 bytes: up to 11 characters for a signed int in decimal, plus the NUL */
+       target = self_tgid ? kmalloc(12, GFP_KERNEL) : ERR_PTR(-ENOENT);
+       if (!target)
+               target = ERR_PTR(-ENOMEM);
+       else if (!IS_ERR(target))
+               sprintf(target, "%d", self_tgid);
+
+       nd_set_link(nd, target);
+       return NULL;
+}
+
+static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
+                               void *cookie)
+{
+       char *link = nd_get_link(nd);   /* the link stored through nd_set_link() */
+       if (!IS_ERR(link))              /* an ERR_PTR was never kmalloc'ed */
+               kfree(link);
+}
+
+static const struct inode_operations proc_self_inode_operations = {
+       .readlink       = proc_self_readlink,           /* link text: current's tgid */
+       .follow_link    = proc_self_follow_link,        /* allocates the tgid string for the walk */
+       .put_link       = proc_self_put_link,           /* frees that string */
+};
+
+void __init proc_self_init(void)
+{
+       struct proc_dir_entry *proc_self_symlink;
+
+       /* Register /proc/self; proc_create() returns NULL on failure, so never dereference blindly. */
+       proc_self_symlink = proc_create("self", S_IFLNK | S_IRWXUGO, NULL, NULL);
+       if (proc_self_symlink)
+               proc_self_symlink->proc_iops = &proc_self_inode_operations;
+}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c

index 71eb7e2539274a5cacd1fe61ba0bca46db8381b4..db940a9be0458216b6b54df956d89a20b32f16ee 100644 (file)
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -149,6 +149,7 @@ static struct file_system_type sysfs_fs_type = {
         .name           = "sysfs",
         .mount          = sysfs_mount,
         .kill_sb        = sysfs_kill_sb,
+       .fs_flags       = FS_USERNS_MOUNT,
  };
  
  int __init sysfs_init(void)
diff --git a/include/linux/cred.h b/include/linux/cred.h

index 0142aacb70b7049583a1618c735c7410a6827d65..abb2cd50f6b26ace7da7a554527087d4685d3e76 100644 (file)
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -344,10 +344,8 @@ static inline void put_cred(const struct cred *_cred)
  extern struct user_namespace init_user_ns;
  #ifdef CONFIG_USER_NS
  #define current_user_ns()      (current_cred_xxx(user_ns))
-#define task_user_ns(task)     (task_cred_xxx((task), user_ns))
  #else
  #define current_user_ns()      (&init_user_ns)
-#define task_user_ns(task)     (&init_user_ns)
  #endif
  
  
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 408fb1e77a0a36804363d49d804e3ea3d3a802ab..035521b46528ace428c7f07d4aa49ac4880e145e 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1810,6 +1810,8 @@ struct file_system_type {
  #define FS_REQUIRES_DEV                1 
  #define FS_BINARY_MOUNTDATA    2
  #define FS_HAS_SUBTYPE         4
+#define FS_USERNS_MOUNT                8       /* Can be mounted by userns root */
+#define FS_USERNS_DEV_MOUNT    16 /* A userns mount does not imply MNT_NODEV */
  #define FS_REVAL_DOT           16384   /* Check the paths ".", ".." for staleness */
  #define FS_RENAME_DOES_D_MOVE  32768   /* FS will handle d_move() during rename() internally. */
         struct dentry *(*mount) (struct file_system_type *, int,
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h

index 5499c92a91539afcc0987d49fe6477acad2d16e4..fe771978e87759000562162835f32fa66b4b4571 100644 (file)
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -67,6 +67,8 @@ struct ipc_namespace {
  
         /* user_ns which owns the ipc ns */
         struct user_namespace *user_ns;
+
+       unsigned int    proc_inum;
  };
  
  extern struct ipc_namespace init_ipc_ns;
@@ -133,7 +135,8 @@ static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
  
  #if defined(CONFIG_IPC_NS)
  extern struct ipc_namespace *copy_ipcs(unsigned long flags,
-                                      struct task_struct *tsk);
+       struct user_namespace *user_ns, struct ipc_namespace *ns);
+
  static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
  {
         if (ns)
@@ -144,12 +147,12 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
  extern void put_ipc_ns(struct ipc_namespace *ns);
  #else
  static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
-                                             struct task_struct *tsk)
+       struct user_namespace *user_ns, struct ipc_namespace *ns)
  {
         if (flags & CLONE_NEWIPC)
                 return ERR_PTR(-EINVAL);
  
-       return tsk->nsproxy->ipc_ns;
+       return ns;
  }
  
  static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h

index 5a8e3903d7707f32490ee607766ccf8d4882c8cf..12b2ab51032317357c9ca49221f0af864668af0d 100644 (file)
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -4,9 +4,10 @@
  
  struct mnt_namespace;
  struct fs_struct;
+struct user_namespace;
  
  extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
-               struct fs_struct *);
+               struct user_namespace *, struct fs_struct *);
  extern void put_mnt_ns(struct mnt_namespace *ns);
  
  extern const struct file_operations proc_mounts_operations;
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h

index cc37a55ad004391597661e13071f3c6e1c708c19..10e5947491c7b9b54f423ab567f9948be021eb6f 100644 (file)
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -67,7 +67,7 @@ void exit_task_namespaces(struct task_struct *tsk);
  void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
  void free_nsproxy(struct nsproxy *ns);
  int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
-       struct fs_struct *);
+       struct cred *, struct fs_struct *);
  int __init nsproxy_cache_init(void);
  
  static inline void put_nsproxy(struct nsproxy *ns)
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h

index 65e3e87eacc59aab1d95a8bd1a14a2e61f29c1f3..bf285999273a6bce0da2db81c007b643883f37ff 100644 (file)
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -21,6 +21,7 @@ struct pid_namespace {
         struct kref kref;
         struct pidmap pidmap[PIDMAP_ENTRIES];
         int last_pid;
+       int nr_hashed;
         struct task_struct *child_reaper;
         struct kmem_cache *pid_cachep;
         unsigned int level;
@@ -31,9 +32,12 @@ struct pid_namespace {
  #ifdef CONFIG_BSD_PROCESS_ACCT
         struct bsd_acct_struct *bacct;
  #endif
+       struct user_namespace *user_ns;
+       struct work_struct proc_work;
         kgid_t pid_gid;
         int hide_pid;
         int reboot;     /* group exit code if this pidns was rebooted */
+       unsigned int proc_inum;
  };
  
  extern struct pid_namespace init_pid_ns;
@@ -46,7 +50,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
         return ns;
  }
  
-extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
+extern struct pid_namespace *copy_pid_ns(unsigned long flags,
+       struct user_namespace *user_ns, struct pid_namespace *ns);
  extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
  extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
  extern void put_pid_ns(struct pid_namespace *ns);
@@ -59,8 +64,8 @@ static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
         return ns;
  }
  
-static inline struct pid_namespace *
-copy_pid_ns(unsigned long flags, struct pid_namespace *ns)
+static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
+       struct user_namespace *user_ns, struct pid_namespace *ns)
  {
         if (flags & CLONE_NEWPID)
                 ns = ERR_PTR(-EINVAL);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h

index 3fd2e871ff1bfbd23fae57c536d8b247c19ef131..2e24018b7cecd9fd7a3e7e3090bb9a0d61c53a9d 100644 (file)
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -28,7 +28,11 @@ struct mm_struct;
   */
  
  enum {
-       PROC_ROOT_INO = 1,
+       PROC_ROOT_INO           = 1,
+       PROC_IPC_INIT_INO       = 0xEFFFFFFFU,
+       PROC_UTS_INIT_INO       = 0xEFFFFFFEU,
+       PROC_USER_INIT_INO      = 0xEFFFFFFDU,
+       PROC_PID_INIT_INO       = 0xEFFFFFFCU,
  };
  
  /*
@@ -174,7 +178,10 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
         struct proc_dir_entry *parent);
  
  extern struct file *proc_ns_fget(int fd);
+extern bool proc_ns_inode(struct inode *inode);
  
+extern int proc_alloc_inum(unsigned int *pino);
+extern void proc_free_inum(unsigned int inum);
  #else
  
  #define proc_net_fops_create(net, name, mode, fops)  ({ (void)(mode), NULL; })
@@ -229,6 +236,19 @@ static inline struct file *proc_ns_fget(int fd)
         return ERR_PTR(-EINVAL);
  }
  
+static inline bool proc_ns_inode(struct inode *inode)
+{
+       return false;   /* !CONFIG_PROC_FS stub: no inode is ever reported as a namespace inode */
+}
+
+static inline int proc_alloc_inum(unsigned int *inum)
+{
+       *inum = 1;      /* !CONFIG_PROC_FS stub: fixed placeholder, nothing is allocated */
+       return 0;       /* always reports success */
+}
+static inline void proc_free_inum(unsigned int inum)
+{      /* !CONFIG_PROC_FS stub: nothing to release */
+}
  #endif /* CONFIG_PROC_FS */
  
  #if !defined(CONFIG_PROC_KCORE)
@@ -247,10 +267,14 @@ struct proc_ns_operations {
         void *(*get)(struct task_struct *task);
         void (*put)(void *ns);
         int (*install)(struct nsproxy *nsproxy, void *ns);
+       unsigned int (*inum)(void *ns);
  };
  extern const struct proc_ns_operations netns_operations;
  extern const struct proc_ns_operations utsns_operations;
  extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations userns_operations;
+extern const struct proc_ns_operations mntns_operations;
  
  union proc_op {
         int (*proc_get_link)(struct dentry *, struct path *);
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h

index 95142cae446a7205e9887029a7433eff1b147d81..b9bd2e6c73ccb51ec8f4fadec473d64bcb689fea 100644 (file)
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -25,6 +25,7 @@ struct user_namespace {
         struct user_namespace   *parent;
         kuid_t                  owner;
         kgid_t                  group;
+       unsigned int            proc_inum;
  };
  
  extern struct user_namespace init_user_ns;
@@ -39,6 +40,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
  }
  
  extern int create_user_ns(struct cred *new);
+extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
  extern void free_user_ns(struct kref *kref);
  
  static inline void put_user_ns(struct user_namespace *ns)
@@ -66,6 +68,14 @@ static inline int create_user_ns(struct cred *new)
         return -EINVAL;
  }
  
+static inline int unshare_userns(unsigned long unshare_flags,  /* inline fallback; presumably the !CONFIG_USER_NS variant (the #if is outside this hunk) */
+                                struct cred **new_cred)        /* never written by this variant */
+{
+       if (unshare_flags & CLONE_NEWUSER)
+               return -EINVAL; /* a new user namespace was requested but cannot be created here */
+       return 0;               /* nothing to do for any other flag combination */
+}
+
  static inline void put_user_ns(struct user_namespace *ns)
  {
  }
diff --git a/include/linux/utsname.h b/include/linux/utsname.h

index 2b345206722a2ff374bf690a84be4a20a2ffabfe..239e27733d6ccd42482c0cda35e28ec052ef730e 100644 (file)
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -23,6 +23,7 @@ struct uts_namespace {
         struct kref kref;
         struct new_utsname name;
         struct user_namespace *user_ns;
+       unsigned int proc_inum;
  };
  extern struct uts_namespace init_uts_ns;
  
@@ -33,7 +34,7 @@ static inline void get_uts_ns(struct uts_namespace *ns)
  }
  
  extern struct uts_namespace *copy_utsname(unsigned long flags,
-                                         struct task_struct *tsk);
+       struct user_namespace *user_ns, struct uts_namespace *old_ns);
  extern void free_uts_ns(struct kref *kref);
  
  static inline void put_uts_ns(struct uts_namespace *ns)
@@ -50,12 +51,12 @@ static inline void put_uts_ns(struct uts_namespace *ns)
  }
  
  static inline struct uts_namespace *copy_utsname(unsigned long flags, /* inline fallback variant of copy_utsname() */
-                                                struct task_struct *tsk)
+       struct user_namespace *user_ns, struct uts_namespace *old_ns)   /* user_ns is unused here; old_ns replaces the old tsk->nsproxy->uts_ns lookup */
  {
         if (flags & CLONE_NEWUTS)
                 return ERR_PTR(-EINVAL);        /* creating a new uts namespace is refused in this variant */
  
-       return tsk->nsproxy->uts_ns;
+       return old_ns;  /* hand back the caller-supplied namespace unchanged; no reference is taken here */
  }
  #endif
  
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h

index c5a43f56b79690104c94811bfa8f83719bf1aaab..de644bcd861343961d2ef6de675250416b98725b 100644 (file)
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -56,6 +56,8 @@ struct net {
  
         struct user_namespace   *user_ns;       /* Owning user namespace */
  
+       unsigned int            proc_inum;
+
         struct proc_dir_entry   *proc_net;
         struct proc_dir_entry   *proc_net_stat;
  
diff --git a/init/Kconfig b/init/Kconfig

index 1a207efca5918d8ba97a8f9abffcc65527f3da2d..675d8a2326cf29fc3c758e6a4533e98d40aa6aa1 100644 (file)
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1069,11 +1069,9 @@ config UIDGID_CONVERTED
         # Filesystems
         depends on 9P_FS = n
         depends on AFS_FS = n
-       depends on AUTOFS4_FS = n
         depends on CEPH_FS = n
         depends on CIFS = n
         depends on CODA_FS = n
-       depends on FUSE_FS = n
         depends on GFS2_FS = n
         depends on NCP_FS = n
         depends on NFSD = n
diff --git a/init/main.c b/init/main.c

index 63ae904a99a8eb3718f6a57ee515c12f0b60b8dc..baf1f0f5c4611eb08b3f0eae7995c5d789f8e741 100644 (file)
--- a/init/main.c
+++ b/init/main.c
@@ -812,7 +812,6 @@ static int __ref kernel_init(void *unused)
         system_state = SYSTEM_RUNNING;
         numa_default_policy();
  
-       current->signal->flags |= SIGNAL_UNKILLABLE;
         flush_delayed_fput();
  
         if (ramdisk_execute_command) {
diff --git a/init/version.c b/init/version.c

index 86fe0ccb997abdd2e97a920288fd2d3fbfd9736e..58170f18912d885e9fcd76b2892c4866beda577d 100644 (file)
--- a/init/version.c
+++ b/init/version.c
@@ -12,6 +12,7 @@
  #include <linux/utsname.h>
  #include <generated/utsrelease.h>
  #include <linux/version.h>
+#include <linux/proc_fs.h>
  
  #ifndef CONFIG_KALLSYMS
  #define version(a) Version_ ## a
@@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = {
                 .domainname     = UTS_DOMAINNAME,
         },
         .user_ns = &init_user_ns,
+       .proc_inum = PROC_UTS_INIT_INO,
  };
  EXPORT_SYMBOL_GPL(init_uts_ns);
  
diff --git a/ipc/msgutil.c b/ipc/msgutil.c

index 26143d377c951be9fd5e855074d431707bfd9f10..6471f1bdae96f2c6650a3c369bb77d0dc357d388 100644 (file)
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -16,6 +16,7 @@
  #include <linux/msg.h>
  #include <linux/ipc_namespace.h>
  #include <linux/utsname.h>
+#include <linux/proc_fs.h>
  #include <asm/uaccess.h>
  
  #include "util.h"
@@ -30,6 +31,7 @@ DEFINE_SPINLOCK(mq_lock);
  struct ipc_namespace init_ipc_ns = {   /* statically initialized ipc namespace */
         .count          = ATOMIC_INIT(1),
         .user_ns = &init_user_ns,
+       .proc_inum = PROC_IPC_INIT_INO, /* reserved constant from the proc_fs.h enum rather than a proc_alloc_inum() result */
  };
  
  atomic_t nr_ipc_ns = ATOMIC_INIT(1);
diff --git a/ipc/namespace.c b/ipc/namespace.c

index f362298c5ce465e3585dfd2cbfdd528e29e4790d..cf3386a51de25509f15c85871d447e95feee07fe 100644 (file)
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -16,7 +16,7 @@
  
  #include "util.h"
  
-static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
+static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
                                            struct ipc_namespace *old_ns)
  {
         struct ipc_namespace *ns;
@@ -26,9 +26,16 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
         if (ns == NULL)
                 return ERR_PTR(-ENOMEM);
  
+       err = proc_alloc_inum(&ns->proc_inum);
+       if (err) {
+               kfree(ns);
+               return ERR_PTR(err);
+       }
+
         atomic_set(&ns->count, 1);
         err = mq_init_ns(ns);
         if (err) {
+               proc_free_inum(ns->proc_inum);
                 kfree(ns);
                 return ERR_PTR(err);
         }
@@ -46,19 +53,17 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
         ipcns_notify(IPCNS_CREATED);
         register_ipcns_notifier(ns);
  
-       ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns));
+       ns->user_ns = get_user_ns(user_ns);
  
         return ns;
  }
  
  struct ipc_namespace *copy_ipcs(unsigned long flags,   /* share or create an ipc namespace depending on CLONE_NEWIPC */
-                               struct task_struct *tsk)
+       struct user_namespace *user_ns, struct ipc_namespace *ns)       /* ns: existing namespace; user_ns: passed on to create_ipc_ns() for a new one */
  {
-       struct ipc_namespace *ns = tsk->nsproxy->ipc_ns;
-
         if (!(flags & CLONE_NEWIPC))
                 return get_ipc_ns(ns);  /* no new namespace requested: reuse ns via get_ipc_ns() */
-       return create_ipc_ns(tsk, ns);
+       return create_ipc_ns(user_ns, ns);      /* may be an ERR_PTR(); create_ipc_ns() returns one on its failure paths */
  }
  
  /*
@@ -113,6 +118,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
          */
         ipcns_notify(IPCNS_REMOVED);
         put_user_ns(ns->user_ns);
+       proc_free_inum(ns->proc_inum);
         kfree(ns);
  }
  
@@ -161,8 +167,12 @@ static void ipcns_put(void *ns)
         return put_ipc_ns(ns);
  }
  
-static int ipcns_install(struct nsproxy *nsproxy, void *ns)
+static int ipcns_install(struct nsproxy *nsproxy, void *new)
  {
+       struct ipc_namespace *ns = new;
+       if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+               return -EPERM;
+
         /* Ditch state from the old ipc namespace */
         exit_sem(current);
         put_ipc_ns(nsproxy->ipc_ns);
@@ -170,10 +180,18 @@ static int ipcns_install(struct nsproxy *nsproxy, void *ns)
         return 0;
  }
  
+static unsigned int ipcns_inum(void *vp)       /* .inum hook of ipcns_operations; vp is treated as a struct ipc_namespace */
+{
+       struct ipc_namespace *ns = vp;
+
+       return ns->proc_inum;   /* value stored by proc_alloc_inum() in create_ipc_ns(), or PROC_IPC_INIT_INO for init_ipc_ns */
+}
+
  const struct proc_ns_operations ipcns_operations = {   /* hook table for the "ipc" namespace */
         .name           = "ipc",
         .type           = CLONE_NEWIPC,
         .get            = ipcns_get,
         .put            = ipcns_put,
         .install        = ipcns_install,        /* checks CAP_SYS_ADMIN against the target namespace's user_ns before switching */
+       .inum           = ipcns_inum,   /* reports the namespace's proc_inum */
  };
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index f34c41bfaa37daa2b399c6387740d397703a277c..9915ffe013727d68fbe21013288b282154d576de 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3409,7 +3409,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
  {
         struct cgroup_pidlist *l;
         /* don't need task_nsproxy() if we're looking at ourself */
-       struct pid_namespace *ns = current->nsproxy->pid_ns;
+       struct pid_namespace *ns = task_active_pid_ns(current);
  
         /*
          * We can't drop the pidlist_mutex before taking the l->mutex in case
diff --git a/kernel/events/core.c b/kernel/events/core.c

index f9ff5493171d83208b140d19f8276fe3908e670b..301079d06f24ebe44081a286766436de104a3a91 100644 (file)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6155,7 +6155,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
  
         event->parent           = parent_event;
  
-       event->ns               = get_pid_ns(current->nsproxy->pid_ns);
+       event->ns               = get_pid_ns(task_active_pid_ns(current));
         event->id               = atomic64_inc_return(&perf_event_id);
  
         event->state            = PERF_EVENT_STATE_INACTIVE;
diff --git a/kernel/exit.c b/kernel/exit.c

index 50d2e93c36ea6ff421192e7fb0f92a3cb0df6e63..b4df21937216e1704670d89e8ef8fe8aa9aee810 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,18 +72,6 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
                 list_del_rcu(&p->tasks);
                 list_del_init(&p->sibling);
                 __this_cpu_dec(process_counts);
-               /*
-                * If we are the last child process in a pid namespace to be
-                * reaped, notify the reaper sleeping zap_pid_ns_processes().
-                */
-               if (IS_ENABLED(CONFIG_PID_NS)) {
-                       struct task_struct *parent = p->real_parent;
-
-                       if ((task_active_pid_ns(parent)->child_reaper == parent) &&
-                           list_empty(&parent->children) &&
-                           (parent->flags & PF_EXITING))
-                               wake_up_process(parent);
-               }
         }
         list_del_rcu(&p->thread_group);
  }
diff --git a/kernel/fork.c b/kernel/fork.c

index 115d6c2e4cca0dda8601efe7c3b114f3c37859a3..c36c4e301efef7c92a39b35b71a67e72cc0fb365 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1044,8 +1044,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
         atomic_set(&sig->live, 1);
         atomic_set(&sig->sigcnt, 1);
         init_waitqueue_head(&sig->wait_chldexit);
-       if (clone_flags & CLONE_NEWPID)
-               sig->flags |= SIGNAL_UNKILLABLE;
         sig->curr_target = tsk;
         init_sigpending(&sig->shared_pending);
         INIT_LIST_HEAD(&sig->posix_timers);
@@ -1438,8 +1436,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
  
                 if (thread_group_leader(p)) {
-                       if (is_child_reaper(pid))
-                               p->nsproxy->pid_ns->child_reaper = p;
+                       if (is_child_reaper(pid)) {
+                               ns_of_pid(pid)->child_reaper = p;
+                               p->signal->flags |= SIGNAL_UNKILLABLE;
+                       }
  
                         p->signal->leader_pid = pid;
                         p->signal->tty = tty_kref_get(current->signal->tty);
@@ -1473,8 +1473,6 @@ bad_fork_cleanup_io:
         if (p->io_context)
                 exit_io_context(p);
  bad_fork_cleanup_namespaces:
-       if (unlikely(clone_flags & CLONE_NEWPID))
-               pid_ns_release_proc(p->nsproxy->pid_ns);
         exit_task_namespaces(p);
  bad_fork_cleanup_mm:
         if (p->mm)
@@ -1554,15 +1552,9 @@ long do_fork(unsigned long clone_flags,
          * Do some preliminary argument and permissions checking before we
          * actually start allocating stuff
          */
-       if (clone_flags & CLONE_NEWUSER) {
-               if (clone_flags & CLONE_THREAD)
+       if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
+               if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
                         return -EINVAL;
-               /* hopefully this check will go away when userns support is
-                * complete
-                */
-               if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
-                               !capable(CAP_SETGID))
-                       return -EPERM;
         }
  
         /*
@@ -1724,7 +1716,8 @@ static int check_unshare_flags(unsigned long unshare_flags)
  {
         if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
                                 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-                               CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+                               CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
+                               CLONE_NEWUSER|CLONE_NEWPID))
                 return -EINVAL;
         /*
          * Not implemented, but pretend it works if there is nothing to
@@ -1791,19 +1784,40 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
  {
         struct fs_struct *fs, *new_fs = NULL;
         struct files_struct *fd, *new_fd = NULL;
+       struct cred *new_cred = NULL;
         struct nsproxy *new_nsproxy = NULL;
         int do_sysvsem = 0;
         int err;
  
-       err = check_unshare_flags(unshare_flags);
-       if (err)
-               goto bad_unshare_out;
-
+       /*
+        * If unsharing a user namespace must also unshare the thread.
+        */
+       if (unshare_flags & CLONE_NEWUSER)
+               unshare_flags |= CLONE_THREAD;
+       /*
+        * If unsharing a pid namespace must also unshare the thread.
+        */
+       if (unshare_flags & CLONE_NEWPID)
+               unshare_flags |= CLONE_THREAD;
+       /*
+        * If unsharing a thread from a thread group, must also unshare vm.
+        */
+       if (unshare_flags & CLONE_THREAD)
+               unshare_flags |= CLONE_VM;
+       /*
+        * If unsharing vm, must also unshare signal handlers.
+        */
+       if (unshare_flags & CLONE_VM)
+               unshare_flags |= CLONE_SIGHAND;
         /*
          * If unsharing namespace, must also unshare filesystem information.
          */
         if (unshare_flags & CLONE_NEWNS)
                 unshare_flags |= CLONE_FS;
+
+       err = check_unshare_flags(unshare_flags);
+       if (err)
+               goto bad_unshare_out;
         /*
          * CLONE_NEWIPC must also detach from the undolist: after switching
          * to a new ipc namespace, the semaphore arrays from the old
@@ -1817,11 +1831,15 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
         err = unshare_fd(unshare_flags, &new_fd);
         if (err)
                 goto bad_unshare_cleanup_fs;
-       err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs);
+       err = unshare_userns(unshare_flags, &new_cred);
         if (err)
                 goto bad_unshare_cleanup_fd;
+       err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
+                                        new_cred, new_fs);
+       if (err)
+               goto bad_unshare_cleanup_cred;
  
-       if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
+       if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
                 if (do_sysvsem) {
                         /*
                          * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1854,11 +1872,20 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
                 }
  
                 task_unlock(current);
+
+               if (new_cred) {
+                       /* Install the new user namespace */
+                       commit_creds(new_cred);
+                       new_cred = NULL;
+               }
         }
  
         if (new_nsproxy)
                 put_nsproxy(new_nsproxy);
  
+bad_unshare_cleanup_cred:
+       if (new_cred)
+               put_cred(new_cred);
  bad_unshare_cleanup_fd:
         if (new_fd)
                 put_files_struct(new_fd);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c

index 7e1c3de1ce45f1520fc9b1ef71507b43c2f08697..78e2ecb201655575dceef87cd9f8c771547bcfb7 100644 (file)
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -57,7 +57,8 @@ static inline struct nsproxy *create_nsproxy(void)
   * leave it to the caller to do proper locking and attach it to task.
   */
  static struct nsproxy *create_new_namespaces(unsigned long flags,
-                       struct task_struct *tsk, struct fs_struct *new_fs)
+       struct task_struct *tsk, struct user_namespace *user_ns,
+       struct fs_struct *new_fs)
  {
         struct nsproxy *new_nsp;
         int err;
@@ -66,31 +67,31 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
         if (!new_nsp)
                 return ERR_PTR(-ENOMEM);
  
-       new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs);
+       new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
         if (IS_ERR(new_nsp->mnt_ns)) {
                 err = PTR_ERR(new_nsp->mnt_ns);
                 goto out_ns;
         }
  
-       new_nsp->uts_ns = copy_utsname(flags, tsk);
+       new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
         if (IS_ERR(new_nsp->uts_ns)) {
                 err = PTR_ERR(new_nsp->uts_ns);
                 goto out_uts;
         }
  
-       new_nsp->ipc_ns = copy_ipcs(flags, tsk);
+       new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
         if (IS_ERR(new_nsp->ipc_ns)) {
                 err = PTR_ERR(new_nsp->ipc_ns);
                 goto out_ipc;
         }
  
-       new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
+       new_nsp->pid_ns = copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns);
         if (IS_ERR(new_nsp->pid_ns)) {
                 err = PTR_ERR(new_nsp->pid_ns);
                 goto out_pid;
         }
  
-       new_nsp->net_ns = copy_net_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->net_ns);
+       new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
         if (IS_ERR(new_nsp->net_ns)) {
                 err = PTR_ERR(new_nsp->net_ns);
                 goto out_net;
@@ -122,6 +123,7 @@ out_ns:
  int copy_namespaces(unsigned long flags, struct task_struct *tsk)
  {
         struct nsproxy *old_ns = tsk->nsproxy;
+       struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
         struct nsproxy *new_ns;
         int err = 0;
  
@@ -134,7 +136,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
                                 CLONE_NEWPID | CLONE_NEWNET)))
                 return 0;
  
-       if (!capable(CAP_SYS_ADMIN)) {
+       if (!ns_capable(user_ns, CAP_SYS_ADMIN)) {
                 err = -EPERM;
                 goto out;
         }
@@ -151,7 +153,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
                 goto out;
         }
  
-       new_ns = create_new_namespaces(flags, tsk, tsk->fs);
+       new_ns = create_new_namespaces(flags, tsk,
+                                      task_cred_xxx(tsk, user_ns), tsk->fs);
         if (IS_ERR(new_ns)) {
                 err = PTR_ERR(new_ns);
                 goto out;
@@ -183,19 +186,21 @@ void free_nsproxy(struct nsproxy *ns)
   * On success, returns the new nsproxy.
   */
  int unshare_nsproxy_namespaces(unsigned long unshare_flags,
-               struct nsproxy **new_nsp, struct fs_struct *new_fs)
+       struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
  {
+       struct user_namespace *user_ns;
         int err = 0;
  
         if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-                              CLONE_NEWNET)))
+                              CLONE_NEWNET | CLONE_NEWPID)))
                 return 0;
  
-       if (!capable(CAP_SYS_ADMIN))
+       user_ns = new_cred ? new_cred->user_ns : current_user_ns();
+       if (!ns_capable(user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
-       *new_nsp = create_new_namespaces(unshare_flags, current,
-                               new_fs ? new_fs : current->fs);
+       *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
+                                        new_fs ? new_fs : current->fs);
         if (IS_ERR(*new_nsp)) {
                 err = PTR_ERR(*new_nsp);
                 goto out;
@@ -241,9 +246,6 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
         struct file *file;
         int err;
  
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
         file = proc_ns_fget(fd);
         if (IS_ERR(file))
                 return PTR_ERR(file);
@@ -254,7 +256,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
         if (nstype && (ops->type != nstype))
                 goto out;
  
-       new_nsproxy = create_new_namespaces(0, tsk, tsk->fs);
+       new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
         if (IS_ERR(new_nsproxy)) {
                 err = PTR_ERR(new_nsproxy);
                 goto out;
diff --git a/kernel/pid.c b/kernel/pid.c

index fd996c1ed9f891988607812abb95dc8820ab3751..3e2cf8100acc84b23b5741603c44fc908e3d5068 100644 (file)
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,6 +36,7 @@
  #include <linux/pid_namespace.h>
  #include <linux/init_task.h>
  #include <linux/syscalls.h>
+#include <linux/proc_fs.h>
  
  #define pid_hashfn(nr, ns)     \
         hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -78,6 +79,8 @@ struct pid_namespace init_pid_ns = {
         .last_pid = 0,
         .level = 0,
         .child_reaper = &init_task,
+       .user_ns = &init_user_ns,
+       .proc_inum = PROC_PID_INIT_INO,
  };
  EXPORT_SYMBOL_GPL(init_pid_ns);
  
@@ -269,8 +272,24 @@ void free_pid(struct pid *pid)
         unsigned long flags;
  
         spin_lock_irqsave(&pidmap_lock, flags);
-       for (i = 0; i <= pid->level; i++)
-               hlist_del_rcu(&pid->numbers[i].pid_chain);
+       for (i = 0; i <= pid->level; i++) {
+               struct upid *upid = pid->numbers + i;
+               struct pid_namespace *ns = upid->ns;
+               hlist_del_rcu(&upid->pid_chain);
+               switch(--ns->nr_hashed) {
+               case 1:
+                       /* When all that is left in the pid namespace
+                        * is the reaper wake up the reaper.  The reaper
+                        * may be sleeping in zap_pid_ns_processes().
+                        */
+                       wake_up_process(ns->child_reaper);
+                       break;
+               case 0:
+                       ns->nr_hashed = -1;
+                       schedule_work(&ns->proc_work);
+                       break;
+               }
+       }
         spin_unlock_irqrestore(&pidmap_lock, flags);
  
         for (i = 0; i <= pid->level; i++)
@@ -292,6 +311,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
                 goto out;
  
         tmp = ns;
+       pid->level = ns->level;
         for (i = ns->level; i >= 0; i--) {
                 nr = alloc_pidmap(tmp);
                 if (nr < 0)
@@ -302,22 +322,32 @@ struct pid *alloc_pid(struct pid_namespace *ns)
                 tmp = tmp->parent;
         }
  
+       if (unlikely(is_child_reaper(pid))) {
+               if (pid_ns_prepare_proc(ns))
+                       goto out_free;
+       }
+
         get_pid_ns(ns);
-       pid->level = ns->level;
         atomic_set(&pid->count, 1);
         for (type = 0; type < PIDTYPE_MAX; ++type)
                 INIT_HLIST_HEAD(&pid->tasks[type]);
  
         upid = pid->numbers + ns->level;
         spin_lock_irq(&pidmap_lock);
-       for ( ; upid >= pid->numbers; --upid)
+       if (ns->nr_hashed < 0)
+               goto out_unlock;
+       for ( ; upid >= pid->numbers; --upid) {
                 hlist_add_head_rcu(&upid->pid_chain,
                                 &pid_hash[pid_hashfn(upid->nr, upid->ns)]);
+               upid->ns->nr_hashed++;
+       }
         spin_unlock_irq(&pidmap_lock);
  
  out:
         return pid;
  
+out_unlock:
+       spin_unlock(&pidmap_lock);
  out_free:
         while (++i <= ns->level)
                 free_pidmap(pid->numbers + i);
@@ -344,7 +374,7 @@ EXPORT_SYMBOL_GPL(find_pid_ns);
  
  struct pid *find_vpid(int nr)  /* find_pid_ns() lookup of nr in the calling task's active pid namespace */
  {
-       return find_pid_ns(nr, current->nsproxy->pid_ns);
+       return find_pid_ns(nr, task_active_pid_ns(current));    /* active namespace instead of nsproxy->pid_ns */
  }
  EXPORT_SYMBOL_GPL(find_vpid);
  
@@ -428,7 +458,7 @@ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
  
  struct task_struct *find_task_by_vpid(pid_t vnr)       /* find_task_by_pid_ns() lookup of vnr in the calling task's active pid namespace */
  {
-       return find_task_by_pid_ns(vnr, current->nsproxy->pid_ns);
+       return find_task_by_pid_ns(vnr, task_active_pid_ns(current));   /* active namespace instead of nsproxy->pid_ns */
  }
  
  struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
@@ -483,7 +513,7 @@ EXPORT_SYMBOL_GPL(pid_nr_ns);
  
  pid_t pid_vnr(struct pid *pid) /* pid_nr_ns() of pid, evaluated against the calling task's active pid namespace */
  {
-       return pid_nr_ns(pid, current->nsproxy->pid_ns);
+       return pid_nr_ns(pid, task_active_pid_ns(current));     /* active namespace instead of nsproxy->pid_ns */
  }
  EXPORT_SYMBOL_GPL(pid_vnr);
  
@@ -494,7 +524,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
  
         rcu_read_lock();
         if (!ns)
-               ns = current->nsproxy->pid_ns;
+               ns = task_active_pid_ns(current);
         if (likely(pid_alive(task))) {
                 if (type != PIDTYPE_PID)
                         task = task->group_leader;
@@ -569,6 +599,7 @@ void __init pidmap_init(void)
         /* Reserve PID 0. We never call free_pidmap(0) */
         set_bit(0, init_pid_ns.pidmap[0].page);
         atomic_dec(&init_pid_ns.pidmap[0].nr_free);
+       init_pid_ns.nr_hashed = 1;
  
         init_pid_ns.pid_cachep = KMEM_CACHE(pid,
                         SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c

index 7b07cc0dfb75fb6b2f2f802178abf82565d9c1a4..560da0dab230aab631dab7003597815fc4d21c6b 100644 (file)
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -10,6 +10,7 @@
  
  #include <linux/pid.h>
  #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
  #include <linux/syscalls.h>
  #include <linux/err.h>
  #include <linux/acct.h>
@@ -71,10 +72,17 @@ err_alloc:
         return NULL;
  }
  
+static void proc_cleanup_work(struct work_struct *work)        /* work handler: bound to ns->proc_work by INIT_WORK() in create_pid_namespace(), queued by free_pid() */
+{
+       struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work); /* recover the namespace that embeds this work item */
+       pid_ns_release_proc(ns);        /* runs from the workqueue rather than under pidmap_lock in free_pid() */
+}
+
  /* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
  #define MAX_PID_NS_LEVEL 32
  
-static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns)
+static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns,
+       struct pid_namespace *parent_pid_ns)
  {
         struct pid_namespace *ns;
         unsigned int level = parent_pid_ns->level + 1;
@@ -99,9 +107,15 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
         if (ns->pid_cachep == NULL)
                 goto out_free_map;
  
+       err = proc_alloc_inum(&ns->proc_inum);
+       if (err)
+               goto out_free_map;
+
         kref_init(&ns->kref);
         ns->level = level;
         ns->parent = get_pid_ns(parent_pid_ns);
+       ns->user_ns = get_user_ns(user_ns);
+       INIT_WORK(&ns->proc_work, proc_cleanup_work);
  
         set_bit(0, ns->pidmap[0].page);
         atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
@@ -109,14 +123,8 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
         for (i = 1; i < PIDMAP_ENTRIES; i++)
                 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
  
-       err = pid_ns_prepare_proc(ns);
-       if (err)
-               goto out_put_parent_pid_ns;
-
         return ns;
  
-out_put_parent_pid_ns:
-       put_pid_ns(parent_pid_ns);
  out_free_map:
         kfree(ns->pidmap[0].page);
  out_free:
@@ -129,18 +137,21 @@ static void destroy_pid_namespace(struct pid_namespace *ns)
  {
         int i;
  
+       proc_free_inum(ns->proc_inum);
         for (i = 0; i < PIDMAP_ENTRIES; i++)
                 kfree(ns->pidmap[i].page);
+       put_user_ns(ns->user_ns);
         kmem_cache_free(pid_ns_cachep, ns);
  }
  
-struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
+struct pid_namespace *copy_pid_ns(unsigned long flags, /* share old_ns, or create a child of it when CLONE_NEWPID is set */
+       struct user_namespace *user_ns, struct pid_namespace *old_ns)   /* user_ns is handed to create_pid_namespace() for the new namespace */
  {
         if (!(flags & CLONE_NEWPID))
                 return get_pid_ns(old_ns);      /* no new namespace requested: take a reference on the existing one */
-       if (flags & (CLONE_THREAD|CLONE_PARENT))
+       if (task_active_pid_ns(current) != old_ns)      /* only allowed when old_ns is the caller's own active pid namespace */
                 return ERR_PTR(-EINVAL);
-       return create_pid_namespace(old_ns);
+       return create_pid_namespace(user_ns, old_ns);
  }
  
  static void free_pid_ns(struct kref *kref)
@@ -211,22 +222,15 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
  
         /*
          * sys_wait4() above can't reap the TASK_DEAD children.
-        * Make sure they all go away, see __unhash_process().
+        * Make sure they all go away, see free_pid().
          */
         for (;;) {
-               bool need_wait = false;
-
-               read_lock(&tasklist_lock);
-               if (!list_empty(&current->children)) {
-                       __set_current_state(TASK_UNINTERRUPTIBLE);
-                       need_wait = true;
-               }
-               read_unlock(&tasklist_lock);
-
-               if (!need_wait)
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               if (pid_ns->nr_hashed == 1)
                         break;
                 schedule();
         }
+       __set_current_state(TASK_RUNNING);
  
         if (pid_ns->reboot)
                 current->signal->group_exit_code = pid_ns->reboot;
@@ -239,9 +243,10 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
  static int pid_ns_ctl_handler(struct ctl_table *table, int write,
                 void __user *buffer, size_t *lenp, loff_t *ppos)
  {
+       struct pid_namespace *pid_ns = task_active_pid_ns(current);
         struct ctl_table tmp = *table;
  
-       if (write && !capable(CAP_SYS_ADMIN))
+       if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
                 return -EPERM;
  
         /*
@@ -250,7 +255,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write,
          * it should synchronize its usage with external means.
          */
  
-       tmp.data = &current->nsproxy->pid_ns->last_pid;
+       tmp.data = &pid_ns->last_pid;
         return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
  }
  
@@ -299,6 +304,67 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
         return 0;
  }
  
+static void *pidns_get(struct task_struct *task)
+{
+       struct pid_namespace *ns;
+
+       rcu_read_lock();
+       ns = get_pid_ns(task_active_pid_ns(task));
+       rcu_read_unlock();
+
+       return ns;
+}
+
+static void pidns_put(void *ns)
+{
+       put_pid_ns(ns);
+}
+
+static int pidns_install(struct nsproxy *nsproxy, void *ns)
+{
+       struct pid_namespace *active = task_active_pid_ns(current);
+       struct pid_namespace *ancestor, *new = ns;
+
+       if (!ns_capable(new->user_ns, CAP_SYS_ADMIN))
+               return -EPERM;
+
+       /*
+        * Only allow entering the current active pid namespace
+        * or a child of the current active pid namespace.
+        *
+        * This is required for fork to return a usable pid value and
+        * this maintains the property that processes and their
+        * children can not escape their current pid namespace.
+        */
+       if (new->level < active->level)
+               return -EINVAL;
+
+       ancestor = new;
+       while (ancestor->level > active->level)
+               ancestor = ancestor->parent;
+       if (ancestor != active)
+               return -EINVAL;
+
+       put_pid_ns(nsproxy->pid_ns);
+       nsproxy->pid_ns = get_pid_ns(new);
+       return 0;
+}
+
+static unsigned int pidns_inum(void *ns)
+{
+       struct pid_namespace *pid_ns = ns;
+       return pid_ns->proc_inum;
+}
+
+const struct proc_ns_operations pidns_operations = {
+       .name           = "pid",
+       .type           = CLONE_NEWPID,
+       .get            = pidns_get,
+       .put            = pidns_put,
+       .install        = pidns_install,
+       .inum           = pidns_inum,
+};
+
  static __init int pid_namespaces_init(void)
  {
         pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c

index 1f5e55dda955544ca4a3f1f944e967f6b561bea2..7b09b88862cc8fdb78a893e3fbbd93418a5858aa 100644 (file)
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -215,8 +215,12 @@ ok:
         smp_rmb();
         if (task->mm)
                 dumpable = get_dumpable(task->mm);
-       if (!dumpable  && !ptrace_has_cap(task_user_ns(task), mode))
+       rcu_read_lock();
+       if (!dumpable && !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
+               rcu_read_unlock();
                 return -EPERM;
+       }
+       rcu_read_unlock();
  
         return security_ptrace_access_check(task, mode);
  }
@@ -280,8 +284,10 @@ static int ptrace_attach(struct task_struct *task, long request,
  
         if (seize)
                 flags |= PT_SEIZED;
-       if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
+       rcu_read_lock();
+       if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
                 flags |= PT_PTRACE_CAP;
+       rcu_read_unlock();
         task->ptrace = flags;
  
         __ptrace_link(task, current);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index c1fb82104bfbc9405d0c782799173ecd388ceb9a..257002c13bb02acad92c74347e3b38ca3bc881b1 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4097,8 +4097,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
                 goto out_free_cpus_allowed;
         }
         retval = -EPERM;
-       if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE))
-               goto out_unlock;
+       if (!check_same_owner(p)) {
+               rcu_read_lock();
+               if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+                       rcu_read_unlock();
+                       goto out_unlock;
+               }
+               rcu_read_unlock();
+       }
  
         retval = security_task_setscheduler(p);
         if (retval)
diff --git a/kernel/signal.c b/kernel/signal.c

index a49c7f36ceb3e595d98a437f0b3031a51cab305d..580a91e634710b6dbbc75f328c3bbef549b999cb 100644 (file)
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1753,7 +1753,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
          * see comment in do_notify_parent() about the following 4 lines
          */
         rcu_read_lock();
-       info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns);
+       info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(parent));
         info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
         rcu_read_unlock();
  
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c

index 65bdcf198d4e1c3f0727176a439aca4f22800136..5a63844505015668ce88ec649f5586badf49dbbd 100644 (file)
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1344,7 +1344,7 @@ static ssize_t binary_sysctl(const int *name, int nlen,
                 goto out_putname;
         }
  
-       mnt = current->nsproxy->pid_ns->proc_mnt;
+       mnt = task_active_pid_ns(current)->proc_mnt;
         file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
         result = PTR_ERR(file);
         if (IS_ERR(file))
diff --git a/kernel/user.c b/kernel/user.c

index 750acffbe9ec5b20191607040c7ec8e840158253..33acb5e53a5ff0415ad14cae01ea318a92aab6d1 100644 (file)
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,6 +16,7 @@
  #include <linux/interrupt.h>
  #include <linux/export.h>
  #include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
  
  /*
   * userns count is 1 for root user, 1 for init_uts_ns,
@@ -51,6 +52,7 @@ struct user_namespace init_user_ns = {
         },
         .owner = GLOBAL_ROOT_UID,
         .group = GLOBAL_ROOT_GID,
+       .proc_inum = PROC_USER_INIT_INO,
  };
  EXPORT_SYMBOL_GPL(init_user_ns);
  
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c

index 456a6b9fba34f3104bc9db64119acd9d3fdfa8df..f5975ccf9348dffd7d12f7674a47cd47495f3bdd 100644 (file)
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,6 +9,7 @@
  #include <linux/nsproxy.h>
  #include <linux/slab.h>
  #include <linux/user_namespace.h>
+#include <linux/proc_fs.h>
  #include <linux/highuid.h>
  #include <linux/cred.h>
  #include <linux/securebits.h>
@@ -26,6 +27,24 @@ static struct kmem_cache *user_ns_cachep __read_mostly;
  static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
                                 struct uid_gid_map *map);
  
+static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
+{
+       /* Start with the same capabilities as init but useless for doing
+        * anything as the capabilities are bound to the new user namespace.
+        */
+       cred->securebits = SECUREBITS_DEFAULT;
+       cred->cap_inheritable = CAP_EMPTY_SET;
+       cred->cap_permitted = CAP_FULL_SET;
+       cred->cap_effective = CAP_FULL_SET;
+       cred->cap_bset = CAP_FULL_SET;
+#ifdef CONFIG_KEYS
+       key_put(cred->request_key_auth);
+       cred->request_key_auth = NULL;
+#endif
+       /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
+       cred->user_ns = user_ns;
+}
+
  /*
   * Create a new user namespace, deriving the creator from the user in the
   * passed credentials, and replacing that user with the new root user for the
@@ -39,6 +58,7 @@ int create_user_ns(struct cred *new)
         struct user_namespace *ns, *parent_ns = new->user_ns;
         kuid_t owner = new->euid;
         kgid_t group = new->egid;
+       int ret;
  
         /* The creator needs a mapping in the parent user namespace
          * or else we won't be able to reasonably tell userspace who
@@ -52,38 +72,45 @@ int create_user_ns(struct cred *new)
         if (!ns)
                 return -ENOMEM;
  
+       ret = proc_alloc_inum(&ns->proc_inum);
+       if (ret) {
+               kmem_cache_free(user_ns_cachep, ns);
+               return ret;
+       }
+
         kref_init(&ns->kref);
+       /* Leave the new->user_ns reference with the new user namespace. */
         ns->parent = parent_ns;
         ns->owner = owner;
         ns->group = group;
  
-       /* Start with the same capabilities as init but useless for doing
-        * anything as the capabilities are bound to the new user namespace.
-        */
-       new->securebits = SECUREBITS_DEFAULT;
-       new->cap_inheritable = CAP_EMPTY_SET;
-       new->cap_permitted = CAP_FULL_SET;
-       new->cap_effective = CAP_FULL_SET;
-       new->cap_bset = CAP_FULL_SET;
-#ifdef CONFIG_KEYS
-       key_put(new->request_key_auth);
-       new->request_key_auth = NULL;
-#endif
-       /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
-
-       /* Leave the new->user_ns reference with the new user namespace. */
-       /* Leave the reference to our user_ns with the new cred. */
-       new->user_ns = ns;
+       set_cred_user_ns(new, ns);
  
         return 0;
  }
  
+int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
+{
+       struct cred *cred;
+
+       if (!(unshare_flags & CLONE_NEWUSER))
+               return 0;
+
+       cred = prepare_creds();
+       if (!cred)
+               return -ENOMEM;
+
+       *new_cred = cred;
+       return create_user_ns(cred);
+}
+
  void free_user_ns(struct kref *kref)
  {
         struct user_namespace *parent, *ns =
                 container_of(kref, struct user_namespace, kref);
  
         parent = ns->parent;
+       proc_free_inum(ns->proc_inum);
         kmem_cache_free(user_ns_cachep, ns);
         put_user_ns(parent);
  }
@@ -372,7 +399,7 @@ static int uid_m_show(struct seq_file *seq, void *v)
         struct user_namespace *lower_ns;
         uid_t lower;
  
-       lower_ns = current_user_ns();
+       lower_ns = seq_user_ns(seq);
         if ((lower_ns == ns) && lower_ns->parent)
                 lower_ns = lower_ns->parent;
  
@@ -393,7 +420,7 @@ static int gid_m_show(struct seq_file *seq, void *v)
         struct user_namespace *lower_ns;
         gid_t lower;
  
-       lower_ns = current_user_ns();
+       lower_ns = seq_user_ns(seq);
         if ((lower_ns == ns) && lower_ns->parent)
                 lower_ns = lower_ns->parent;
  
@@ -669,10 +696,14 @@ ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t siz
  {
         struct seq_file *seq = file->private_data;
         struct user_namespace *ns = seq->private;
+       struct user_namespace *seq_ns = seq_user_ns(seq);
  
         if (!ns->parent)
                 return -EPERM;
  
+       if ((seq_ns != ns) && (seq_ns != ns->parent))
+               return -EPERM;
+
         return map_write(file, buf, size, ppos, CAP_SETUID,
                          &ns->uid_map, &ns->parent->uid_map);
  }
@@ -681,10 +712,14 @@ ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t siz
  {
         struct seq_file *seq = file->private_data;
         struct user_namespace *ns = seq->private;
+       struct user_namespace *seq_ns = seq_user_ns(seq);
  
         if (!ns->parent)
                 return -EPERM;
  
+       if ((seq_ns != ns) && (seq_ns != ns->parent))
+               return -EPERM;
+
         return map_write(file, buf, size, ppos, CAP_SETGID,
                          &ns->gid_map, &ns->parent->gid_map);
  }
@@ -709,6 +744,21 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t
  static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
                                 struct uid_gid_map *new_map)
  {
+       /* Allow mapping to your own filesystem ids */
+       if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
+               u32 id = new_map->extent[0].lower_first;
+               if (cap_setid == CAP_SETUID) {
+                       kuid_t uid = make_kuid(ns->parent, id);
+                       if (uid_eq(uid, current_fsuid()))
+                               return true;
+               }
+               else if (cap_setid == CAP_SETGID) {
+                       kgid_t gid = make_kgid(ns->parent, id);
+                       if (gid_eq(gid, current_fsgid()))
+                               return true;
+               }
+       }
+
         /* Allow anyone to set a mapping that doesn't require privilege */
         if (!cap_valid(cap_setid))
                 return true;
@@ -722,6 +772,65 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
         return false;
  }
  
+static void *userns_get(struct task_struct *task)
+{
+       struct user_namespace *user_ns;
+
+       rcu_read_lock();
+       user_ns = get_user_ns(__task_cred(task)->user_ns);
+       rcu_read_unlock();
+
+       return user_ns;
+}
+
+static void userns_put(void *ns)
+{
+       put_user_ns(ns);
+}
+
+static int userns_install(struct nsproxy *nsproxy, void *ns)
+{
+       struct user_namespace *user_ns = ns;
+       struct cred *cred;
+
+       /* Don't allow gaining capabilities by reentering
+        * the same user namespace.
+        */
+       if (user_ns == current_user_ns())
+               return -EINVAL;
+
+       /* Threaded processes may not enter a different user namespace */
+       if (atomic_read(&current->mm->mm_users) > 1)
+               return -EINVAL;
+
+       if (!ns_capable(user_ns, CAP_SYS_ADMIN))
+               return -EPERM;
+
+       cred = prepare_creds();
+       if (!cred)
+               return -ENOMEM;
+
+       put_user_ns(cred->user_ns);
+       set_cred_user_ns(cred, get_user_ns(user_ns));
+
+       return commit_creds(cred);
+}
+
+static unsigned int userns_inum(void *ns)
+{
+       struct user_namespace *user_ns = ns;
+       return user_ns->proc_inum;
+}
+
+const struct proc_ns_operations userns_operations = {
+       .name           = "user",
+       .type           = CLONE_NEWUSER,
+       .get            = userns_get,
+       .put            = userns_put,
+       .install        = userns_install,
+       .inum           = userns_inum,
+};
+
  static __init int user_namespaces_init(void)
  {
         user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
diff --git a/kernel/utsname.c b/kernel/utsname.c

index 679d97a5d3fdf95c7f22958bc3f8276dd0657984..f6336d51d64c40a1759519ae0561c9ba07438ea8 100644 (file)
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -32,18 +32,25 @@ static struct uts_namespace *create_uts_ns(void)
   * @old_ns: namespace to clone
   * Return NULL on error (failure to kmalloc), new ns otherwise
   */
-static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
+static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
                                           struct uts_namespace *old_ns)
  {
         struct uts_namespace *ns;
+       int err;
  
         ns = create_uts_ns();
         if (!ns)
                 return ERR_PTR(-ENOMEM);
  
+       err = proc_alloc_inum(&ns->proc_inum);
+       if (err) {
+               kfree(ns);
+               return ERR_PTR(err);
+       }
+
         down_read(&uts_sem);
         memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
-       ns->user_ns = get_user_ns(task_cred_xxx(tsk, user_ns));
+       ns->user_ns = get_user_ns(user_ns);
         up_read(&uts_sem);
         return ns;
  }
@@ -55,9 +62,8 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
   * versa.
   */
  struct uts_namespace *copy_utsname(unsigned long flags,
-                                  struct task_struct *tsk)
+       struct user_namespace *user_ns, struct uts_namespace *old_ns)
  {
-       struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
         struct uts_namespace *new_ns;
  
         BUG_ON(!old_ns);
@@ -66,7 +72,7 @@ struct uts_namespace *copy_utsname(unsigned long flags,
         if (!(flags & CLONE_NEWUTS))
                 return old_ns;
  
-       new_ns = clone_uts_ns(tsk, old_ns);
+       new_ns = clone_uts_ns(user_ns, old_ns);
  
         put_uts_ns(old_ns);
         return new_ns;
@@ -78,6 +84,7 @@ void free_uts_ns(struct kref *kref)
  
         ns = container_of(kref, struct uts_namespace, kref);
         put_user_ns(ns->user_ns);
+       proc_free_inum(ns->proc_inum);
         kfree(ns);
  }
  
@@ -102,19 +109,31 @@ static void utsns_put(void *ns)
         put_uts_ns(ns);
  }
  
-static int utsns_install(struct nsproxy *nsproxy, void *ns)
+static int utsns_install(struct nsproxy *nsproxy, void *new)
  {
+       struct uts_namespace *ns = new;
+
+       if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+               return -EPERM;
+
         get_uts_ns(ns);
         put_uts_ns(nsproxy->uts_ns);
         nsproxy->uts_ns = ns;
         return 0;
  }
  
+static unsigned int utsns_inum(void *vp)
+{
+       struct uts_namespace *ns = vp;
+
+       return ns->proc_inum;
+}
+
  const struct proc_ns_operations utsns_operations = {
         .name           = "uts",
         .type           = CLONE_NEWUTS,
         .get            = utsns_get,
         .put            = utsns_put,
         .install        = utsns_install,
+       .inum           = utsns_inum,
  };
-
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c

index 6456439cbbd9e4160c05105554db665226892620..2e9a3132b8dd11ac8a9851e8042d8982af9c20ef 100644 (file)
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -381,6 +381,21 @@ struct net *get_net_ns_by_pid(pid_t pid)
  }
  EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
  
+static __net_init int net_ns_net_init(struct net *net)
+{
+       return proc_alloc_inum(&net->proc_inum);
+}
+
+static __net_exit void net_ns_net_exit(struct net *net)
+{
+       proc_free_inum(net->proc_inum);
+}
+
+static struct pernet_operations __net_initdata net_ns_ops = {
+       .init = net_ns_net_init,
+       .exit = net_ns_net_exit,
+};
+
  static int __init net_ns_init(void)
  {
         struct net_generic *ng;
@@ -412,6 +427,8 @@ static int __init net_ns_init(void)
  
         mutex_unlock(&net_mutex);
  
+       register_pernet_subsys(&net_ns_ops);
+
         return 0;
  }
  
@@ -630,16 +647,28 @@ static void netns_put(void *ns)
  
  static int netns_install(struct nsproxy *nsproxy, void *ns)
  {
+       struct net *net = ns;
+
+       if (!ns_capable(net->user_ns, CAP_SYS_ADMIN))
+               return -EPERM;
+
         put_net(nsproxy->net_ns);
-       nsproxy->net_ns = get_net(ns);
+       nsproxy->net_ns = get_net(net);
         return 0;
  }
  
+static unsigned int netns_inum(void *ns)
+{
+       struct net *net = ns;
+       return net->proc_inum;
+}
+
  const struct proc_ns_operations netns_operations = {
         .name           = "net",
         .type           = CLONE_NEWNET,
         .get            = netns_get,
         .put            = netns_put,
         .install        = netns_install,
+       .inum           = netns_inum,
  };
  #endif
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c

index 2663145d1197a104b71f0e2feca175d21156ee00..23414b93771f30ec82ccf76b6cfb49fbed27edef 100644 (file)
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -298,14 +298,18 @@ int yama_ptrace_access_check(struct task_struct *child,
                         /* No additional restrictions. */
                         break;
                 case YAMA_SCOPE_RELATIONAL:
+                       rcu_read_lock();
                         if (!task_is_descendant(current, child) &&
                             !ptracer_exception_found(current, child) &&
-                           !ns_capable(task_user_ns(child), CAP_SYS_PTRACE))
+                           !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
                                 rc = -EPERM;
+                       rcu_read_unlock();
                         break;
                 case YAMA_SCOPE_CAPABILITY:
-                       if (!ns_capable(task_user_ns(child), CAP_SYS_PTRACE))
+                       rcu_read_lock();
+                       if (!ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE))
                                 rc = -EPERM;
+                       rcu_read_unlock();
                         break;
                 case YAMA_SCOPE_NO_ATTACH:
                 default:
@@ -343,8 +347,10 @@ int yama_ptrace_traceme(struct task_struct *parent)
         /* Only disallow PTRACE_TRACEME on more aggressive settings. */
         switch (ptrace_scope) {
         case YAMA_SCOPE_CAPABILITY:
-               if (!ns_capable(task_user_ns(parent), CAP_SYS_PTRACE))
+               rcu_read_lock();
+               if (!ns_capable(__task_cred(parent)->user_ns, CAP_SYS_PTRACE))
                         rc = -EPERM;
+               rcu_read_unlock();
                 break;
         case YAMA_SCOPE_NO_ATTACH:
                 rc = -EPERM;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 17 Dec 2012 23:44:47 +0000 (15:44 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 17 Dec 2012 23:44:47 +0000 (15:44 -0800)
arch/powerpc/platforms/cell/spufs/sched.c		patch \| blob \| history
arch/um/drivers/mconsole_kern.c		patch \| blob \| history
drivers/staging/android/binder.c		patch \| blob \| history
fs/attr.c		patch \| blob \| history
fs/autofs4/autofs_i.h		patch \| blob \| history
fs/autofs4/dev-ioctl.c		patch \| blob \| history
fs/autofs4/inode.c		patch \| blob \| history
fs/autofs4/waitq.c		patch \| blob \| history
fs/exec.c		patch \| blob \| history
fs/fuse/dev.c		patch \| blob \| history
fs/fuse/dir.c		patch \| blob \| history
fs/fuse/fuse_i.h		patch \| blob \| history
fs/fuse/inode.c		patch \| blob \| history
fs/hppfs/hppfs.c		patch \| blob \| history
fs/mount.h		patch \| blob \| history
fs/namespace.c		patch \| blob \| history
fs/open.c		patch \| blob \| history
fs/pnode.h		patch \| blob \| history
fs/proc/Makefile		patch \| blob \| history
fs/proc/array.c		patch \| blob \| history
fs/proc/base.c		patch \| blob \| history
fs/proc/generic.c		patch \| blob \| history
fs/proc/inode.c		patch \| blob \| history
fs/proc/internal.h		patch \| blob \| history
fs/proc/namespaces.c		patch \| blob \| history
fs/proc/root.c		patch \| blob \| history
fs/proc/self.c	[new file with mode: 0644]	patch \| blob
fs/sysfs/mount.c		patch \| blob \| history
include/linux/cred.h		patch \| blob \| history
include/linux/fs.h		patch \| blob \| history
include/linux/ipc_namespace.h		patch \| blob \| history
include/linux/mnt_namespace.h		patch \| blob \| history
include/linux/nsproxy.h		patch \| blob \| history
include/linux/pid_namespace.h		patch \| blob \| history
include/linux/proc_fs.h		patch \| blob \| history
include/linux/user_namespace.h		patch \| blob \| history
include/linux/utsname.h		patch \| blob \| history
include/net/net_namespace.h		patch \| blob \| history
init/Kconfig		patch \| blob \| history
init/main.c		patch \| blob \| history
init/version.c		patch \| blob \| history
ipc/msgutil.c		patch \| blob \| history
ipc/namespace.c		patch \| blob \| history
kernel/cgroup.c		patch \| blob \| history
kernel/events/core.c		patch \| blob \| history
kernel/exit.c		patch \| blob \| history
kernel/fork.c		patch \| blob \| history
kernel/nsproxy.c		patch \| blob \| history
kernel/pid.c		patch \| blob \| history
kernel/pid_namespace.c		patch \| blob \| history
kernel/ptrace.c		patch \| blob \| history
kernel/sched/core.c		patch \| blob \| history
kernel/signal.c		patch \| blob \| history
kernel/sysctl_binary.c		patch \| blob \| history
kernel/user.c		patch \| blob \| history
kernel/user_namespace.c		patch \| blob \| history
kernel/utsname.c		patch \| blob \| history
net/core/net_namespace.c		patch \| blob \| history
security/yama/yama_lsm.c		patch \| blob \| history