coredump: move core dump functionality into its own file

author Alex Kelly <alex.page.kelly@gmail.com>

Wed, 26 Sep 2012 01:34:50 +0000 (11:34 +1000)

committer Stephen Rothwell <sfr@canb.auug.org.au>

Thu, 27 Sep 2012 07:28:42 +0000 (17:28 +1000)
author Alex Kelly <alex.page.kelly@gmail.com>
Wed, 26 Sep 2012 01:34:50 +0000 (11:34 +1000)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 27 Sep 2012 07:28:42 +0000 (17:28 +1000)
diff --git a/fs/Makefile b/fs/Makefile

index 2fb977934673812c52e2aa9e7ed0a392e5181a34..8938f8250320ecbde9ab3502141178ddd7492ad3 100644 (file)
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y :=      open.o read_write.o file_table.o super.o \
                 attr.o bad_inode.o file.o filesystems.o namespace.o \
                 seq_file.o xattr.o libfs.o fs-writeback.o \
                 pnode.o drop_caches.o splice.o sync.o utimes.o \
-               stack.o fs_struct.o statfs.o
+               stack.o fs_struct.o statfs.o coredump.o
  
  ifeq ($(CONFIG_BLOCK),y)
  obj-y +=       buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/coredump.c b/fs/coredump.c

new file mode 100644 (file)

index 0000000..f045bba
--- /dev/null
+++ b/fs/coredump.c
@@ -0,0 +1,686 @@
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/swap.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/perf_event.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/key.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/utsname.h>
+#include <linux/pid_namespace.h>
+#include <linux/module.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/tsacct_kern.h>
+#include <linux/cn_proc.h>
+#include <linux/audit.h>
+#include <linux/tracehook.h>
+#include <linux/kmod.h>
+#include <linux/fsnotify.h>
+#include <linux/fs_struct.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/oom.h>
+#include <linux/compat.h>
+
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+#include <asm/exec.h>
+
+#include <trace/events/task.h>
+#include "internal.h"
+
+#include <trace/events/sched.h>
+
+/*
+ * Core dump tunables and the name-buffer type used by format_corename()
+ * and do_coredump() below.
+ */
+
+/* Non-zero: append ".<pid>" when core_pattern has no %p and is not a pipe. */
+int core_uses_pid;
+/* Name template expanded by format_corename(); a leading '|' selects a pipe. */
+char core_pattern[CORENAME_MAX_SIZE] = "core";
+/* Non-zero: cap on concurrent pipe dumps; do_coredump() then also waits for
+ * the helper via wait_for_dump_helpers(). */
+unsigned int core_pipe_limit;
+
+/* Growable buffer that receives the expanded core_pattern. */
+struct core_name {
+       char *corename;
+       int used, size;
+};
+/*
+ * Size multiplier: name buffers are CORENAME_MAX_SIZE * call_count bytes.
+ * expand_corename() increments it; nothing in this file decrements it.
+ */
+static atomic_t call_count = ATOMIC_INIT(1);
+
+/* The maximal length of core_pattern is also specified in sysctl.c */
+
+/*
+ * Grow cn->corename to the next CORENAME_MAX_SIZE multiple.
+ *
+ * Bumps the global call_count and resizes the buffer to
+ * CORENAME_MAX_SIZE * call_count bytes.  Returns 0 on success.  On
+ * allocation failure the previous buffer is freed, cn->corename is left
+ * NULL and -ENOMEM is returned.
+ */
+static int expand_corename(struct core_name *cn)
+{
+       char *prev = cn->corename;
+       char *grown;
+
+       cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
+       grown = krealloc(prev, cn->size, GFP_KERNEL);
+       cn->corename = grown;
+       if (grown)
+               return 0;
+
+       kfree(prev);
+       return -ENOMEM;
+}
+
+/*
+ * Append printf-style formatted text to the core name buffer.
+ * @cn:  buffer being built; cn->used is advanced past the new text
+ * @fmt: printf-style format, followed by its arguments
+ *
+ * The output is measured first and the buffer is grown until the text and
+ * its terminating NUL fit.  Returns 0 on success, or the error from
+ * expand_corename(), in which case cn->corename is NULL.
+ */
+static int cn_printf(struct core_name *cn, const char *fmt, ...)
+{
+       int need;
+       int ret;
+       va_list arg;
+
+       /* Dry run: learn how many characters the expansion produces. */
+       va_start(arg, fmt);
+       need = vsnprintf(NULL, 0, fmt, arg);
+       va_end(arg);
+
+       /*
+        * expand_corename() sizes the buffer from call_count, not from
+        * "need", so a single call may still be too small for a long
+        * argument (the %E path is formatted from a PATH_MAX buffer).
+        * Keep growing until the text fits rather than writing past the
+        * end of the allocation.
+        */
+       while (unlikely(need >= cn->size - cn->used - 1)) {
+               ret = expand_corename(cn);
+               if (ret)
+                       return ret;
+       }
+
+       va_start(arg, fmt);
+       vsnprintf(cn->corename + cn->used, need + 1, fmt, arg);
+       va_end(arg);
+       cn->used += need;
+       return 0;
+}
+
+/* Replace every '/' in the NUL-terminated string @str with '!', in place. */
+static void cn_escape(char *str)
+{
+       char *slash = strchr(str, '/');
+
+       while (slash) {
+               *slash = '!';
+               slash = strchr(slash + 1, '/');
+       }
+}
+
+/*
+ * Append the path of the current task's executable to @cn, with every '/'
+ * replaced by '!'.  If the mm has no exe file, "<comm> (path unknown)" is
+ * appended instead (also escaped).
+ *
+ * Returns 0 on success, -ENOMEM if a buffer cannot be allocated, or the
+ * error reported by d_path().
+ */
+static int cn_print_exe_file(struct core_name *cn)
+{
+       struct file *exe_file;
+       char *pathbuf, *path;
+       int ret;
+
+       exe_file = get_mm_exe_file(current->mm);
+       if (!exe_file) {
+               /*
+                * Remember an offset, not a pointer: cn_printf() may
+                * reallocate cn->corename, or leave it NULL on failure.
+                */
+               int start = cn->used;
+
+               ret = cn_printf(cn, "%s (path unknown)", current->comm);
+               if (!ret)
+                       cn_escape(cn->corename + start);
+               return ret;
+       }
+
+       pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
+       if (!pathbuf) {
+               ret = -ENOMEM;
+               goto put_exe_file;
+       }
+
+       path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
+       if (IS_ERR(path)) {
+               ret = PTR_ERR(path);
+               goto free_buf;
+       }
+
+       /* Escape in the scratch buffer, before the text is copied into cn. */
+       cn_escape(path);
+
+       ret = cn_printf(cn, "%s", path);
+
+free_buf:
+       kfree(pathbuf);
+put_exe_file:
+       fput(exe_file);
+       return ret;
+}
+
+/*
+ * format_corename - expand core_pattern into a freshly allocated name.
+ * @cn:    receives the buffer (cn->corename), its fill level and its size
+ * @signr: signal number substituted for %s
+ *
+ * Returns 1 when core_pattern starts with '|' (dump is piped to a helper),
+ * 0 for a plain file name, or a negative errno.  On success the caller owns
+ * cn->corename and must kfree() it.  On error the buffer has already been
+ * released here and cn->corename is NULL.
+ */
+static int format_corename(struct core_name *cn, long signr)
+{
+       const struct cred *cred = current_cred();
+       const char *pat_ptr = core_pattern;
+       int ispipe = (*pat_ptr == '|');
+       int pid_in_pattern = 0;
+       int err = 0;
+
+       cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
+       cn->corename = kmalloc(cn->size, GFP_KERNEL);
+       cn->used = 0;
+
+       if (!cn->corename)
+               return -ENOMEM;
+
+       /* Keep the result a valid string even if nothing gets emitted. */
+       cn->corename[0] = '\0';
+
+       /* Walk the pattern, copying literals and expanding % specifiers. */
+       while (*pat_ptr) {
+               if (*pat_ptr != '%') {
+                       err = cn_printf(cn, "%c", *pat_ptr++);
+               } else {
+                       switch (*++pat_ptr) {
+                       /* single % at the end, drop that */
+                       case 0:
+                               goto out;
+                       /* Double percent, output one percent */
+                       case '%':
+                               err = cn_printf(cn, "%c", '%');
+                               break;
+                       /* pid */
+                       case 'p':
+                               pid_in_pattern = 1;
+                               err = cn_printf(cn, "%d",
+                                             task_tgid_vnr(current));
+                               break;
+                       /* uid */
+                       case 'u':
+                               err = cn_printf(cn, "%d", cred->uid);
+                               break;
+                       /* gid */
+                       case 'g':
+                               err = cn_printf(cn, "%d", cred->gid);
+                               break;
+                       /* signal that caused the coredump */
+                       case 's':
+                               err = cn_printf(cn, "%ld", signr);
+                               break;
+                       /* UNIX time of coredump */
+                       case 't': {
+                               struct timeval tv;
+                               do_gettimeofday(&tv);
+                               err = cn_printf(cn, "%lu", tv.tv_sec);
+                               break;
+                       }
+                       /* hostname */
+                       case 'h': {
+                               /*
+                                * Keep an offset rather than a pointer:
+                                * cn_printf() may move the buffer or leave
+                                * it NULL on failure.
+                                */
+                               int start = cn->used;
+
+                               down_read(&uts_sem);
+                               err = cn_printf(cn, "%s",
+                                             utsname()->nodename);
+                               up_read(&uts_sem);
+                               if (!err)
+                                       cn_escape(cn->corename + start);
+                               break;
+                       }
+                       /* executable */
+                       case 'e': {
+                               int start = cn->used;
+
+                               err = cn_printf(cn, "%s", current->comm);
+                               if (!err)
+                                       cn_escape(cn->corename + start);
+                               break;
+                       }
+                       /* path of the executable, '/' replaced by '!' */
+                       case 'E':
+                               err = cn_print_exe_file(cn);
+                               break;
+                       /* core limit size */
+                       case 'c':
+                               err = cn_printf(cn, "%lu",
+                                             rlimit(RLIMIT_CORE));
+                               break;
+                       /* unknown specifier: silently dropped */
+                       default:
+                               break;
+                       }
+                       ++pat_ptr;
+               }
+
+               if (err)
+                       goto fail;
+       }
+
+out:
+       /* Backward compatibility with core_uses_pid:
+        *
+        * If core_pattern does not include a %p (as is the default)
+        * and core_uses_pid is set, then .%pid will be appended to
+        * the filename. Do not do this for piped commands.
+        *
+        * A pattern ending in a lone '%' lands here as well, so it gets
+        * the same suffix as any other pattern without %p. */
+       if (!ispipe && !pid_in_pattern && core_uses_pid) {
+               err = cn_printf(cn, ".%d", task_tgid_vnr(current));
+               if (err)
+                       goto fail;
+       }
+       return ispipe;
+
+fail:
+       /*
+        * Callers treat a negative return as "no name was produced", so do
+        * not leave an allocation behind.  kfree(NULL) is fine when the
+        * failure came from expand_corename().
+        */
+       kfree(cn->corename);
+       cn->corename = NULL;
+       return err;
+}
+
+/*
+ * Mark the thread group of @start as exiting with @exit_code and queue
+ * SIGKILL for every thread in it, other than current, that still has an mm.
+ *
+ * Returns the number of threads that were signalled.
+ * NOTE(review): both callers in this file invoke it with the relevant
+ * siglock held - presumably required; confirm.
+ */
+static int zap_process(struct task_struct *start, int exit_code)
+{
+       struct task_struct *t;
+       int nr = 0;
+
+       start->signal->flags = SIGNAL_GROUP_EXIT;
+       start->signal->group_exit_code = exit_code;
+       start->signal->group_stop_count = 0;
+
+       t = start;
+       do {
+               task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
+               /* The dumping thread itself and mm-less threads are skipped. */
+               if (t != current && t->mm) {
+                       sigaddset(&t->pending.signal, SIGKILL);
+                       signal_wake_up(t, 1);
+                       nr++;
+               }
+       } while_each_thread(start, t);
+
+       return nr;
+}
+
+/*
+ * Publish @core_state on @mm and kill every other task that uses @mm.
+ *
+ * Returns -EAGAIN if the thread group of @tsk is already in group exit.
+ * Otherwise returns the number of tasks signalled, which is also stored in
+ * core_state->nr_threads.
+ */
+static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
+                               struct core_state *core_state, int exit_code)
+{
+       struct task_struct *g, *p;
+       unsigned long flags;
+       int nr = -EAGAIN;
+
+       spin_lock_irq(&tsk->sighand->siglock);
+       if (!signal_group_exit(tsk->signal)) {
+               mm->core_state = core_state;
+               nr = zap_process(tsk, exit_code);
+       }
+       spin_unlock_irq(&tsk->sighand->siglock);
+       if (unlikely(nr < 0))
+               return nr;
+
+       /* Fast path: every user of the mm is in our own thread group. */
+       if (atomic_read(&mm->mm_users) == nr + 1)
+               goto done;
+       /*
+        * We should find and kill all tasks which use this mm, and we should
+        * count them correctly into ->nr_threads. We don't take tasklist
+        * lock, but this is safe wrt:
+        *
+        * fork:
+        *      None of sub-threads can fork after zap_process(leader). All
+        *      processes which were created before this point should be
+        *      visible to zap_threads() because copy_process() adds the new
+        *      process to the tail of init_task.tasks list, and lock/unlock
+        *      of ->siglock provides a memory barrier.
+        *
+        * do_exit:
+        *      The caller holds mm->mmap_sem. This means that the task which
+        *      uses this mm can't pass exit_mm(), so it can't exit or clear
+        *      its ->mm.
+        *
+        * de_thread:
+        *      It does list_replace_rcu(&leader->tasks, &current->tasks),
+        *      we must see either old or new leader, this does not matter.
+        *      However, it can change p->sighand, so lock_task_sighand(p)
+        *      must be used. Since p->mm != NULL and we hold ->mmap_sem
+        *      it can't fail.
+        *
+        *      Note also that "g" can be the old leader with ->mm == NULL
+        *      and already unhashed and thus removed from ->thread_group.
+        *      This is OK, __unhash_process()->list_del_rcu() does not
+        *      clear the ->next pointer, we will find the new leader via
+        *      next_thread().
+        */
+       rcu_read_lock();
+       for_each_process(g) {
+               if (g == tsk->group_leader)
+                       continue;
+               if (g->flags & PF_KTHREAD)
+                       continue;
+               p = g;
+               do {
+                       /*
+                        * Only the first thread that still has an mm is
+                        * examined; the group is zapped through it or
+                        * skipped entirely.
+                        */
+                       if (p->mm) {
+                               if (unlikely(p->mm == mm)) {
+                                       lock_task_sighand(p, &flags);
+                                       nr += zap_process(p, exit_code);
+                                       unlock_task_sighand(p, &flags);
+                               }
+                               break;
+                       }
+               } while_each_thread(g, p);
+       }
+       rcu_read_unlock();
+done:
+       atomic_set(&core_state->nr_threads, nr);
+       return nr;
+}
+
+/*
+ * Start the dump on the current mm and wait until the other threads that
+ * share it have been stopped.
+ *
+ * Returns -EBUSY if mm->core_state is already set, otherwise the result of
+ * zap_threads(): a negative errno, or the number of other tasks that were
+ * signalled (0 when there were none to wait for).
+ */
+static int coredump_wait(int exit_code, struct core_state *core_state)
+{
+       struct task_struct *tsk = current;
+       struct mm_struct *mm = tsk->mm;
+       int core_waiters = -EBUSY;
+
+       init_completion(&core_state->startup);
+       core_state->dumper.task = tsk;
+       core_state->dumper.next = NULL;
+
+       down_write(&mm->mmap_sem);
+       if (!mm->core_state)
+               core_waiters = zap_threads(tsk, mm, core_state, exit_code);
+       up_write(&mm->mmap_sem);
+
+       if (core_waiters > 0) {
+               struct core_thread *ptr;
+
+               wait_for_completion(&core_state->startup);
+               /*
+                * Wait for all the threads to become inactive, so that
+                * all the thread context (extended register state, like
+                * fpu etc) gets copied to the memory.
+                */
+               ptr = core_state->dumper.next;
+               while (ptr != NULL) {
+                       wait_task_inactive(ptr->task, 0);
+                       ptr = ptr->next;
+               }
+       }
+
+       return core_waiters;
+}
+
+/*
+ * Release the threads queued on mm->core_state: each entry on the
+ * dumper.next list has its ->task cleared and the task is woken, then
+ * mm->core_state is reset to NULL.
+ */
+static void coredump_finish(struct mm_struct *mm)
+{
+       struct core_thread *curr, *next;
+       struct task_struct *task;
+
+       next = mm->core_state->dumper.next;
+       while ((curr = next) != NULL) {
+               /* Read both fields before ->task is cleared below. */
+               next = curr->next;
+               task = curr->task;
+               /*
+                * see exit_mm(), curr->task must not see
+                * ->task == NULL before we read ->next.
+                */
+               smp_mb();
+               curr->task = NULL;
+               wake_up_process(task);
+       }
+
+       mm->core_state = NULL;
+}
+
+/*
+ * Block until the process reading the core pipe has gone away.
+ * @file: the write end of the pipe the dump was written to
+ *
+ * The dumper temporarily counts itself as a reader instead of a writer and
+ * then sleeps on the pipe until it is the only reader left or a signal is
+ * pending.  The counts are restored before returning.
+ */
+static void wait_for_dump_helpers(struct file *file)
+{
+       struct pipe_inode_info *pipe;
+
+       pipe = file->f_path.dentry->d_inode->i_pipe;
+
+       pipe_lock(pipe);
+       pipe->readers++;
+       pipe->writers--;
+
+       /* readers > 1 means someone besides us still has the read end. */
+       while ((pipe->readers > 1) && (!signal_pending(current))) {
+               wake_up_interruptible_sync(&pipe->wait);
+               kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+               pipe_wait(pipe);
+       }
+
+       pipe->readers--;
+       pipe->writers++;
+       pipe_unlock(pipe);
+
+}
+
+/*
+ * umh_pipe_setup
+ * helper function to customize the process used
+ * to collect the core in userspace.  Specifically
+ * it sets up a pipe and installs its read end as
+ * fd 0 (stdin) for the process; the write end is
+ * stored in the coredump_params passed via
+ * info->data.  Returns 0 on success, or the error
+ * from create_pipe_files() on failure.
+ * Note that it also sets the core limit to 1.  This
+ * is a special value that we use to trap recursive
+ * core dumps
+ */
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+{
+       struct file *files[2];
+       struct coredump_params *cp = (struct coredump_params *)info->data;
+       int err = create_pipe_files(files, 0);
+       if (err)
+               return err;
+
+       cp->file = files[1];
+
+       /* NOTE(review): the result of replace_fd() is not checked. */
+       replace_fd(0, files[0], 0);
+       /* and disallow core files too */
+       current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
+
+       return 0;
+}
+
+/*
+ * do_coredump - write a core image for the current task.
+ * @signr:     signal number; recorded in the dump parameters and available
+ *             to the name pattern
+ * @exit_code: group exit code handed to coredump_wait()
+ * @regs:      register state passed through to the binfmt's core_dump()
+ *
+ * Depending on core_pattern the image goes either to a file opened here or
+ * into a pipe feeding a user-mode helper.  Nothing is returned: on any
+ * failure the function unwinds through the labels at the bottom.
+ */
+void do_coredump(long signr, int exit_code, struct pt_regs *regs)
+{
+       struct core_state core_state;
+       struct core_name cn;
+       struct mm_struct *mm = current->mm;
+       struct linux_binfmt * binfmt;
+       const struct cred *old_cred;
+       struct cred *cred;
+       int retval = 0;
+       int flag = 0;
+       int ispipe;
+       struct files_struct *displaced;
+       bool need_nonrelative = false;
+       /* Number of pipe dumps in flight, checked against core_pipe_limit. */
+       static atomic_t core_dump_count = ATOMIC_INIT(0);
+       struct coredump_params cprm = {
+               .signr = signr,
+               .regs = regs,
+               .limit = rlimit(RLIMIT_CORE),
+               /*
+                * We must use the same mm->flags while dumping core to avoid
+                * inconsistency of bit flags, since this flag is not protected
+                * by any locks.
+                */
+               .mm_flags = mm->flags,
+       };
+
+       audit_core_dumps(signr);
+
+       binfmt = mm->binfmt;
+       if (!binfmt || !binfmt->core_dump)
+               goto fail;
+       if (!__get_dumpable(cprm.mm_flags))
+               goto fail;
+
+       cred = prepare_creds();
+       if (!cred)
+               goto fail;
+       /*
+        * We cannot trust fsuid as being the "true" uid of the process
+        * nor do we know its entire history. We only know it was tainted
+        * so we dump it as root in mode 2, and only into a controlled
+        * environment (pipe handler or fully qualified path).
+        */
+       if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
+               /* Setuid core dump mode */
+               flag = O_EXCL;          /* Stop rewrite attacks */
+               cred->fsuid = GLOBAL_ROOT_UID;  /* Dump root private */
+               need_nonrelative = true;
+       }
+
+       retval = coredump_wait(exit_code, &core_state);
+       if (retval < 0)
+               goto fail_creds;
+
+       old_cred = override_creds(cred);
+
+       /*
+        * Clear any false indication of pending signals that might
+        * be seen by the filesystem code called to write the core file.
+        */
+       clear_thread_flag(TIF_SIGPENDING);
+
+       /* > 0: pipe to helper, 0: plain file, < 0: error (handled below). */
+       ispipe = format_corename(&cn, signr);
+
+       if (ispipe) {
+               int dump_count;
+               char **helper_argv;
+
+               /*
+                * NOTE(review): fail_corename skips kfree(cn.corename), so
+                * this path relies on format_corename() not leaving a buffer
+                * allocated when it returns an error - verify.
+                */
+               if (ispipe < 0) {
+                       printk(KERN_WARNING "format_corename failed\n");
+                       printk(KERN_WARNING "Aborting core\n");
+                       goto fail_corename;
+               }
+
+               if (cprm.limit == 1) {
+                       /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
+                        *
+                        * Normally core limits are irrelevant to pipes, since
+                        * we're not writing to the file system, but we use
+                        * cprm.limit of 1 here as a special value, this is a
+                        * consistent way to catch recursive crashes.
+                        * We can still crash if the core_pattern binary sets
+                        * RLIM_CORE = !1, but it runs as root, and can do
+                        * lots of stupid things.
+                        *
+                        * Note that we use task_tgid_vnr here to grab the pid
+                        * of the process group leader.  That way we get the
+                        * right pid if a thread in a multi-threaded
+                        * core_pattern process dies.
+                        */
+                       printk(KERN_WARNING
+                               "Process %d(%s) has RLIMIT_CORE set to 1\n",
+                               task_tgid_vnr(current), current->comm);
+                       printk(KERN_WARNING "Aborting core\n");
+                       goto fail_unlock;
+               }
+               cprm.limit = RLIM_INFINITY;
+
+               dump_count = atomic_inc_return(&core_dump_count);
+               if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+                       printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
+                              task_tgid_vnr(current), current->comm);
+                       printk(KERN_WARNING "Skipping core dump\n");
+                       goto fail_dropcount;
+               }
+
+               /* Skip the leading '|'; the rest is the helper command line. */
+               helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
+               if (!helper_argv) {
+                       printk(KERN_WARNING "%s failed to allocate memory\n",
+                              __func__);
+                       goto fail_dropcount;
+               }
+
+               /* umh_pipe_setup() stores the pipe's write end in cprm.file. */
+               retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
+                                       NULL, UMH_WAIT_EXEC, umh_pipe_setup,
+                                       NULL, &cprm);
+               argv_free(helper_argv);
+               if (retval) {
+                       printk(KERN_INFO "Core dump to %s pipe failed\n",
+                              cn.corename);
+                       goto close_fail;
+               }
+       } else {
+               struct inode *inode;
+
+               if (cprm.limit < binfmt->min_coredump)
+                       goto fail_unlock;
+
+               if (need_nonrelative && cn.corename[0] != '/') {
+                       printk(KERN_WARNING "Pid %d(%s) can only dump core "\
+                               "to fully qualified path!\n",
+                               task_tgid_vnr(current), current->comm);
+                       printk(KERN_WARNING "Skipping core dump\n");
+                       goto fail_unlock;
+               }
+
+               /* NOTE(review): the literal 2 is presumably O_RDWR - confirm. */
+               cprm.file = filp_open(cn.corename,
+                                O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+                                0600);
+               if (IS_ERR(cprm.file))
+                       goto fail_unlock;
+
+               inode = cprm.file->f_path.dentry->d_inode;
+               if (inode->i_nlink > 1)
+                       goto close_fail;
+               if (d_unhashed(cprm.file->f_path.dentry))
+                       goto close_fail;
+               /*
+                * AK: actually i see no reason to not allow this for named
+                * pipes etc, but keep the previous behaviour for now.
+                */
+               if (!S_ISREG(inode->i_mode))
+                       goto close_fail;
+               /*
+                * Dont allow local users get cute and trick others to coredump
+                * into their pre-created files.
+                */
+               if (!uid_eq(inode->i_uid, current_fsuid()))
+                       goto close_fail;
+               if (!cprm.file->f_op || !cprm.file->f_op->write)
+                       goto close_fail;
+               if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+                       goto close_fail;
+       }
+
+       /* get us an unshared descriptor table; almost always a no-op */
+       retval = unshare_files(&displaced);
+       if (retval)
+               goto close_fail;
+       if (displaced)
+               put_files_struct(displaced);
+       retval = binfmt->core_dump(&cprm);
+       /* A non-zero result sets the 0x80 bit in the group exit code. */
+       if (retval)
+               current->signal->group_exit_code |= 0x80;
+
+       if (ispipe && core_pipe_limit)
+               wait_for_dump_helpers(cprm.file);
+close_fail:
+       if (cprm.file)
+               filp_close(cprm.file, NULL);
+fail_dropcount:
+       if (ispipe)
+               atomic_dec(&core_dump_count);
+fail_unlock:
+       kfree(cn.corename);
+fail_corename:
+       coredump_finish(mm);
+       revert_creds(old_cred);
+fail_creds:
+       put_cred(cred);
+fail:
+       return;
+}
+
+/*
+ * Core dumping helper functions.  These are the only things you should
+ * do on a core-file: use only these functions to write out all the
+ * necessary info.
+ */
+
+/*
+ * dump_write - write @nr bytes starting at @addr to the core file.
+ *
+ * Returns non-zero only when @addr passes access_ok() and the file's
+ * ->write() reports that exactly @nr bytes were written; 0 otherwise.
+ */
+int dump_write(struct file *file, const void *addr, int nr)
+{
+       if (!access_ok(VERIFY_READ, addr, nr))
+               return 0;
+
+       return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+}
+EXPORT_SYMBOL(dump_write);
+
+/*
+ * dump_seek - advance the core file position by @off bytes.
+ *
+ * Uses the file's ->llseek() when it has a usable one.  Otherwise (for
+ * example when the target cannot seek) the gap is filled by writing zeroes,
+ * at most one page at a time.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int dump_seek(struct file *file, loff_t off)
+{
+       int ret = 1;
+
+       if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+               if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
+                       return 0;
+       } else {
+               char *buf = (char *)get_zeroed_page(GFP_KERNEL);
+
+               if (!buf)
+                       return 0;
+               while (off > 0) {
+                       /*
+                        * Clamp while still in loff_t.  Narrowing to
+                        * unsigned long first would truncate a large
+                        * offset where long is 32 bits and could produce
+                        * a zero-length write that never makes progress.
+                        */
+                       unsigned long n = PAGE_SIZE;
+
+                       if (off < (loff_t)PAGE_SIZE)
+                               n = off;
+                       if (!dump_write(file, buf, n)) {
+                               ret = 0;
+                               break;
+                       }
+                       off -= n;
+               }
+               free_page((unsigned long)buf);
+       }
+       return ret;
+}
+EXPORT_SYMBOL(dump_seek);
diff --git a/fs/exec.c b/fs/exec.c

index 6b2e20bf8a4865c3a392c3665ebcd17e599a93b7..b459c86c0bec2099486b83ac9e3b445be40dc475 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -65,19 +65,8 @@
  
  #include <trace/events/sched.h>
  
-int core_uses_pid;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
-unsigned int core_pipe_limit;
  int suid_dumpable = 0;
  
-struct core_name {
-       char *corename;
-       int used, size;
-};
-static atomic_t call_count = ATOMIC_INIT(1);
-
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
  static LIST_HEAD(formats);
  static DEFINE_RWLOCK(binfmt_lock);
  
@@ -1602,353 +1591,6 @@ void set_binfmt(struct linux_binfmt *new)
  
  EXPORT_SYMBOL(set_binfmt);
  
-static int expand_corename(struct core_name *cn)
-{
-       char *old_corename = cn->corename;
-
-       cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
-       cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
-
-       if (!cn->corename) {
-               kfree(old_corename);
-               return -ENOMEM;
-       }
-
-       return 0;
-}
-
-static int cn_printf(struct core_name *cn, const char *fmt, ...)
-{
-       char *cur;
-       int need;
-       int ret;
-       va_list arg;
-
-       va_start(arg, fmt);
-       need = vsnprintf(NULL, 0, fmt, arg);
-       va_end(arg);
-
-       if (likely(need < cn->size - cn->used - 1))
-               goto out_printf;
-
-       ret = expand_corename(cn);
-       if (ret)
-               goto expand_fail;
-
-out_printf:
-       cur = cn->corename + cn->used;
-       va_start(arg, fmt);
-       vsnprintf(cur, need + 1, fmt, arg);
-       va_end(arg);
-       cn->used += need;
-       return 0;
-
-expand_fail:
-       return ret;
-}
-
-static void cn_escape(char *str)
-{
-       for (; *str; str++)
-               if (*str == '/')
-                       *str = '!';
-}
-
-static int cn_print_exe_file(struct core_name *cn)
-{
-       struct file *exe_file;
-       char *pathbuf, *path;
-       int ret;
-
-       exe_file = get_mm_exe_file(current->mm);
-       if (!exe_file) {
-               char *commstart = cn->corename + cn->used;
-               ret = cn_printf(cn, "%s (path unknown)", current->comm);
-               cn_escape(commstart);
-               return ret;
-       }
-
-       pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
-       if (!pathbuf) {
-               ret = -ENOMEM;
-               goto put_exe_file;
-       }
-
-       path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
-       if (IS_ERR(path)) {
-               ret = PTR_ERR(path);
-               goto free_buf;
-       }
-
-       cn_escape(path);
-
-       ret = cn_printf(cn, "%s", path);
-
-free_buf:
-       kfree(pathbuf);
-put_exe_file:
-       fput(exe_file);
-       return ret;
-}
-
-/* format_corename will inspect the pattern parameter, and output a
- * name into corename, which must have space for at least
- * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
- */
-static int format_corename(struct core_name *cn, long signr)
-{
-       const struct cred *cred = current_cred();
-       const char *pat_ptr = core_pattern;
-       int ispipe = (*pat_ptr == '|');
-       int pid_in_pattern = 0;
-       int err = 0;
-
-       cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
-       cn->corename = kmalloc(cn->size, GFP_KERNEL);
-       cn->used = 0;
-
-       if (!cn->corename)
-               return -ENOMEM;
-
-       /* Repeat as long as we have more pattern to process and more output
-          space */
-       while (*pat_ptr) {
-               if (*pat_ptr != '%') {
-                       if (*pat_ptr == 0)
-                               goto out;
-                       err = cn_printf(cn, "%c", *pat_ptr++);
-               } else {
-                       switch (*++pat_ptr) {
-                       /* single % at the end, drop that */
-                       case 0:
-                               goto out;
-                       /* Double percent, output one percent */
-                       case '%':
-                               err = cn_printf(cn, "%c", '%');
-                               break;
-                       /* pid */
-                       case 'p':
-                               pid_in_pattern = 1;
-                               err = cn_printf(cn, "%d",
-                                             task_tgid_vnr(current));
-                               break;
-                       /* uid */
-                       case 'u':
-                               err = cn_printf(cn, "%d", cred->uid);
-                               break;
-                       /* gid */
-                       case 'g':
-                               err = cn_printf(cn, "%d", cred->gid);
-                               break;
-                       /* signal that caused the coredump */
-                       case 's':
-                               err = cn_printf(cn, "%ld", signr);
-                               break;
-                       /* UNIX time of coredump */
-                       case 't': {
-                               struct timeval tv;
-                               do_gettimeofday(&tv);
-                               err = cn_printf(cn, "%lu", tv.tv_sec);
-                               break;
-                       }
-                       /* hostname */
-                       case 'h': {
-                               char *namestart = cn->corename + cn->used;
-                               down_read(&uts_sem);
-                               err = cn_printf(cn, "%s",
-                                             utsname()->nodename);
-                               up_read(&uts_sem);
-                               cn_escape(namestart);
-                               break;
-                       }
-                       /* executable */
-                       case 'e': {
-                               char *commstart = cn->corename + cn->used;
-                               err = cn_printf(cn, "%s", current->comm);
-                               cn_escape(commstart);
-                               break;
-                       }
-                       case 'E':
-                               err = cn_print_exe_file(cn);
-                               break;
-                       /* core limit size */
-                       case 'c':
-                               err = cn_printf(cn, "%lu",
-                                             rlimit(RLIMIT_CORE));
-                               break;
-                       default:
-                               break;
-                       }
-                       ++pat_ptr;
-               }
-
-               if (err)
-                       return err;
-       }
-
-       /* Backward compatibility with core_uses_pid:
-        *
-        * If core_pattern does not include a %p (as is the default)
-        * and core_uses_pid is set, then .%pid will be appended to
-        * the filename. Do not do this for piped commands. */
-       if (!ispipe && !pid_in_pattern && core_uses_pid) {
-               err = cn_printf(cn, ".%d", task_tgid_vnr(current));
-               if (err)
-                       return err;
-       }
-out:
-       return ispipe;
-}
-
-static int zap_process(struct task_struct *start, int exit_code)
-{
-       struct task_struct *t;
-       int nr = 0;
-
-       start->signal->flags = SIGNAL_GROUP_EXIT;
-       start->signal->group_exit_code = exit_code;
-       start->signal->group_stop_count = 0;
-
-       t = start;
-       do {
-               task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
-               if (t != current && t->mm) {
-                       sigaddset(&t->pending.signal, SIGKILL);
-                       signal_wake_up(t, 1);
-                       nr++;
-               }
-       } while_each_thread(start, t);
-
-       return nr;
-}
-
-static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
-                               struct core_state *core_state, int exit_code)
-{
-       struct task_struct *g, *p;
-       unsigned long flags;
-       int nr = -EAGAIN;
-
-       spin_lock_irq(&tsk->sighand->siglock);
-       if (!signal_group_exit(tsk->signal)) {
-               mm->core_state = core_state;
-               nr = zap_process(tsk, exit_code);
-       }
-       spin_unlock_irq(&tsk->sighand->siglock);
-       if (unlikely(nr < 0))
-               return nr;
-
-       if (atomic_read(&mm->mm_users) == nr + 1)
-               goto done;
-       /*
-        * We should find and kill all tasks which use this mm, and we should
-        * count them correctly into ->nr_threads. We don't take tasklist
-        * lock, but this is safe wrt:
-        *
-        * fork:
-        *      None of sub-threads can fork after zap_process(leader). All
-        *      processes which were created before this point should be
-        *      visible to zap_threads() because copy_process() adds the new
-        *      process to the tail of init_task.tasks list, and lock/unlock
-        *      of ->siglock provides a memory barrier.
-        *
-        * do_exit:
-        *      The caller holds mm->mmap_sem. This means that the task which
-        *      uses this mm can't pass exit_mm(), so it can't exit or clear
-        *      its ->mm.
-        *
-        * de_thread:
-        *      It does list_replace_rcu(&leader->tasks, &current->tasks),
-        *      we must see either old or new leader, this does not matter.
-        *      However, it can change p->sighand, so lock_task_sighand(p)
-        *      must be used. Since p->mm != NULL and we hold ->mmap_sem
-        *      it can't fail.
-        *
-        *      Note also that "g" can be the old leader with ->mm == NULL
-        *      and already unhashed and thus removed from ->thread_group.
-        *      This is OK, __unhash_process()->list_del_rcu() does not
-        *      clear the ->next pointer, we will find the new leader via
-        *      next_thread().
-        */
-       rcu_read_lock();
-       for_each_process(g) {
-               if (g == tsk->group_leader)
-                       continue;
-               if (g->flags & PF_KTHREAD)
-                       continue;
-               p = g;
-               do {
-                       if (p->mm) {
-                               if (unlikely(p->mm == mm)) {
-                                       lock_task_sighand(p, &flags);
-                                       nr += zap_process(p, exit_code);
-                                       unlock_task_sighand(p, &flags);
-                               }
-                               break;
-                       }
-               } while_each_thread(g, p);
-       }
-       rcu_read_unlock();
-done:
-       atomic_set(&core_state->nr_threads, nr);
-       return nr;
-}
-
-static int coredump_wait(int exit_code, struct core_state *core_state)
-{
-       struct task_struct *tsk = current;
-       struct mm_struct *mm = tsk->mm;
-       int core_waiters = -EBUSY;
-
-       init_completion(&core_state->startup);
-       core_state->dumper.task = tsk;
-       core_state->dumper.next = NULL;
-
-       down_write(&mm->mmap_sem);
-       if (!mm->core_state)
-               core_waiters = zap_threads(tsk, mm, core_state, exit_code);
-       up_write(&mm->mmap_sem);
-
-       if (core_waiters > 0) {
-               struct core_thread *ptr;
-
-               wait_for_completion(&core_state->startup);
-               /*
-                * Wait for all the threads to become inactive, so that
-                * all the thread context (extended register state, like
-                * fpu etc) gets copied to the memory.
-                */
-               ptr = core_state->dumper.next;
-               while (ptr != NULL) {
-                       wait_task_inactive(ptr->task, 0);
-                       ptr = ptr->next;
-               }
-       }
-
-       return core_waiters;
-}
-
-static void coredump_finish(struct mm_struct *mm)
-{
-       struct core_thread *curr, *next;
-       struct task_struct *task;
-
-       next = mm->core_state->dumper.next;
-       while ((curr = next) != NULL) {
-               next = curr->next;
-               task = curr->task;
-               /*
-                * see exit_mm(), curr->task must not see
-                * ->task == NULL before we read ->next.
-                */
-               smp_mb();
-               curr->task = NULL;
-               wake_up_process(task);
-       }
-
-       mm->core_state = NULL;
-}
-
  /*
   * set_dumpable converts traditional three-value dumpable to two flags and
   * stores them into mm->flags.  It modifies lower two bits of mm->flags, but
@@ -1990,7 +1632,7 @@ void set_dumpable(struct mm_struct *mm, int value)
         }
  }
  
-static int __get_dumpable(unsigned long mm_flags)
+int __get_dumpable(unsigned long mm_flags)
  {
         int ret;
  
@@ -2003,291 +1645,6 @@ int get_dumpable(struct mm_struct *mm)
         return __get_dumpable(mm->flags);
  }
  
-static void wait_for_dump_helpers(struct file *file)
-{
-       struct pipe_inode_info *pipe;
-
-       pipe = file->f_path.dentry->d_inode->i_pipe;
-
-       pipe_lock(pipe);
-       pipe->readers++;
-       pipe->writers--;
-
-       while ((pipe->readers > 1) && (!signal_pending(current))) {
-               wake_up_interruptible_sync(&pipe->wait);
-               kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
-               pipe_wait(pipe);
-       }
-
-       pipe->readers--;
-       pipe->writers++;
-       pipe_unlock(pipe);
-
-}
-
-
-/*
- * umh_pipe_setup
- * helper function to customize the process used
- * to collect the core in userspace.  Specifically
- * it sets up a pipe and installs it as fd 0 (stdin)
- * for the process.  Returns 0 on success, or
- * PTR_ERR on failure.
- * Note that it also sets the core limit to 1.  This
- * is a special value that we use to trap recursive
- * core dumps
- */
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
-{
-       struct file *files[2];
-       struct coredump_params *cp = (struct coredump_params *)info->data;
-       int err = create_pipe_files(files, 0);
-       if (err)
-               return err;
-
-       cp->file = files[1];
-
-       replace_fd(0, files[0], 0);
-       /* and disallow core files too */
-       current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
-
-       return 0;
-}
-
-void do_coredump(long signr, int exit_code, struct pt_regs *regs)
-{
-       struct core_state core_state;
-       struct core_name cn;
-       struct mm_struct *mm = current->mm;
-       struct linux_binfmt * binfmt;
-       const struct cred *old_cred;
-       struct cred *cred;
-       int retval = 0;
-       int flag = 0;
-       int ispipe;
-       struct files_struct *displaced;
-       bool need_nonrelative = false;
-       static atomic_t core_dump_count = ATOMIC_INIT(0);
-       struct coredump_params cprm = {
-               .signr = signr,
-               .regs = regs,
-               .limit = rlimit(RLIMIT_CORE),
-               /*
-                * We must use the same mm->flags while dumping core to avoid
-                * inconsistency of bit flags, since this flag is not protected
-                * by any locks.
-                */
-               .mm_flags = mm->flags,
-       };
-
-       audit_core_dumps(signr);
-
-       binfmt = mm->binfmt;
-       if (!binfmt || !binfmt->core_dump)
-               goto fail;
-       if (!__get_dumpable(cprm.mm_flags))
-               goto fail;
-
-       cred = prepare_creds();
-       if (!cred)
-               goto fail;
-       /*
-        * We cannot trust fsuid as being the "true" uid of the process
-        * nor do we know its entire history. We only know it was tainted
-        * so we dump it as root in mode 2, and only into a controlled
-        * environment (pipe handler or fully qualified path).
-        */
-       if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
-               /* Setuid core dump mode */
-               flag = O_EXCL;          /* Stop rewrite attacks */
-               cred->fsuid = GLOBAL_ROOT_UID;  /* Dump root private */
-               need_nonrelative = true;
-       }
-
-       retval = coredump_wait(exit_code, &core_state);
-       if (retval < 0)
-               goto fail_creds;
-
-       old_cred = override_creds(cred);
-
-       /*
-        * Clear any false indication of pending signals that might
-        * be seen by the filesystem code called to write the core file.
-        */
-       clear_thread_flag(TIF_SIGPENDING);
-
-       ispipe = format_corename(&cn, signr);
-
-       if (ispipe) {
-               int dump_count;
-               char **helper_argv;
-
-               if (ispipe < 0) {
-                       printk(KERN_WARNING "format_corename failed\n");
-                       printk(KERN_WARNING "Aborting core\n");
-                       goto fail_corename;
-               }
-
-               if (cprm.limit == 1) {
-                       /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
-                        *
-                        * Normally core limits are irrelevant to pipes, since
-                        * we're not writing to the file system, but we use
-                        * cprm.limit of 1 here as a speacial value, this is a
-                        * consistent way to catch recursive crashes.
-                        * We can still crash if the core_pattern binary sets
-                        * RLIM_CORE = !1, but it runs as root, and can do
-                        * lots of stupid things.
-                        *
-                        * Note that we use task_tgid_vnr here to grab the pid
-                        * of the process group leader.  That way we get the
-                        * right pid if a thread in a multi-threaded
-                        * core_pattern process dies.
-                        */
-                       printk(KERN_WARNING
-                               "Process %d(%s) has RLIMIT_CORE set to 1\n",
-                               task_tgid_vnr(current), current->comm);
-                       printk(KERN_WARNING "Aborting core\n");
-                       goto fail_unlock;
-               }
-               cprm.limit = RLIM_INFINITY;
-
-               dump_count = atomic_inc_return(&core_dump_count);
-               if (core_pipe_limit && (core_pipe_limit < dump_count)) {
-                       printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
-                              task_tgid_vnr(current), current->comm);
-                       printk(KERN_WARNING "Skipping core dump\n");
-                       goto fail_dropcount;
-               }
-
-               helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
-               if (!helper_argv) {
-                       printk(KERN_WARNING "%s failed to allocate memory\n",
-                              __func__);
-                       goto fail_dropcount;
-               }
-
-               retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
-                                       NULL, UMH_WAIT_EXEC, umh_pipe_setup,
-                                       NULL, &cprm);
-               argv_free(helper_argv);
-               if (retval) {
-                       printk(KERN_INFO "Core dump to %s pipe failed\n",
-                              cn.corename);
-                       goto close_fail;
-               }
-       } else {
-               struct inode *inode;
-
-               if (cprm.limit < binfmt->min_coredump)
-                       goto fail_unlock;
-
-               if (need_nonrelative && cn.corename[0] != '/') {
-                       printk(KERN_WARNING "Pid %d(%s) can only dump core "\
-                               "to fully qualified path!\n",
-                               task_tgid_vnr(current), current->comm);
-                       printk(KERN_WARNING "Skipping core dump\n");
-                       goto fail_unlock;
-               }
-
-               cprm.file = filp_open(cn.corename,
-                                O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
-                                0600);
-               if (IS_ERR(cprm.file))
-                       goto fail_unlock;
-
-               inode = cprm.file->f_path.dentry->d_inode;
-               if (inode->i_nlink > 1)
-                       goto close_fail;
-               if (d_unhashed(cprm.file->f_path.dentry))
-                       goto close_fail;
-               /*
-                * AK: actually i see no reason to not allow this for named
-                * pipes etc, but keep the previous behaviour for now.
-                */
-               if (!S_ISREG(inode->i_mode))
-                       goto close_fail;
-               /*
-                * Dont allow local users get cute and trick others to coredump
-                * into their pre-created files.
-                */
-               if (!uid_eq(inode->i_uid, current_fsuid()))
-                       goto close_fail;
-               if (!cprm.file->f_op || !cprm.file->f_op->write)
-                       goto close_fail;
-               if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
-                       goto close_fail;
-       }
-
-       /* get us an unshared descriptor table; almost always a no-op */
-       retval = unshare_files(&displaced);
-       if (retval)
-               goto close_fail;
-       if (displaced)
-               put_files_struct(displaced);
-       retval = binfmt->core_dump(&cprm);
-       if (retval)
-               current->signal->group_exit_code |= 0x80;
-
-       if (ispipe && core_pipe_limit)
-               wait_for_dump_helpers(cprm.file);
-close_fail:
-       if (cprm.file)
-               filp_close(cprm.file, NULL);
-fail_dropcount:
-       if (ispipe)
-               atomic_dec(&core_dump_count);
-fail_unlock:
-       kfree(cn.corename);
-fail_corename:
-       coredump_finish(mm);
-       revert_creds(old_cred);
-fail_creds:
-       put_cred(cred);
-fail:
-       return;
-}
-
-/*
- * Core dumping helper functions.  These are the only things you should
- * do on a core-file: use only these functions to write out all the
- * necessary info.
- */
-int dump_write(struct file *file, const void *addr, int nr)
-{
-       return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-EXPORT_SYMBOL(dump_write);
-
-int dump_seek(struct file *file, loff_t off)
-{
-       int ret = 1;
-
-       if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
-               if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
-                       return 0;
-       } else {
-               char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
-               if (!buf)
-                       return 0;
-               while (off > 0) {
-                       unsigned long n = off;
-
-                       if (n > PAGE_SIZE)
-                               n = PAGE_SIZE;
-                       if (!dump_write(file, buf, n)) {
-                               ret = 0;
-                               break;
-                       }
-                       off -= n;
-               }
-               free_page((unsigned long)buf);
-       }
-       return ret;
-}
-EXPORT_SYMBOL(dump_seek);
-
  #ifdef __ARCH_WANT_SYS_EXECVE
  SYSCALL_DEFINE3(execve,
                 const char __user *, filename,
diff --git a/include/linux/sched.h b/include/linux/sched.h

index fb7bad99a4bfe9a189f1e555eb4619074c6b46ff..f479c7f7e8d97425e8e4ab04612c777bacba548d 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -405,6 +405,7 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
  
  extern void set_dumpable(struct mm_struct *mm, int value);
  extern int get_dumpable(struct mm_struct *mm);
+extern int __get_dumpable(unsigned long mm_flags);
  
  /* get/set_dumpable() values */
  #define SUID_DUMPABLE_DISABLED 0
author	Alex Kelly <alex.page.kelly@gmail.com>
	Wed, 26 Sep 2012 01:34:50 +0000 (11:34 +1000)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Thu, 27 Sep 2012 07:28:42 +0000 (17:28 +1000)
fs/Makefile		patch \| blob \| history
fs/coredump.c	[new file with mode: 0644]	patch \| blob
fs/exec.c		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history