2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
12 #include <linux/module.h>
13 #include <linux/kernel.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/utsname.h>
35 #include <linux/coredump.h>
36 #include <linux/sched.h>
37 #include <asm/uaccess.h>
38 #include <asm/param.h>
/*
 * Fallback definitions for the user-visible types used by the core-dump
 * note writers below; an architecture may predefine these.
 * NOTE(review): this excerpt is line-sampled — the surrounding
 * #ifndef/#endif lines for user_long_t are not visible here.
 */
42 #define user_long_t long
44 #ifndef user_siginfo_t
45 #define user_siginfo_t siginfo_t
48 static int load_elf_binary(struct linux_binprm *bprm);
49 static int load_elf_library(struct file *);
50 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
51 int, int, unsigned long);
/*
 * When CONFIG_ELF_CORE is off, elf_core_dump becomes NULL so the binfmt
 * simply has no core-dump operation.
 */
54 * If we don't support core dumping, then supply a NULL so we
57 #ifdef CONFIG_ELF_CORE
58 static int elf_core_dump(struct coredump_params *cprm);
60 #define elf_core_dump NULL
/*
 * ELF_MIN_ALIGN: alignment granule for mapping segments — the larger of
 * the ELF execution page size and the kernel PAGE_SIZE.
 */
63 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
64 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
66 #define ELF_MIN_ALIGN PAGE_SIZE
69 #ifndef ELF_CORE_EFLAGS
70 #define ELF_CORE_EFLAGS 0
/* Round down / in-page offset / round up, all on ELF_MIN_ALIGN boundaries. */
73 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
74 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
75 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
/*
 * Registration record handed to the binfmt layer: how to load ELF
 * executables and shared libraries, and how to dump core for them.
 * NOTE(review): the closing "};" is outside the visible excerpt.
 */
77 static struct linux_binfmt elf_format = {
78 .module = THIS_MODULE,
79 .load_binary = load_elf_binary,
80 .load_shlib = load_elf_library,
81 .core_dump = elf_core_dump,
82 .min_coredump = ELF_EXEC_PAGESIZE,
/* An address is "bad" if it falls at or beyond the user task size. */
85 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
/*
 * set_brk - extend the brk area with anonymous pages over [start, end)
 * (both rounded to ELF_MIN_ALIGN) and record the new program break.
 * NOTE(review): line-sampled excerpt; the declaration of 'addr', the
 * error check on vm_brk()'s result, and the return statement are among
 * the lines missing here.
 */
87 static int set_brk(unsigned long start, unsigned long end)
89 start = ELF_PAGEALIGN(start);
90 end = ELF_PAGEALIGN(end);
93 addr = vm_brk(start, end - start);
97 current->mm->start_brk = current->mm->brk = end;
101 /* We need to explicitly zero any fractional pages
102 after the data section (i.e. bss). This would
103 contain the junk from the file that should not
/*
 * padzero - zero the tail of the page containing elf_bss, from elf_bss
 * up to the next ELF_MIN_ALIGN boundary, using clear_user().
 * NOTE(review): the "if (nbyte)" guard and the return paths are not
 * visible in this sampled excerpt.
 */
106 static int padzero(unsigned long elf_bss)
110 nbyte = ELF_PAGEOFFSET(elf_bss);
112 nbyte = ELF_MIN_ALIGN - nbyte;
113 if (clear_user((void __user *) elf_bss, nbyte))
119 /* Let's use some macros to make this stack manipulation a little clearer */
/*
 * On grow-up stacks the pointer advances; on grow-down stacks it recedes.
 * STACK_ROUND keeps the final stack pointer 16-byte aligned in both cases.
 */
120 #ifdef CONFIG_STACK_GROWSUP
121 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
122 #define STACK_ROUND(sp, items) \
123 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
124 #define STACK_ALLOC(sp, len) ({ \
/* grow-up: return the old sp, then bump it past the allocation */
125 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
/* grow-down variants (the #else for the block above is not visible) */
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
129 #define STACK_ROUND(sp, items) \
130 (((unsigned long) (sp - items)) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
134 #ifndef ELF_BASE_PLATFORM
136 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
137 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
138 * will be copied to the user stack in the same manner as AT_PLATFORM.
/* Default: no base-platform string when the arch does not define one. */
140 #define ELF_BASE_PLATFORM NULL
144 * Use get_random_int() to implement AT_RANDOM while avoiding depletion
145 * of the entropy pool.
/*
 * get_atrandom_bytes - fill buf with nbytes of pseudo-random data in
 * sizeof(unsigned int)-sized chunks from get_random_int().
 * NOTE(review): the enclosing loop construct and advance of p/nbytes are
 * among the lines missing from this sampled excerpt.
 */
147 static void get_atrandom_bytes(unsigned char *buf, size_t nbytes)
149 unsigned char *p = buf;
152 unsigned int random_variable;
/* last chunk may be shorter than sizeof(random_variable) */
153 size_t chunk = min(nbytes, sizeof(random_variable));
155 random_variable = get_random_int();
156 memcpy(p, &random_variable, chunk);
/*
 * create_elf_tables - lay out the initial userspace stack for a new ELF
 * image: platform strings, 16 bytes of AT_RANDOM seed, the auxiliary
 * vector (built in current->mm->saved_auxv then copied out), argc, the
 * argv[] and envp[] pointer arrays, and their NULL terminators.
 *
 * @bprm:             binary-parameter block (stack pointer, argc/envc, ...)
 * @exec:             ELF header of the executable being loaded
 * @load_addr:        address the executable was loaded at (for AT_PHDR)
 * @interp_load_addr: interpreter base, exported as AT_BASE
 *
 * NOTE(review): this excerpt is line-sampled — return statements, several
 * error paths ("return -EFAULT" after failed copies), and loop headers for
 * the argv/envp population loops are not visible here.
 */
163 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
164 unsigned long load_addr, unsigned long interp_load_addr)
166 unsigned long p = bprm->p;
167 int argc = bprm->argc;
168 int envc = bprm->envc;
169 elf_addr_t __user *argv;
170 elf_addr_t __user *envp;
171 elf_addr_t __user *sp;
172 elf_addr_t __user *u_platform;
173 elf_addr_t __user *u_base_platform;
174 elf_addr_t __user *u_rand_bytes;
175 const char *k_platform = ELF_PLATFORM;
176 const char *k_base_platform = ELF_BASE_PLATFORM;
177 unsigned char k_rand_bytes[16];
179 elf_addr_t *elf_info;
181 const struct cred *cred = current_cred();
182 struct vm_area_struct *vma;
185 * In some cases (e.g. Hyper-Threading), we want to avoid L1
186 * evictions by the processes running on the same package. One
187 * thing we can do is to shuffle the initial stack for them.
190 p = arch_align_stack(p);
193 * If this architecture has a platform capability string, copy it
194 * to userspace. In some cases (Sparc), this info is impossible
195 * for userspace to get any other way, in others (i386) it is
200 size_t len = strlen(k_platform) + 1;
202 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
203 if (__copy_to_user(u_platform, k_platform, len))
208 * If this architecture has a "base" platform capability
209 * string, copy it to userspace.
211 u_base_platform = NULL;
212 if (k_base_platform) {
213 size_t len = strlen(k_base_platform) + 1;
215 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
216 if (__copy_to_user(u_base_platform, k_base_platform, len))
221 * Generate 16 random bytes for userspace PRNG seeding.
223 get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes));
/* The AT_RANDOM seed lives on the user stack; aux entry points at it. */
224 u_rand_bytes = (elf_addr_t __user *)
225 STACK_ALLOC(p, sizeof(k_rand_bytes));
226 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
229 /* Create the ELF interpreter info */
230 elf_info = (elf_addr_t *)current->mm->saved_auxv;
231 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
232 #define NEW_AUX_ENT(id, val) \
234 elf_info[ei_index++] = id; \
235 elf_info[ei_index++] = val; \
240 * ARCH_DLINFO must come first so PPC can do its special alignment of
242 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
243 * ARCH_DLINFO changes
247 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
248 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
249 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
250 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
251 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
252 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
253 NEW_AUX_ENT(AT_BASE, interp_load_addr);
254 NEW_AUX_ENT(AT_FLAGS, 0);
255 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
/* IDs are translated into the user namespace view of the creds. */
256 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
257 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
258 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
259 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
260 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
261 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
263 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
265 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
267 NEW_AUX_ENT(AT_PLATFORM,
268 (elf_addr_t)(unsigned long)u_platform);
270 if (k_base_platform) {
271 NEW_AUX_ENT(AT_BASE_PLATFORM,
272 (elf_addr_t)(unsigned long)u_base_platform);
274 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
275 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
278 /* AT_NULL is zero; clear the rest too */
279 memset(&elf_info[ei_index], 0,
280 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
282 /* And advance past the AT_NULL entry. */
285 sp = STACK_ADD(p, ei_index);
/* argc slot + argv pointers + NULL + envp pointers + NULL */
287 items = (argc + 1) + (envc + 1) + 1;
288 bprm->p = STACK_ROUND(sp, items);
290 /* Point sp at the lowest address on the stack */
291 #ifdef CONFIG_STACK_GROWSUP
292 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
293 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
295 sp = (elf_addr_t __user *)bprm->p;
300 * Grow the stack manually; some architectures have a limit on how
301 * far ahead a user-space access may be in order to grow the stack.
303 vma = find_extend_vma(current->mm, bprm->p);
307 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
308 if (__put_user(argc, sp++))
311 envp = argv + argc + 1;
313 /* Populate argv and envp */
314 p = current->mm->arg_end = current->mm->arg_start;
317 if (__put_user((elf_addr_t)p, argv++))
/* strings were copied to the stack earlier; just measure and skip them */
319 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
320 if (!len || len > MAX_ARG_STRLEN)
324 if (__put_user(0, argv))
326 current->mm->arg_end = current->mm->env_start = p;
329 if (__put_user((elf_addr_t)p, envp++))
331 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
332 if (!len || len > MAX_ARG_STRLEN)
336 if (__put_user(0, envp))
338 current->mm->env_end = p;
340 /* Put the elf_info on the stack in the right place. */
341 sp = (elf_addr_t __user *)envp + 1;
342 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
/*
 * elf_map - mmap one PT_LOAD segment of an ELF file at (approximately)
 * addr, honouring the in-page offset of p_vaddr.
 *
 * @total_size: when non-zero, the full span of the (interpreter) image;
 * the first mapping reserves the whole span, then the excess beyond this
 * segment is unmapped so later segments land inside the reservation.
 *
 * Returns the address vm_mmap() chose (checked by callers with BAD_ADDR).
 * NOTE(review): sampled excerpt — the zero-size early return, the
 * if/else around total_size, and the final return are not all visible.
 */
349 static unsigned long elf_map(struct file *filep, unsigned long addr,
350 struct elf_phdr *eppnt, int prot, int type,
351 unsigned long total_size)
353 unsigned long map_addr;
354 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
355 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
356 addr = ELF_PAGESTART(addr);
357 size = ELF_PAGEALIGN(size);
359 /* mmap() will return -EINVAL if given a zero size, but a
360 * segment with zero filesize is perfectly valid */
365 * total_size is the size of the ELF (interpreter) image.
366 * The _first_ mmap needs to know the full size, otherwise
367 * randomization might put this image into an overlapping
368 * position with the ELF binary image. (since size < total_size)
369 * So we first map the 'big' image - and unmap the remainder at
370 * the end. (which unmap is needed for ELF images with holes.)
373 total_size = ELF_PAGEALIGN(total_size);
374 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
375 if (!BAD_ADDR(map_addr))
376 vm_munmap(map_addr+size, total_size-size);
/* total_size == 0 path: map just this segment */
378 map_addr = vm_mmap(filep, addr, size, prot, type, off);
383 #endif /* !elf_map */
/*
 * total_mapping_size - span covered by all PT_LOAD headers: from the
 * page-start of the first load segment to the end (p_vaddr + p_memsz)
 * of the last one.
 * NOTE(review): the loop body that records first_idx/last_idx and the
 * "no PT_LOAD found" early return are missing from this sampled excerpt.
 */
385 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
387 int i, first_idx = -1, last_idx = -1;
389 for (i = 0; i < nr; i++) {
390 if (cmds[i].p_type == PT_LOAD) {
399 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
400 ELF_PAGESTART(cmds[first_idx].p_vaddr);
404 /* This is much more generalized than the library routine read function,
405 so we keep this separate. Technically the library read function
406 is only provided so that we can read a.out libraries that have
/*
 * load_elf_interp - map the ELF interpreter (e.g. the dynamic linker)
 * into the current address space.
 *
 * Validates the interpreter's ELF header, reads its program headers,
 * maps every PT_LOAD segment (reserving the full image span on the
 * first map via total_size), then zeroes and extends the bss.
 *
 * Returns the interpreter's load address on success, or an error value
 * in ~0UL / -errno style checked by the caller with IS_ERR/BAD_ADDR.
 * NOTE(review): sampled excerpt — error labels, several declarations
 * (size, retval, i), and cleanup paths are not visible here.
 */
409 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
410 struct file *interpreter, unsigned long *interp_map_addr,
411 unsigned long no_base)
413 struct elf_phdr *elf_phdata;
414 struct elf_phdr *eppnt;
415 unsigned long load_addr = 0;
416 int load_addr_set = 0;
417 unsigned long last_bss = 0, elf_bss = 0;
418 unsigned long error = ~0UL;
419 unsigned long total_size;
422 /* First of all, some simple consistency checks */
423 if (interp_elf_ex->e_type != ET_EXEC &&
424 interp_elf_ex->e_type != ET_DYN)
426 if (!elf_check_arch(interp_elf_ex))
428 if (!interpreter->f_op || !interpreter->f_op->mmap)
432 * If the size of this structure has changed, then punt, since
433 * we will be doing the wrong thing.
435 if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
/* bound e_phnum so the phdr table allocation stays small and sane */
437 if (interp_elf_ex->e_phnum < 1 ||
438 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
441 /* Now read in all of the header information */
442 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
443 if (size > ELF_MIN_ALIGN)
445 elf_phdata = kmalloc(size, GFP_KERNEL);
449 retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
450 (char *)elf_phdata, size);
452 if (retval != size) {
458 total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
465 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
466 if (eppnt->p_type == PT_LOAD) {
467 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
469 unsigned long vaddr = 0;
470 unsigned long k, map_addr;
/* translate ELF segment flags into mmap protections */
472 if (eppnt->p_flags & PF_R)
473 elf_prot = PROT_READ;
474 if (eppnt->p_flags & PF_W)
475 elf_prot |= PROT_WRITE;
476 if (eppnt->p_flags & PF_X)
477 elf_prot |= PROT_EXEC;
478 vaddr = eppnt->p_vaddr;
479 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
480 elf_type |= MAP_FIXED;
481 else if (no_base && interp_elf_ex->e_type == ET_DYN)
484 map_addr = elf_map(interpreter, load_addr + vaddr,
485 eppnt, elf_prot, elf_type, total_size);
/* remember where the first segment actually landed */
487 if (!*interp_map_addr)
488 *interp_map_addr = map_addr;
490 if (BAD_ADDR(map_addr))
493 if (!load_addr_set &&
494 interp_elf_ex->e_type == ET_DYN) {
495 load_addr = map_addr - ELF_PAGESTART(vaddr);
500 * Check to see if the section's size will overflow the
501 * allowed task size. Note that p_filesz must always be
502 * <= p_memsize so it's only necessary to check p_memsz.
504 k = load_addr + eppnt->p_vaddr;
506 eppnt->p_filesz > eppnt->p_memsz ||
507 eppnt->p_memsz > TASK_SIZE ||
508 TASK_SIZE - eppnt->p_memsz < k) {
514 * Find the end of the file mapping for this phdr, and
515 * keep track of the largest address we see for this.
517 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
522 * Do the same thing for the memory mapping - between
523 * elf_bss and last_bss is the bss section.
525 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
531 if (last_bss > elf_bss) {
533 * Now fill out the bss section. First pad the last page up
534 * to the page boundary, and then perform a mmap to make sure
535 * that there are zero-mapped pages up to and including the
538 if (padzero(elf_bss)) {
543 /* What we have mapped so far */
544 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
546 /* Map the last of the bss segment */
547 error = vm_brk(elf_bss, last_bss - elf_bss);
561 * These are the functions used to load ELF style executables and shared
562 * libraries. There is no binary dependent code anywhere else.
565 #define INTERPRETER_NONE 0
566 #define INTERPRETER_ELF 2
/* Default stack randomization mask: 8MB of VA spread, scaled by page size. */
568 #ifndef STACK_RND_MASK
569 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
/*
 * randomize_stack_top - apply ASLR to the stack top when the task has
 * PF_RANDOMIZE set and has not opted out via ADDR_NO_RANDOMIZE.
 * The offset is added on grow-up stacks and subtracted on grow-down.
 */
572 static unsigned long randomize_stack_top(unsigned long stack_top)
574 unsigned int random_variable = 0;
576 if ((current->flags & PF_RANDOMIZE) &&
577 !(current->personality & ADDR_NO_RANDOMIZE)) {
578 random_variable = get_random_int() & STACK_RND_MASK;
579 random_variable <<= PAGE_SHIFT;
581 #ifdef CONFIG_STACK_GROWSUP
582 return PAGE_ALIGN(stack_top) + random_variable;
584 return PAGE_ALIGN(stack_top) - random_variable;
/*
 * load_elf_binary - the binfmt load_binary hook for ELF executables.
 *
 * Validates the ELF header, reads the program headers, finds and opens
 * PT_INTERP (the dynamic linker) if present, flushes the old executable
 * ("point of no return"), sets up the stack, maps every PT_LOAD segment
 * (with PIE base randomization where configured), extends the brk/bss,
 * maps the interpreter, builds the ELF tables on the stack, and finally
 * starts the new thread at the chosen entry point.
 *
 * Returns 0 on success or a negative errno; after the point of no
 * return, failures kill the task with SIGKILL/SIGSEGV.
 *
 * NOTE(review): this excerpt is line-sampled — many error labels
 * (out_free_dentry, out_free_interp), brace lines, and intermediate
 * statements are missing; comments below only describe visible code.
 */
588 static int load_elf_binary(struct linux_binprm *bprm)
590 struct file *interpreter = NULL; /* to shut gcc up */
591 unsigned long load_addr = 0, load_bias = 0;
592 int load_addr_set = 0;
593 char * elf_interpreter = NULL;
595 struct elf_phdr *elf_ppnt, *elf_phdata;
596 unsigned long elf_bss, elf_brk;
599 unsigned long elf_entry;
600 unsigned long interp_load_addr = 0;
601 unsigned long start_code, end_code, start_data, end_data;
602 unsigned long reloc_func_desc __maybe_unused = 0;
603 int executable_stack = EXSTACK_DEFAULT;
604 unsigned long def_flags = 0;
605 struct pt_regs *regs = current_pt_regs();
607 struct elfhdr elf_ex;
608 struct elfhdr interp_elf_ex;
/* 'loc' holds both ELF headers; freed on exit (free path not visible). */
611 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
617 /* Get the exec-header */
618 loc->elf_ex = *((struct elfhdr *)bprm->buf);
621 /* First of all, some simple consistency checks */
622 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
625 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
627 if (!elf_check_arch(&loc->elf_ex))
629 if (!bprm->file->f_op || !bprm->file->f_op->mmap)
632 /* Now read in all of the header information */
633 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
635 if (loc->elf_ex.e_phnum < 1 ||
636 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
638 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
640 elf_phdata = kmalloc(size, GFP_KERNEL);
644 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
645 (char *)elf_phdata, size);
646 if (retval != size) {
652 elf_ppnt = elf_phdata;
/* First pass over the phdrs: find and load the PT_INTERP path. */
661 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
662 if (elf_ppnt->p_type == PT_INTERP) {
663 /* This is the program interpreter used for
664 * shared libraries - for now assume that this
665 * is an a.out format binary
/* sanity-bound the interpreter path length before allocating */
668 if (elf_ppnt->p_filesz > PATH_MAX ||
669 elf_ppnt->p_filesz < 2)
673 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
675 if (!elf_interpreter)
678 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
681 if (retval != elf_ppnt->p_filesz) {
684 goto out_free_interp;
686 /* make sure path is NULL terminated */
688 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
689 goto out_free_interp;
691 interpreter = open_exec(elf_interpreter);
692 retval = PTR_ERR(interpreter);
693 if (IS_ERR(interpreter))
694 goto out_free_interp;
697 * If the binary is not readable then enforce
698 * mm->dumpable = 0 regardless of the interpreter's
701 would_dump(bprm, interpreter);
703 retval = kernel_read(interpreter, 0, bprm->buf,
705 if (retval != BINPRM_BUF_SIZE) {
708 goto out_free_dentry;
711 /* Get the exec headers */
712 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
/* Second pass: PT_GNU_STACK decides stack executability. */
718 elf_ppnt = elf_phdata;
719 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
720 if (elf_ppnt->p_type == PT_GNU_STACK) {
721 if (elf_ppnt->p_flags & PF_X)
722 executable_stack = EXSTACK_ENABLE_X;
724 executable_stack = EXSTACK_DISABLE_X;
728 /* Some simple consistency checks for the interpreter */
729 if (elf_interpreter) {
731 /* Not an ELF interpreter */
732 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
733 goto out_free_dentry;
734 /* Verify the interpreter has a valid arch */
735 if (!elf_check_arch(&loc->interp_elf_ex))
736 goto out_free_dentry;
739 /* Flush all traces of the currently running executable */
740 retval = flush_old_exec(bprm);
742 goto out_free_dentry;
744 /* OK, This is the point of no return */
745 current->mm->def_flags = def_flags;
747 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
748 may depend on the personality. */
749 SET_PERSONALITY(loc->elf_ex);
750 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
751 current->personality |= READ_IMPLIES_EXEC;
753 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
754 current->flags |= PF_RANDOMIZE;
756 setup_new_exec(bprm);
758 /* Do this so that we can load the interpreter, if need be. We will
759 change some of these later */
760 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
763 send_sig(SIGKILL, current, 0);
764 goto out_free_dentry;
767 current->mm->start_stack = bprm->p;
769 /* Now we do a little grungy work by mmapping the ELF image into
770 the correct location in memory. */
771 for(i = 0, elf_ppnt = elf_phdata;
772 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
773 int elf_prot = 0, elf_flags;
774 unsigned long k, vaddr;
776 if (elf_ppnt->p_type != PT_LOAD)
779 if (unlikely (elf_brk > elf_bss)) {
782 /* There was a PT_LOAD segment with p_memsz > p_filesz
783 before this one. Map anonymous pages, if needed,
784 and clear the area. */
785 retval = set_brk(elf_bss + load_bias,
786 elf_brk + load_bias);
788 send_sig(SIGKILL, current, 0);
789 goto out_free_dentry;
791 nbyte = ELF_PAGEOFFSET(elf_bss);
793 nbyte = ELF_MIN_ALIGN - nbyte;
794 if (nbyte > elf_brk - elf_bss)
795 nbyte = elf_brk - elf_bss;
796 if (clear_user((void __user *)elf_bss +
799 * This bss-zeroing can fail if the ELF
800 * file specifies odd protections. So
801 * we don't check the return value
/* translate segment flags into mmap protections */
807 if (elf_ppnt->p_flags & PF_R)
808 elf_prot |= PROT_READ;
809 if (elf_ppnt->p_flags & PF_W)
810 elf_prot |= PROT_WRITE;
811 if (elf_ppnt->p_flags & PF_X)
812 elf_prot |= PROT_EXEC;
814 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
816 vaddr = elf_ppnt->p_vaddr;
817 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
818 elf_flags |= MAP_FIXED;
819 } else if (loc->elf_ex.e_type == ET_DYN) {
820 /* Try and get dynamic programs out of the way of the
821 * default mmap base, as well as whatever program they
822 * might try to exec. This is because the brk will
823 * follow the loader, and is not movable. */
824 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
825 /* Memory randomization might have been switched off
826 * in runtime via sysctl or explicit setting of
828 * If that is the case, retain the original non-zero
829 * load_bias value in order to establish proper
830 * non-randomized mappings.
832 if (current->flags & PF_RANDOMIZE)
835 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
837 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
841 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
842 elf_prot, elf_flags, 0);
843 if (BAD_ADDR(error)) {
844 send_sig(SIGKILL, current, 0);
845 retval = IS_ERR((void *)error) ?
846 PTR_ERR((void*)error) : -EINVAL;
847 goto out_free_dentry;
/* record the effective load address after the first mapping */
850 if (!load_addr_set) {
852 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
853 if (loc->elf_ex.e_type == ET_DYN) {
855 ELF_PAGESTART(load_bias + vaddr);
856 load_addr += load_bias;
857 reloc_func_desc = load_bias;
860 k = elf_ppnt->p_vaddr;
867 * Check to see if the section's size will overflow the
868 * allowed task size. Note that p_filesz must always be
869 * <= p_memsz so it is only necessary to check p_memsz.
871 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
872 elf_ppnt->p_memsz > TASK_SIZE ||
873 TASK_SIZE - elf_ppnt->p_memsz < k) {
874 /* set_brk can never work. Avoid overflows. */
875 send_sig(SIGKILL, current, 0);
877 goto out_free_dentry;
/* track code/data extents for the mm bookkeeping below */
880 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
884 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
888 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
/* rebase all recorded addresses by the chosen load bias */
893 loc->elf_ex.e_entry += load_bias;
894 elf_bss += load_bias;
895 elf_brk += load_bias;
896 start_code += load_bias;
897 end_code += load_bias;
898 start_data += load_bias;
899 end_data += load_bias;
901 /* Calling set_brk effectively mmaps the pages that we need
902 * for the bss and break sections. We must do this before
903 * mapping in the interpreter, to make sure it doesn't wind
904 * up getting placed where the bss needs to go.
906 retval = set_brk(elf_bss, elf_brk);
908 send_sig(SIGKILL, current, 0);
909 goto out_free_dentry;
911 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
912 send_sig(SIGSEGV, current, 0);
913 retval = -EFAULT; /* Nobody gets to see this, but.. */
914 goto out_free_dentry;
917 if (elf_interpreter) {
918 unsigned long interp_map_addr = 0;
920 elf_entry = load_elf_interp(&loc->interp_elf_ex,
924 if (!IS_ERR((void *)elf_entry)) {
926 * load_elf_interp() returns relocation
929 interp_load_addr = elf_entry;
930 elf_entry += loc->interp_elf_ex.e_entry;
932 if (BAD_ADDR(elf_entry)) {
933 force_sig(SIGSEGV, current);
934 retval = IS_ERR((void *)elf_entry) ?
935 (int)elf_entry : -EINVAL;
936 goto out_free_dentry;
938 reloc_func_desc = interp_load_addr;
940 allow_write_access(interpreter);
942 kfree(elf_interpreter);
/* static binary: entry point comes straight from the exec header */
944 elf_entry = loc->elf_ex.e_entry;
945 if (BAD_ADDR(elf_entry)) {
946 force_sig(SIGSEGV, current);
948 goto out_free_dentry;
954 set_binfmt(&elf_format);
956 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
957 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
959 send_sig(SIGKILL, current, 0);
962 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
964 install_exec_creds(bprm);
965 retval = create_elf_tables(bprm, &loc->elf_ex,
966 load_addr, interp_load_addr);
968 send_sig(SIGKILL, current, 0);
971 /* N.B. passed_fileno might not be initialized? */
972 current->mm->end_code = end_code;
973 current->mm->start_code = start_code;
974 current->mm->start_data = start_data;
975 current->mm->end_data = end_data;
976 current->mm->start_stack = bprm->p;
978 #ifdef arch_randomize_brk
979 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
980 current->mm->brk = current->mm->start_brk =
981 arch_randomize_brk(current->mm);
982 #ifdef CONFIG_COMPAT_BRK
983 current->brk_randomized = 1;
988 if (current->personality & MMAP_PAGE_ZERO) {
989 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
990 and some applications "depend" upon this behavior.
991 Since we do not have the power to recompile these, we
992 emulate the SVr4 behavior. Sigh. */
993 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
994 MAP_FIXED | MAP_PRIVATE, 0);
999 * The ABI may specify that certain registers be set up in special
1000 * ways (on i386 %edx is the address of a DT_FINI function, for
1001 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1002 * that the e_entry field is the address of the function descriptor
1003 * for the startup routine, rather than the address of the startup
1004 * routine itself. This macro performs whatever initialization to
1005 * the regs structure is required as well as any relocations to the
1006 * function descriptor entries when executing dynamically links apps.
1008 ELF_PLAT_INIT(regs, reloc_func_desc);
1011 start_thread(regs, elf_entry, bprm->p);
/* error-path cleanup (labels not visible in this excerpt) */
1020 allow_write_access(interpreter);
1024 kfree(elf_interpreter);
1030 /* This is really simpleminded and specialized - we are loading an
1031 a.out library that is given an ELF header. */
/*
 * load_elf_library - binfmt load_shlib hook: map a library that has an
 * ELF header via the legacy uselib() path. Accepts only ET_EXEC files
 * with at most 2 program headers, maps the single PT_LOAD segment at
 * its fixed address, then zeroes and extends the bss.
 * NOTE(review): sampled excerpt — error labels, the "j != 1" single-load
 * check, and the final return are not visible here.
 */
1032 static int load_elf_library(struct file *file)
1034 struct elf_phdr *elf_phdata;
1035 struct elf_phdr *eppnt;
1036 unsigned long elf_bss, bss, len;
1037 int retval, error, i, j;
1038 struct elfhdr elf_ex;
1041 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1042 if (retval != sizeof(elf_ex))
1045 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1048 /* First of all, some simple consistency checks */
1049 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1050 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1053 /* Now read in all of the header information */
1055 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1056 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1059 elf_phdata = kmalloc(j, GFP_KERNEL);
1065 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
/* count the PT_LOAD headers; the library must contain exactly one */
1069 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1070 if ((eppnt + i)->p_type == PT_LOAD)
1075 while (eppnt->p_type != PT_LOAD)
1078 /* Now use mmap to map the library into memory. */
1079 error = vm_mmap(file,
1080 ELF_PAGESTART(eppnt->p_vaddr),
1082 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1083 PROT_READ | PROT_WRITE | PROT_EXEC,
1084 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1086 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1087 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1090 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1091 if (padzero(elf_bss)) {
1096 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1098 bss = eppnt->p_memsz + eppnt->p_vaddr;
1100 vm_brk(len, bss - len);
1109 #ifdef CONFIG_ELF_CORE
1113 * Modelled on fs/exec.c:aout_core_dump()
1114 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1118 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1119 * that are useful for post-mortem analysis are included in every core dump.
1120 * In that way we ensure that the core dump is fully interpretable later
1121 * without matching up the same kernel and hardware config to see what PC values
1122 * meant. These special mappings include - vDSO, vsyscall, and other
1123 * architecture specific mappings
/*
 * always_dump_vma - true for special kernel mappings (gate/vsyscall area
 * and anything arch_vma_name() recognises, such as the vDSO) that must
 * appear in every core dump for post-mortem interpretability.
 * NOTE(review): the "return true/false" lines are missing from this
 * sampled excerpt; only the conditions are visible.
 */
1125 static bool always_dump_vma(struct vm_area_struct *vma)
1127 /* Any vsyscall mappings? */
1128 if (vma == get_gate_vma(vma->vm_mm))
1131 * arch_vma_name() returns non-NULL for special architecture mappings,
1132 * such as vDSO sections.
1134 if (arch_vma_name(vma))
1141 * Decide what to dump of a segment, part, all or none.
/*
 * vma_dump_size - how many bytes of this VMA to include in the core
 * dump, driven by the per-mm coredump filter bits in mm_flags.
 * Returns 0 (skip), a partial size (e.g. one page for ELF headers), or
 * the full VMA length.
 * NOTE(review): sampled excerpt — several "return" and "goto whole"
 * lines between the visible conditions are missing.
 */
1143 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1144 unsigned long mm_flags)
/* FILTER(type) tests one MMF_DUMP_* bit of the coredump filter */
1146 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1148 /* always dump the vdso and vsyscall sections */
1149 if (always_dump_vma(vma))
1152 if (vma->vm_flags & VM_DONTDUMP)
1155 /* Hugetlb memory check */
1156 if (vma->vm_flags & VM_HUGETLB) {
1157 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1159 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1164 /* Do not dump I/O mapped devices or special mappings */
1165 if (vma->vm_flags & VM_IO)
1168 /* By default, dump shared memory if mapped from an anonymous file. */
1169 if (vma->vm_flags & VM_SHARED) {
/* i_nlink == 0 means a deleted (anonymous shared) file */
1170 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1171 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1176 /* Dump segments that have been written to. */
1177 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1179 if (vma->vm_file == NULL)
1182 if (FILTER(MAPPED_PRIVATE))
1186 * If this looks like the beginning of a DSO or executable mapping,
1187 * check for an ELF header. If we find one, dump the first page to
1188 * aid in determining what was mapped here.
1190 if (FILTER(ELF_HEADERS) &&
1191 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1192 u32 __user *header = (u32 __user *) vma->vm_start;
1194 mm_segment_t fs = get_fs();
1196 * Doing it this way gets the constant folded by GCC.
/* union of the 4 magic bytes and a u32 for a single-word compare */
1200 char elfmag[SELFMAG];
1202 BUILD_BUG_ON(SELFMAG != sizeof word);
1203 magic.elfmag[EI_MAG0] = ELFMAG0;
1204 magic.elfmag[EI_MAG1] = ELFMAG1;
1205 magic.elfmag[EI_MAG2] = ELFMAG2;
1206 magic.elfmag[EI_MAG3] = ELFMAG3;
1208 * Switch to the user "segment" for get_user(),
1209 * then put back what elf_core_dump() had in place.
1212 if (unlikely(get_user(word, header)))
1215 if (word == magic.cmp)
/* fall-through: dump the entire VMA */
1224 return vma->vm_end - vma->vm_start;
1227 /* An ELF note in memory */
1232 unsigned int datasz;
/*
 * notesize - on-disk size of one ELF note: the elf_note header plus the
 * name and descriptor payloads, each padded to a 4-byte boundary.
 * NOTE(review): the "return sz;" line is not visible in this excerpt.
 */
1236 static int notesize(struct memelfnote *en)
1240 sz = sizeof(struct elf_note);
1241 sz += roundup(strlen(en->name) + 1, 4);
1242 sz += roundup(en->datasz, 4);
/*
 * alignfile - pad the core-dump output with zero bytes up to the next
 * 4-byte boundary of cprm->written; returns dump_emit()'s status.
 */
1247 static int alignfile(struct coredump_params *cprm)
1249 static const char buf[4] = { 0, };
1250 return dump_emit(cprm, buf, roundup(cprm->written, 4) - cprm->written);
/*
 * writenote - emit one ELF note into the core dump: the elf_note header,
 * the NUL-terminated name, then the data, with 4-byte padding after the
 * name and after the data.
 * NOTE(review): the success/failure return lines are missing from this
 * sampled excerpt; each failed dump_emit/alignfile visibly short-circuits.
 */
1253 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1256 en.n_namesz = strlen(men->name) + 1;
1257 en.n_descsz = men->datasz;
1258 en.n_type = men->type;
1260 if (!dump_emit(cprm, &en, sizeof(en)))
1262 if (!dump_emit(cprm, men->name, en.n_namesz))
1264 if (!alignfile(cprm))
1266 if (!dump_emit(cprm, men->data, men->datasz))
1268 if (!alignfile(cprm))
/*
 * fill_elf_header - initialise an ET_CORE ELF header for the dump:
 * magic/ident bytes, machine/flags from the caller, and segs program
 * headers located immediately after the elfhdr.
 */
1274 static void fill_elf_header(struct elfhdr *elf, int segs,
1275 u16 machine, u32 flags)
1277 memset(elf, 0, sizeof(*elf));
1279 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1280 elf->e_ident[EI_CLASS] = ELF_CLASS;
1281 elf->e_ident[EI_DATA] = ELF_DATA;
1282 elf->e_ident[EI_VERSION] = EV_CURRENT;
1283 elf->e_ident[EI_OSABI] = ELF_OSABI;
1285 elf->e_type = ET_CORE;
1286 elf->e_machine = machine;
1287 elf->e_version = EV_CURRENT;
/* program header table starts right after this header */
1288 elf->e_phoff = sizeof(struct elfhdr);
1289 elf->e_flags = flags;
1290 elf->e_ehsize = sizeof(struct elfhdr);
1291 elf->e_phentsize = sizeof(struct elf_phdr);
1292 elf->e_phnum = segs;
/*
 * fill_elf_note_phdr - describe the PT_NOTE segment of the core dump:
 * sz bytes of note data at the given file offset.
 * NOTE(review): zeroing of the remaining phdr fields (p_vaddr etc.) is
 * not visible in this sampled excerpt.
 */
1297 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1299 phdr->p_type = PT_NOTE;
1300 phdr->p_offset = offset;
1303 phdr->p_filesz = sz;
/*
 * fill_note - initialise a memelfnote from its name, type, size and
 * data pointer.
 * NOTE(review): the function body is entirely missing from this
 * line-sampled excerpt; only the signature is visible.
 */
1310 static void fill_note(struct memelfnote *note, const char *name, int type,
1311 unsigned int sz, void *data)
1321 * fill up all the fields in prstatus from the given task struct, except
1322 * registers which need to be filled up separately.
/*
 * fill_prstatus - populate an NT_PRSTATUS record: signal info, pids,
 * and CPU times. The group leader reports group-wide cputime totals;
 * other threads report their own.
 * NOTE(review): sampled excerpt — the rcu_read_lock/unlock pair around
 * the real_parent dereference is not visible here.
 */
1324 static void fill_prstatus(struct elf_prstatus *prstatus,
1325 struct task_struct *p, long signr)
1327 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1328 prstatus->pr_sigpend = p->pending.signal.sig[0];
1329 prstatus->pr_sighold = p->blocked.sig[0];
1331 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1333 prstatus->pr_pid = task_pid_vnr(p);
1334 prstatus->pr_pgrp = task_pgrp_vnr(p);
1335 prstatus->pr_sid = task_session_vnr(p);
1336 if (thread_group_leader(p)) {
1337 struct task_cputime cputime;
1340 * This is the record for the group leader. It shows the
1341 * group-wide total, not its individual thread total.
1343 thread_group_cputime(p, &cputime);
1344 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1345 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1347 cputime_t utime, stime;
1349 task_cputime(p, &utime, &stime);
1350 cputime_to_timeval(utime, &prstatus->pr_utime);
1351 cputime_to_timeval(stime, &prstatus->pr_stime);
1353 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1354 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1357 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1358 struct mm_struct *mm)
1360 const struct cred *cred;
1361 unsigned int i, len;
1363 /* first copy the parameters from user space */
1364 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1366 len = mm->arg_end - mm->arg_start;
1367 if (len >= ELF_PRARGSZ)
1368 len = ELF_PRARGSZ-1;
1369 if (copy_from_user(&psinfo->pr_psargs,
1370 (const char __user *)mm->arg_start, len))
1372 for(i = 0; i < len; i++)
1373 if (psinfo->pr_psargs[i] == 0)
1374 psinfo->pr_psargs[i] = ' ';
1375 psinfo->pr_psargs[len] = 0;
1378 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1380 psinfo->pr_pid = task_pid_vnr(p);
1381 psinfo->pr_pgrp = task_pgrp_vnr(p);
1382 psinfo->pr_sid = task_session_vnr(p);
1384 i = p->state ? ffz(~p->state) + 1 : 0;
1385 psinfo->pr_state = i;
1386 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1387 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1388 psinfo->pr_nice = task_nice(p);
1389 psinfo->pr_flag = p->flags;
1391 cred = __task_cred(p);
1392 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1393 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1395 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1400 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1402 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1406 while (auxv[i - 2] != AT_NULL);
1407 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1410 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1413 mm_segment_t old_fs = get_fs();
1415 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1417 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1420 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1422 * Format of NT_FILE note:
1424 * long count -- how many files are mapped
1425 * long page_size -- units for file_ofs
1426 * array of [COUNT] elements of
1430 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
/*
 * Build the NT_FILE note: a header of (count, page_size), then one
 * [start, end, file_ofs] triple per file-backed VMA, followed by each
 * mapping's NUL-terminated pathname (see the format comment above).
 * Triples and names share one vmalloc'ed buffer; names grow down from
 * the estimated triple area and are shifted down afterwards if the
 * estimate was too large.
 */
static void fill_files_note(struct memelfnote *note)
struct vm_area_struct *vma;
unsigned count, size, names_ofs, remaining, n;
user_long_t *start_end_ofs;
char *name_base, *name_curpos;

/* *Estimated* file count and total data size needed */
count = current->mm->map_count;
/* header (2 longs) plus 3 longs per mapping precede the name strings */
names_ofs = (2 + 3 * count) * sizeof(data[0]);
if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
size = round_up(size, PAGE_SIZE);
/* NOTE(review): vmalloc() failure handling not visible in this excerpt — confirm */
data = vmalloc(size);

start_end_ofs = data + 2;
name_base = name_curpos = ((char *)data) + names_ofs;
remaining = size - names_ofs;

for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
	const char *filename;

	file = vma->vm_file;
	/* d_path() writes the name at the END of the supplied buffer */
	filename = d_path(&file->f_path, name_curpos, remaining);
	if (IS_ERR(filename)) {
		if (PTR_ERR(filename) == -ENAMETOOLONG) {
			/* buffer too small: grow estimate, presumably retry — TODO confirm */
			size = size * 5 / 4;

	/* d_path() fills at the end, move name down */
	/* n = strlen(filename) + 1: */
	n = (name_curpos + remaining) - filename;
	remaining = filename - name_curpos;
	memmove(name_curpos, filename, n);

	/* record this mapping's range and file offset (in pages) */
	*start_end_ofs++ = vma->vm_start;
	*start_end_ofs++ = vma->vm_end;
	*start_end_ofs++ = vma->vm_pgoff;

/* Now we know exact count of files, can store it */
data[1] = PAGE_SIZE;
/*
 * Count usually is less than current->mm->map_count,
 * we need to move filenames down.
 */
n = current->mm->map_count - count;
unsigned shift_bytes = n * 3 * sizeof(data[0]);
memmove(name_base - shift_bytes, name_base,
	name_curpos - name_base);
name_curpos -= shift_bytes;

size = name_curpos - (char *)data;
fill_note(note, "CORE", NT_FILE, size, data);
1507 #ifdef CORE_DUMP_USE_REGSET
1508 #include <linux/regset.h>
/*
 * Per-thread state for the regset-based dumper: a singly linked list of
 * threads, each carrying its NT_PRSTATUS plus one trailing note slot per
 * regset.  Slots left all-zero are skipped when writing.
 */
struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[0];	/* one entry per regset note (old-style flexible array) */
/*
 * Everything collected for one core dump (regset variant): the thread
 * list (dumping task first) plus the process-wide psinfo, siginfo,
 * auxv and NT_FILE notes.
 */
struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote signote;
	struct memelfnote auxv;
	struct memelfnote files;
	user_siginfo_t csigdata;	/* backing store for the NT_SIGINFO note */
1529 * When a regset has a writeback hook, we call it on each thread before
1530 * dumping user memory. On register window machines, this makes sure the
1531 * user memory backing the register data is up to date before we read it.
1533 static void do_thread_regset_writeback(struct task_struct *task,
1534 const struct user_regset *regset)
1536 if (regset->writeback)
1537 regset->writeback(task, regset, 1);
1541 #define PR_REG_SIZE(S) sizeof(S)
1544 #ifndef PRSTATUS_SIZE
1545 #define PRSTATUS_SIZE(S) sizeof(S)
1549 #define PR_REG_PTR(S) (&((S)->pr_reg))
1552 #ifndef SET_PR_FPVALID
1553 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
/*
 * Fill one thread's notes: NT_PRSTATUS from regset 0, plus one note per
 * additional active regset.  Each note's on-disk size is accumulated
 * into *total so the caller can lay out the notes segment.
 */
static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, size_t *total)
	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents.  We fill the rest in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus, t->task, signr);
	(void) view->regsets[0].get(t->task, &view->regsets[0],
				    0, PR_REG_SIZE(t->prstatus.pr_reg),
				    PR_REG_PTR(&t->prstatus), NULL);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
	*total += notesize(&t->notes[0]);

	do_thread_regset_writeback(t->task, &view->regsets[0]);

	/*
	 * Each other regset might generate a note too.  For each regset
	 * that has no core_note_type or is inactive, we leave t->notes[i]
	 * all zero and we'll know to skip writing it later.
	 */
	for (i = 1; i < view->n; ++i) {
		const struct user_regset *regset = &view->regsets[i];
		do_thread_regset_writeback(t->task, regset);
		if (regset->core_note_type && regset->get &&
		    (!regset->active || regset->active(t->task, regset))) {
			size_t size = regset->n * regset->size;
			void *data = kmalloc(size, GFP_KERNEL);
			if (unlikely(!data))
			ret = regset->get(t->task, regset,
					  0, size, data, NULL);
			/* NT_PRFPREG goes under the "CORE" name; other regsets under "LINUX" */
			if (regset->core_note_type != NT_PRFPREG)
				fill_note(&t->notes[i], "LINUX",
					  regset->core_note_type,
				SET_PR_FPVALID(&t->prstatus, 1);
				fill_note(&t->notes[i], "CORE",
					  NT_PRFPREG, size, data);
			*total += notesize(&t->notes[i]);
/*
 * Regset variant: collect all non-memory information for the dump.
 * Initializes the ELF header, allocates an elf_thread_core_info per
 * thread parked in the core_state dumper list (keeping the dumping task
 * at the head), fills each thread's notes, then the four process-wide
 * notes.  info->size accumulates the notes-segment size throughout.
 */
static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  siginfo_t *siginfo, struct pt_regs *regs)
	struct task_struct *dump_task = current;
	const struct user_regset_view *view = task_user_regset_view(dump_task);
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;

	info->thread = NULL;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (psinfo == NULL) {
		info->psinfo.data = NULL; /* So we don't free this wrongly */

	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs,
			view->e_machine, view->e_flags);

	/*
	 * Allocate a structure for each thread.
	 */
	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
		t = kzalloc(offsetof(struct elf_thread_core_info,
				     notes[info->thread_notes]),
		if (ct->task == dump_task || !info->thread) {
			t->next = info->thread;
			/*
			 * Make sure to keep the original task at
			 * the head of the list.
			 */
			t->next = info->thread->next;
			info->thread->next = t;

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))

	/*
	 * Fill in the two process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
	info->size += notesize(&info->signote);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	fill_files_note(&info->files);
	info->size += notesize(&info->files);
/* Total on-disk size of all notes collected by fill_note_info(). */
static size_t get_note_info_size(struct elf_note_info *info)
/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
			   struct coredump_params *cprm)
	struct elf_thread_core_info *t = info->thread;

		if (!writenote(&t->notes[0], cprm))
		/* process-wide notes follow the leader's NT_PRSTATUS only */
		if (first && !writenote(&info->psinfo, cprm))
		if (first && !writenote(&info->signote, cprm))
		if (first && !writenote(&info->auxv, cprm))
		if (first && !writenote(&info->files, cprm))

		/* slots left all-zero by fill_thread_core_info() are skipped */
		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], cprm))
/* Release everything allocated by fill_note_info()/fill_thread_core_info(). */
static void free_note_info(struct elf_note_info *info)
	struct elf_thread_core_info *threads = info->thread;
		struct elf_thread_core_info *t = threads;
		/* notes[0] points into t->prstatus, never a separate allocation */
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kfree(t->notes[i].data);
	kfree(info->psinfo.data);
	vfree(info->files.data);	/* fill_files_note() allocates with vmalloc() */
/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
	struct list_head list;		/* link in elf_note_info.thread_list */
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
	struct memelfnote notes[3];	/* prstatus, fpregs, optional xfpregs */
/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.  This helper fills one
 * thread's notes (NT_PRSTATUS, NT_PRFPREG, optionally the arch xfpreg
 * note) and returns the accumulated note size.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
	struct task_struct *p = t->thread;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
	sz += notesize(&t->notes[0]);

	/* only emit the FPU note when this thread actually has FPU state */
	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
		sz += notesize(&t->notes[1]);

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(t->xfpu), &t->xfpu);
		sz += notesize(&t->notes[2]);
/*
 * Everything collected for one core dump (non-regset variant): a fixed
 * array of process-wide notes plus a list of per-thread status records.
 */
struct elf_note_info {
	struct memelfnote *notes;
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;	/* of struct elf_thread_status */
	elf_fpregset_t *fpu;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;
	user_siginfo_t csigdata;	/* backing store for the NT_SIGINFO note */
	int thread_status_size;
/*
 * Zero the note bookkeeping and allocate the fixed per-dump buffers:
 * the notes array, psinfo, prstatus, fpu, and xfpu where supported.
 * NOTE(review): failure checks for several of these allocations are not
 * visible in this excerpt — confirm each kmalloc() result is checked.
 */
static int elf_note_info_init(struct elf_note_info *info)
	memset(info, 0, sizeof(*info));
	INIT_LIST_HEAD(&info->thread_list);

	/* Allocate space for ELF notes */
	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
	if (!info->prstatus)
	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
#ifdef ELF_CORE_COPY_XFPREGS
	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
/*
 * Non-regset variant: gather per-thread status records for every thread
 * parked in the core_state dumper list, then the current thread's
 * prstatus, the process psinfo/siginfo/auxv/NT_FILE notes, and finally
 * the FPU (and arch extended FPU) state.
 */
static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  siginfo_t *siginfo, struct pt_regs *regs)
	struct list_head *t;

	if (!elf_note_info_init(info))

	if (siginfo->si_signo) {
		struct core_thread *ct;
		struct elf_thread_status *ets;

		/* one record per sibling thread blocked in the dumper list */
		for (ct = current->mm->core_state->dumper.next;
		     ct; ct = ct->next) {
			ets = kzalloc(sizeof(*ets), GFP_KERNEL);
			ets->thread = ct->task;
			list_add(&ets->list, &info->thread_list);

		list_for_each(t, &info->thread_list) {
			ets = list_entry(t, struct elf_thread_status, list);
			sz = elf_dump_thread_status(siginfo->si_signo, ets);
			info->thread_status_size += sz;

	/* now collect the dump for the current */
	memset(info->prstatus, 0, sizeof(*info->prstatus));
	fill_prstatus(info->prstatus, current, siginfo->si_signo);
	elf_core_copy_regs(&info->prstatus->pr_reg, regs);

	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */
	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
		  sizeof(*info->prstatus), info->prstatus);
	fill_psinfo(info->psinfo, current->group_leader, current->mm);
	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
		  sizeof(*info->psinfo), info->psinfo);
	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
	fill_auxv_note(info->notes + 3, current->mm);
	fill_files_note(info->notes + 4);

	/* Try to dump the FPU. */
	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
	if (info->prstatus->pr_fpvalid)
		fill_note(info->notes + info->numnote++,
			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, info->xfpu))
		fill_note(info->notes + info->numnote++,
			  "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(*info->xfpu), info->xfpu);
/* Sum the process-note sizes plus all per-thread status notes. */
static size_t get_note_info_size(struct elf_note_info *info)
	for (i = 0; i < info->numnote; i++)
		sz += notesize(info->notes + i);

	sz += info->thread_status_size;
/*
 * Non-regset variant: write the process-wide notes first, then every
 * thread's status notes.
 */
static int write_note_info(struct elf_note_info *info,
			   struct coredump_params *cprm)
	struct list_head *t;

	for (i = 0; i < info->numnote; i++)
		if (!writenote(info->notes + i, cprm))

	/* write out the thread status notes section */
	list_for_each(t, &info->thread_list) {
		struct elf_thread_status *tmp =
			list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], cprm))
/* Release everything allocated by elf_note_info_init()/fill_note_info(). */
static void free_note_info(struct elf_note_info *info)
	while (!list_empty(&info->thread_list)) {
		struct list_head *tmp = info->thread_list.next;
		kfree(list_entry(tmp, struct elf_thread_status, list));

	/* Free data allocated by fill_files_note(): */
	vfree(info->notes[4].data);

	kfree(info->prstatus);
	kfree(info->psinfo);
#ifdef ELF_CORE_COPY_XFPREGS
1993 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1994 struct vm_area_struct *gate_vma)
1996 struct vm_area_struct *ret = tsk->mm->mmap;
2003 * Helper function for iterating across a vma list. It ensures that the caller
2004 * will visit `gate_vma' prior to terminating the search.
2006 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2007 struct vm_area_struct *gate_vma)
2009 struct vm_area_struct *ret;
2011 ret = this_vma->vm_next;
2014 if (this_vma == gate_vma)
2019 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2020 elf_addr_t e_shoff, int segs)
2022 elf->e_shoff = e_shoff;
2023 elf->e_shentsize = sizeof(*shdr4extnum);
2025 elf->e_shstrndx = SHN_UNDEF;
2027 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2029 shdr4extnum->sh_type = SHT_NULL;
2030 shdr4extnum->sh_size = elf->e_shnum;
2031 shdr4extnum->sh_link = elf->e_shstrndx;
2032 shdr4extnum->sh_info = segs;
2035 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2036 unsigned long mm_flags)
2038 struct vm_area_struct *vma;
2041 for (vma = first_vma(current, gate_vma); vma != NULL;
2042 vma = next_vma(vma, gate_vma))
2043 size += vma_dump_size(vma, mm_flags);
/*
 * Actual dumper.
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.
 */
static int elf_core_dump(struct coredump_params *cprm)
	struct vm_area_struct *vma, *gate_vma;
	struct elfhdr *elf = NULL;
	loff_t offset = 0, dataoff;
	struct elf_note_info info;
	struct elf_phdr *phdr4note = NULL;
	struct elf_shdr *shdr4extnum = NULL;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those proceses that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated. So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	/*
	 * The number of segs are recored into ELF header as 16bit value.
	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
	 */
	segs = current->mm->map_count;
	segs += elf_core_extra_phdrs();

	/* the gate VMA (if any) is dumped as one extra segment */
	gate_vma = get_gate_vma(current->mm);
	if (gate_vma != NULL)

	/* for notes section */

	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
	 * this, kernel supports extended numbering. Have a look at
	 * include/linux/elf.h for further information. */
	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

	/*
	 * Collect all the non-memory information about the process for the
	 * notes.  This also sets up the file header.
	 */
	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))

	/* Pass 1: lay out offsets — header, phdrs, notes, then aligned data */
	offset += sizeof(*elf);				/* Elf header */
	offset += segs * sizeof(struct elf_phdr);	/* Program headers */

	/* Write notes phdr entry */
		size_t sz = get_note_info_size(&info);

		sz += elf_coredump_extra_notes_size();

		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
		fill_elf_note_phdr(phdr4note, sz, offset);

	/* memory contents start on an ELF page boundary */
	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
	offset += elf_core_extra_data_size();

	if (e_phnum == PN_XNUM) {
		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);

	/* Pass 2: emit ELF header and the notes program header */
	if (!dump_emit(cprm, elf, sizeof(*elf)))
	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))

	/* Write program headers for segments dump */
	for (vma = first_vma(current, gate_vma); vma != NULL;
	     vma = next_vma(vma, gate_vma)) {
		struct elf_phdr phdr;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
		phdr.p_memsz = vma->vm_end - vma->vm_start;
		offset += phdr.p_filesz;
		/* translate VM_* permissions to ELF PF_* flags */
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		if (!dump_emit(cprm, &phdr, sizeof(phdr)))

	if (!elf_core_write_extra_phdrs(cprm, offset))

	/* write out the notes section */
	if (!write_note_info(&info, cprm))

	if (elf_coredump_extra_notes_write(cprm))

	/* align to the data offset computed in pass 1 */
	if (!dump_align(cprm, ELF_EXEC_PAGESIZE))

	for (vma = first_vma(current, gate_vma); vma != NULL;
	     vma = next_vma(vma, gate_vma)) {
		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);

		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
			/* absent/uninteresting pages become a hole via dump_skip() */
			page = get_dump_page(addr);
				void *kaddr = kmap(page);
				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
				page_cache_release(page);
				stop = !dump_skip(cprm, PAGE_SIZE);

	if (!elf_core_write_extra_data(cprm))

	if (e_phnum == PN_XNUM)
		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))

	free_note_info(&info);
2233 #endif /* CONFIG_ELF_CORE */
2235 static int __init init_elf_binfmt(void)
2237 register_binfmt(&elf_format);
2241 static void __exit exit_elf_binfmt(void)
2243 /* Remove the COFF and ELF loaders. */
2244 unregister_binfmt(&elf_format);
/* Wire the loader into early boot and the module-unload path. */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");