]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/binfmt_elf.c
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...
[karo-tx-linux.git] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/utsname.h>
35 #include <linux/coredump.h>
36 #include <linux/sched.h>
37 #include <asm/uaccess.h>
38 #include <asm/param.h>
39 #include <asm/page.h>
40
/*
 * Default definitions for the user-visible long and siginfo types; each is
 * only supplied when nothing earlier has already defined it.
 */
#if !defined(user_long_t)
#define user_long_t long
#endif
#if !defined(user_siginfo_t)
#define user_siginfo_t siginfo_t
#endif
47
/* Forward declarations for the loaders and the mapping helper used below. */
static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *file);
static unsigned long elf_map(struct file *filep, unsigned long addr,
                             struct elf_phdr *eppnt, int prot, int type,
                             unsigned long total_size);
52
/*
 * When core dumping is not configured, elf_core_dump becomes NULL so that
 * no dump is ever attempted through it.
 */
#if defined(CONFIG_ELF_CORE)
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif
62
/*
 * ELF_MIN_ALIGN is the alignment unit used for all ELF mapping arithmetic
 * in this file: the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE.
 */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

/*
 * Address helpers in units of ELF_MIN_ALIGN:
 *   ELF_PAGESTART(v)  - v rounded down to an alignment boundary
 *   ELF_PAGEOFFSET(v) - offset of v within its alignment unit
 *   ELF_PAGEALIGN(v)  - v rounded up to an alignment boundary
 *
 * The mask is widened to unsigned long *before* it is inverted.  Without
 * the cast, an ELF_MIN_ALIGN of an unsigned type narrower than unsigned
 * long would yield a zero-extended mask and silently clear the upper bits
 * of the address.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
        (((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
76
77 static struct linux_binfmt elf_format = {
78         .module         = THIS_MODULE,
79         .load_binary    = load_elf_binary,
80         .load_shlib     = load_elf_library,
81         .core_dump      = elf_core_dump,
82         .min_coredump   = ELF_EXEC_PAGESIZE,
83 };
84
/* Non-zero when x, viewed as an unsigned address, is not below TASK_SIZE. */
#define BAD_ADDR(x) (!((unsigned long)(x) < TASK_SIZE))
86
87 static int set_brk(unsigned long start, unsigned long end)
88 {
89         start = ELF_PAGEALIGN(start);
90         end = ELF_PAGEALIGN(end);
91         if (end > start) {
92                 unsigned long addr;
93                 addr = vm_brk(start, end - start);
94                 if (BAD_ADDR(addr))
95                         return addr;
96         }
97         current->mm->start_brk = current->mm->brk = end;
98         return 0;
99 }
100
101 /* We need to explicitly zero any fractional pages
102    after the data section (i.e. bss).  This would
103    contain the junk from the file that should not
104    be in memory
105  */
106 static int padzero(unsigned long elf_bss)
107 {
108         unsigned long nbyte;
109
110         nbyte = ELF_PAGEOFFSET(elf_bss);
111         if (nbyte) {
112                 nbyte = ELF_MIN_ALIGN - nbyte;
113                 if (clear_user((void __user *) elf_bss, nbyte))
114                         return -EFAULT;
115         }
116         return 0;
117 }
118
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 *   STACK_ADD(sp, items)   - sp moved by 'items' elf_addr_t slots in the
 *                            direction of stack growth
 *   STACK_ROUND(sp, items) - address 'items' slots beyond sp, rounded to a
 *                            16-byte boundary in the direction of growth
 *   STACK_ALLOC(sp, len)   - reserve 'len' units on the stack, updating sp
 *                            in place and yielding the start of the
 *                            reserved area
 *
 * Every macro argument is parenthesized so that compound expressions such
 * as "a + b" expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
        (sp) += (len); \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
133
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 * Otherwise it defaults to NULL here.
 */
#if !defined(ELF_BASE_PLATFORM)
#define ELF_BASE_PLATFORM NULL
#endif
142
143 static int
144 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
145                 unsigned long load_addr, unsigned long interp_load_addr)
146 {
147         unsigned long p = bprm->p;
148         int argc = bprm->argc;
149         int envc = bprm->envc;
150         elf_addr_t __user *argv;
151         elf_addr_t __user *envp;
152         elf_addr_t __user *sp;
153         elf_addr_t __user *u_platform;
154         elf_addr_t __user *u_base_platform;
155         elf_addr_t __user *u_rand_bytes;
156         const char *k_platform = ELF_PLATFORM;
157         const char *k_base_platform = ELF_BASE_PLATFORM;
158         unsigned char k_rand_bytes[16];
159         int items;
160         elf_addr_t *elf_info;
161         int ei_index = 0;
162         const struct cred *cred = current_cred();
163         struct vm_area_struct *vma;
164
165         /*
166          * In some cases (e.g. Hyper-Threading), we want to avoid L1
167          * evictions by the processes running on the same package. One
168          * thing we can do is to shuffle the initial stack for them.
169          */
170
171         p = arch_align_stack(p);
172
173         /*
174          * If this architecture has a platform capability string, copy it
175          * to userspace.  In some cases (Sparc), this info is impossible
176          * for userspace to get any other way, in others (i386) it is
177          * merely difficult.
178          */
179         u_platform = NULL;
180         if (k_platform) {
181                 size_t len = strlen(k_platform) + 1;
182
183                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
184                 if (__copy_to_user(u_platform, k_platform, len))
185                         return -EFAULT;
186         }
187
188         /*
189          * If this architecture has a "base" platform capability
190          * string, copy it to userspace.
191          */
192         u_base_platform = NULL;
193         if (k_base_platform) {
194                 size_t len = strlen(k_base_platform) + 1;
195
196                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
197                 if (__copy_to_user(u_base_platform, k_base_platform, len))
198                         return -EFAULT;
199         }
200
201         /*
202          * Generate 16 random bytes for userspace PRNG seeding.
203          */
204         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
205         u_rand_bytes = (elf_addr_t __user *)
206                        STACK_ALLOC(p, sizeof(k_rand_bytes));
207         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
208                 return -EFAULT;
209
210         /* Create the ELF interpreter info */
211         elf_info = (elf_addr_t *)current->mm->saved_auxv;
212         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
213 #define NEW_AUX_ENT(id, val) \
214         do { \
215                 elf_info[ei_index++] = id; \
216                 elf_info[ei_index++] = val; \
217         } while (0)
218
219 #ifdef ARCH_DLINFO
220         /* 
221          * ARCH_DLINFO must come first so PPC can do its special alignment of
222          * AUXV.
223          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
224          * ARCH_DLINFO changes
225          */
226         ARCH_DLINFO;
227 #endif
228         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
229         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
230         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
231         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
232         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
233         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
234         NEW_AUX_ENT(AT_BASE, interp_load_addr);
235         NEW_AUX_ENT(AT_FLAGS, 0);
236         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
237         NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
238         NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
239         NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
240         NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
241         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
242         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
243         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
244         if (k_platform) {
245                 NEW_AUX_ENT(AT_PLATFORM,
246                             (elf_addr_t)(unsigned long)u_platform);
247         }
248         if (k_base_platform) {
249                 NEW_AUX_ENT(AT_BASE_PLATFORM,
250                             (elf_addr_t)(unsigned long)u_base_platform);
251         }
252         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
253                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
254         }
255 #undef NEW_AUX_ENT
256         /* AT_NULL is zero; clear the rest too */
257         memset(&elf_info[ei_index], 0,
258                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
259
260         /* And advance past the AT_NULL entry.  */
261         ei_index += 2;
262
263         sp = STACK_ADD(p, ei_index);
264
265         items = (argc + 1) + (envc + 1) + 1;
266         bprm->p = STACK_ROUND(sp, items);
267
268         /* Point sp at the lowest address on the stack */
269 #ifdef CONFIG_STACK_GROWSUP
270         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
271         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
272 #else
273         sp = (elf_addr_t __user *)bprm->p;
274 #endif
275
276
277         /*
278          * Grow the stack manually; some architectures have a limit on how
279          * far ahead a user-space access may be in order to grow the stack.
280          */
281         vma = find_extend_vma(current->mm, bprm->p);
282         if (!vma)
283                 return -EFAULT;
284
285         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
286         if (__put_user(argc, sp++))
287                 return -EFAULT;
288         argv = sp;
289         envp = argv + argc + 1;
290
291         /* Populate argv and envp */
292         p = current->mm->arg_end = current->mm->arg_start;
293         while (argc-- > 0) {
294                 size_t len;
295                 if (__put_user((elf_addr_t)p, argv++))
296                         return -EFAULT;
297                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
298                 if (!len || len > MAX_ARG_STRLEN)
299                         return -EINVAL;
300                 p += len;
301         }
302         if (__put_user(0, argv))
303                 return -EFAULT;
304         current->mm->arg_end = current->mm->env_start = p;
305         while (envc-- > 0) {
306                 size_t len;
307                 if (__put_user((elf_addr_t)p, envp++))
308                         return -EFAULT;
309                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
310                 if (!len || len > MAX_ARG_STRLEN)
311                         return -EINVAL;
312                 p += len;
313         }
314         if (__put_user(0, envp))
315                 return -EFAULT;
316         current->mm->env_end = p;
317
318         /* Put the elf_info on the stack in the right place.  */
319         sp = (elf_addr_t __user *)envp + 1;
320         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
321                 return -EFAULT;
322         return 0;
323 }
324
325 static unsigned long elf_map(struct file *filep, unsigned long addr,
326                 struct elf_phdr *eppnt, int prot, int type,
327                 unsigned long total_size)
328 {
329         unsigned long map_addr;
330         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
331         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
332         addr = ELF_PAGESTART(addr);
333         size = ELF_PAGEALIGN(size);
334
335         /* mmap() will return -EINVAL if given a zero size, but a
336          * segment with zero filesize is perfectly valid */
337         if (!size)
338                 return addr;
339
340         /*
341         * total_size is the size of the ELF (interpreter) image.
342         * The _first_ mmap needs to know the full size, otherwise
343         * randomization might put this image into an overlapping
344         * position with the ELF binary image. (since size < total_size)
345         * So we first map the 'big' image - and unmap the remainder at
346         * the end. (which unmap is needed for ELF images with holes.)
347         */
348         if (total_size) {
349                 total_size = ELF_PAGEALIGN(total_size);
350                 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
351                 if (!BAD_ADDR(map_addr))
352                         vm_munmap(map_addr+size, total_size-size);
353         } else
354                 map_addr = vm_mmap(filep, addr, size, prot, type, off);
355
356         return(map_addr);
357 }
358
359 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
360 {
361         int i, first_idx = -1, last_idx = -1;
362
363         for (i = 0; i < nr; i++) {
364                 if (cmds[i].p_type == PT_LOAD) {
365                         last_idx = i;
366                         if (first_idx == -1)
367                                 first_idx = i;
368                 }
369         }
370         if (first_idx == -1)
371                 return 0;
372
373         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
374                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
375 }
376
377
378 /* This is much more generalized than the library routine read function,
379    so we keep this separate.  Technically the library read function
380    is only provided so that we can read a.out libraries that have
381    an ELF header */
382
383 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
384                 struct file *interpreter, unsigned long *interp_map_addr,
385                 unsigned long no_base)
386 {
387         struct elf_phdr *elf_phdata;
388         struct elf_phdr *eppnt;
389         unsigned long load_addr = 0;
390         int load_addr_set = 0;
391         unsigned long last_bss = 0, elf_bss = 0;
392         unsigned long error = ~0UL;
393         unsigned long total_size;
394         int retval, i, size;
395
396         /* First of all, some simple consistency checks */
397         if (interp_elf_ex->e_type != ET_EXEC &&
398             interp_elf_ex->e_type != ET_DYN)
399                 goto out;
400         if (!elf_check_arch(interp_elf_ex))
401                 goto out;
402         if (!interpreter->f_op || !interpreter->f_op->mmap)
403                 goto out;
404
405         /*
406          * If the size of this structure has changed, then punt, since
407          * we will be doing the wrong thing.
408          */
409         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
410                 goto out;
411         if (interp_elf_ex->e_phnum < 1 ||
412                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
413                 goto out;
414
415         /* Now read in all of the header information */
416         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
417         if (size > ELF_MIN_ALIGN)
418                 goto out;
419         elf_phdata = kmalloc(size, GFP_KERNEL);
420         if (!elf_phdata)
421                 goto out;
422
423         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
424                              (char *)elf_phdata, size);
425         error = -EIO;
426         if (retval != size) {
427                 if (retval < 0)
428                         error = retval; 
429                 goto out_close;
430         }
431
432         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
433         if (!total_size) {
434                 error = -EINVAL;
435                 goto out_close;
436         }
437
438         eppnt = elf_phdata;
439         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
440                 if (eppnt->p_type == PT_LOAD) {
441                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
442                         int elf_prot = 0;
443                         unsigned long vaddr = 0;
444                         unsigned long k, map_addr;
445
446                         if (eppnt->p_flags & PF_R)
447                                 elf_prot = PROT_READ;
448                         if (eppnt->p_flags & PF_W)
449                                 elf_prot |= PROT_WRITE;
450                         if (eppnt->p_flags & PF_X)
451                                 elf_prot |= PROT_EXEC;
452                         vaddr = eppnt->p_vaddr;
453                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
454                                 elf_type |= MAP_FIXED;
455                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
456                                 load_addr = -vaddr;
457
458                         map_addr = elf_map(interpreter, load_addr + vaddr,
459                                         eppnt, elf_prot, elf_type, total_size);
460                         total_size = 0;
461                         if (!*interp_map_addr)
462                                 *interp_map_addr = map_addr;
463                         error = map_addr;
464                         if (BAD_ADDR(map_addr))
465                                 goto out_close;
466
467                         if (!load_addr_set &&
468                             interp_elf_ex->e_type == ET_DYN) {
469                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
470                                 load_addr_set = 1;
471                         }
472
473                         /*
474                          * Check to see if the section's size will overflow the
475                          * allowed task size. Note that p_filesz must always be
476                          * <= p_memsize so it's only necessary to check p_memsz.
477                          */
478                         k = load_addr + eppnt->p_vaddr;
479                         if (BAD_ADDR(k) ||
480                             eppnt->p_filesz > eppnt->p_memsz ||
481                             eppnt->p_memsz > TASK_SIZE ||
482                             TASK_SIZE - eppnt->p_memsz < k) {
483                                 error = -ENOMEM;
484                                 goto out_close;
485                         }
486
487                         /*
488                          * Find the end of the file mapping for this phdr, and
489                          * keep track of the largest address we see for this.
490                          */
491                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
492                         if (k > elf_bss)
493                                 elf_bss = k;
494
495                         /*
496                          * Do the same thing for the memory mapping - between
497                          * elf_bss and last_bss is the bss section.
498                          */
499                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
500                         if (k > last_bss)
501                                 last_bss = k;
502                 }
503         }
504
505         if (last_bss > elf_bss) {
506                 /*
507                  * Now fill out the bss section.  First pad the last page up
508                  * to the page boundary, and then perform a mmap to make sure
509                  * that there are zero-mapped pages up to and including the
510                  * last bss page.
511                  */
512                 if (padzero(elf_bss)) {
513                         error = -EFAULT;
514                         goto out_close;
515                 }
516
517                 /* What we have mapped so far */
518                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
519
520                 /* Map the last of the bss segment */
521                 error = vm_brk(elf_bss, last_bss - elf_bss);
522                 if (BAD_ADDR(error))
523                         goto out_close;
524         }
525
526         error = load_addr;
527
528 out_close:
529         kfree(elf_phdata);
530 out:
531         return error;
532 }
533
534 /*
535  * These are the functions used to load ELF style executables and shared
536  * libraries.  There is no binary dependent code anywhere else.
537  */
538
/* Interpreter kinds. */
#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

/*
 * Mask applied to the random page count used for stack randomization,
 * unless one has already been defined.
 */
#if !defined(STACK_RND_MASK)
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif
545
546 static unsigned long randomize_stack_top(unsigned long stack_top)
547 {
548         unsigned int random_variable = 0;
549
550         if ((current->flags & PF_RANDOMIZE) &&
551                 !(current->personality & ADDR_NO_RANDOMIZE)) {
552                 random_variable = get_random_int() & STACK_RND_MASK;
553                 random_variable <<= PAGE_SHIFT;
554         }
555 #ifdef CONFIG_STACK_GROWSUP
556         return PAGE_ALIGN(stack_top) + random_variable;
557 #else
558         return PAGE_ALIGN(stack_top) - random_variable;
559 #endif
560 }
561
562 static int load_elf_binary(struct linux_binprm *bprm)
563 {
564         struct file *interpreter = NULL; /* to shut gcc up */
565         unsigned long load_addr = 0, load_bias = 0;
566         int load_addr_set = 0;
567         char * elf_interpreter = NULL;
568         unsigned long error;
569         struct elf_phdr *elf_ppnt, *elf_phdata;
570         unsigned long elf_bss, elf_brk;
571         int retval, i;
572         unsigned int size;
573         unsigned long elf_entry;
574         unsigned long interp_load_addr = 0;
575         unsigned long start_code, end_code, start_data, end_data;
576         unsigned long reloc_func_desc __maybe_unused = 0;
577         int executable_stack = EXSTACK_DEFAULT;
578         unsigned long def_flags = 0;
579         struct pt_regs *regs = current_pt_regs();
580         struct {
581                 struct elfhdr elf_ex;
582                 struct elfhdr interp_elf_ex;
583         } *loc;
584
585         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
586         if (!loc) {
587                 retval = -ENOMEM;
588                 goto out_ret;
589         }
590         
591         /* Get the exec-header */
592         loc->elf_ex = *((struct elfhdr *)bprm->buf);
593
594         retval = -ENOEXEC;
595         /* First of all, some simple consistency checks */
596         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
597                 goto out;
598
599         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
600                 goto out;
601         if (!elf_check_arch(&loc->elf_ex))
602                 goto out;
603         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
604                 goto out;
605
606         /* Now read in all of the header information */
607         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
608                 goto out;
609         if (loc->elf_ex.e_phnum < 1 ||
610                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
611                 goto out;
612         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
613         retval = -ENOMEM;
614         elf_phdata = kmalloc(size, GFP_KERNEL);
615         if (!elf_phdata)
616                 goto out;
617
618         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
619                              (char *)elf_phdata, size);
620         if (retval != size) {
621                 if (retval >= 0)
622                         retval = -EIO;
623                 goto out_free_ph;
624         }
625
626         elf_ppnt = elf_phdata;
627         elf_bss = 0;
628         elf_brk = 0;
629
630         start_code = ~0UL;
631         end_code = 0;
632         start_data = 0;
633         end_data = 0;
634
635         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
636                 if (elf_ppnt->p_type == PT_INTERP) {
637                         /* This is the program interpreter used for
638                          * shared libraries - for now assume that this
639                          * is an a.out format binary
640                          */
641                         retval = -ENOEXEC;
642                         if (elf_ppnt->p_filesz > PATH_MAX || 
643                             elf_ppnt->p_filesz < 2)
644                                 goto out_free_ph;
645
646                         retval = -ENOMEM;
647                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
648                                                   GFP_KERNEL);
649                         if (!elf_interpreter)
650                                 goto out_free_ph;
651
652                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
653                                              elf_interpreter,
654                                              elf_ppnt->p_filesz);
655                         if (retval != elf_ppnt->p_filesz) {
656                                 if (retval >= 0)
657                                         retval = -EIO;
658                                 goto out_free_interp;
659                         }
660                         /* make sure path is NULL terminated */
661                         retval = -ENOEXEC;
662                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
663                                 goto out_free_interp;
664
665                         interpreter = open_exec(elf_interpreter);
666                         retval = PTR_ERR(interpreter);
667                         if (IS_ERR(interpreter))
668                                 goto out_free_interp;
669
670                         /*
671                          * If the binary is not readable then enforce
672                          * mm->dumpable = 0 regardless of the interpreter's
673                          * permissions.
674                          */
675                         would_dump(bprm, interpreter);
676
677                         retval = kernel_read(interpreter, 0, bprm->buf,
678                                              BINPRM_BUF_SIZE);
679                         if (retval != BINPRM_BUF_SIZE) {
680                                 if (retval >= 0)
681                                         retval = -EIO;
682                                 goto out_free_dentry;
683                         }
684
685                         /* Get the exec headers */
686                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
687                         break;
688                 }
689                 elf_ppnt++;
690         }
691
692         elf_ppnt = elf_phdata;
693         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
694                 if (elf_ppnt->p_type == PT_GNU_STACK) {
695                         if (elf_ppnt->p_flags & PF_X)
696                                 executable_stack = EXSTACK_ENABLE_X;
697                         else
698                                 executable_stack = EXSTACK_DISABLE_X;
699                         break;
700                 }
701
702         /* Some simple consistency checks for the interpreter */
703         if (elf_interpreter) {
704                 retval = -ELIBBAD;
705                 /* Not an ELF interpreter */
706                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
707                         goto out_free_dentry;
708                 /* Verify the interpreter has a valid arch */
709                 if (!elf_check_arch(&loc->interp_elf_ex))
710                         goto out_free_dentry;
711         }
712
713         /* Flush all traces of the currently running executable */
714         retval = flush_old_exec(bprm);
715         if (retval)
716                 goto out_free_dentry;
717
718         /* OK, This is the point of no return */
719         current->mm->def_flags = def_flags;
720
721         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
722            may depend on the personality.  */
723         SET_PERSONALITY(loc->elf_ex);
724         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
725                 current->personality |= READ_IMPLIES_EXEC;
726
727         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
728                 current->flags |= PF_RANDOMIZE;
729
730         setup_new_exec(bprm);
731
732         /* Do this so that we can load the interpreter, if need be.  We will
733            change some of these later */
734         current->mm->free_area_cache = current->mm->mmap_base;
735         current->mm->cached_hole_size = 0;
736         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
737                                  executable_stack);
738         if (retval < 0) {
739                 send_sig(SIGKILL, current, 0);
740                 goto out_free_dentry;
741         }
742         
743         current->mm->start_stack = bprm->p;
744
745         /* Now we do a little grungy work by mmapping the ELF image into
746            the correct location in memory. */
747         for(i = 0, elf_ppnt = elf_phdata;
748             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
749                 int elf_prot = 0, elf_flags;
750                 unsigned long k, vaddr;
751
752                 if (elf_ppnt->p_type != PT_LOAD)
753                         continue;
754
755                 if (unlikely (elf_brk > elf_bss)) {
756                         unsigned long nbyte;
757                     
758                         /* There was a PT_LOAD segment with p_memsz > p_filesz
759                            before this one. Map anonymous pages, if needed,
760                            and clear the area.  */
761                         retval = set_brk(elf_bss + load_bias,
762                                          elf_brk + load_bias);
763                         if (retval) {
764                                 send_sig(SIGKILL, current, 0);
765                                 goto out_free_dentry;
766                         }
767                         nbyte = ELF_PAGEOFFSET(elf_bss);
768                         if (nbyte) {
769                                 nbyte = ELF_MIN_ALIGN - nbyte;
770                                 if (nbyte > elf_brk - elf_bss)
771                                         nbyte = elf_brk - elf_bss;
772                                 if (clear_user((void __user *)elf_bss +
773                                                         load_bias, nbyte)) {
774                                         /*
775                                          * This bss-zeroing can fail if the ELF
776                                          * file specifies odd protections. So
777                                          * we don't check the return value
778                                          */
779                                 }
780                         }
781                 }
782
783                 if (elf_ppnt->p_flags & PF_R)
784                         elf_prot |= PROT_READ;
785                 if (elf_ppnt->p_flags & PF_W)
786                         elf_prot |= PROT_WRITE;
787                 if (elf_ppnt->p_flags & PF_X)
788                         elf_prot |= PROT_EXEC;
789
790                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
791
792                 vaddr = elf_ppnt->p_vaddr;
793                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
794                         elf_flags |= MAP_FIXED;
795                 } else if (loc->elf_ex.e_type == ET_DYN) {
796                         /* Try and get dynamic programs out of the way of the
797                          * default mmap base, as well as whatever program they
798                          * might try to exec.  This is because the brk will
799                          * follow the loader, and is not movable.  */
800 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
801                         /* Memory randomization might have been switched off
802                          * in runtime via sysctl.
803                          * If that is the case, retain the original non-zero
804                          * load_bias value in order to establish proper
805                          * non-randomized mappings.
806                          */
807                         if (current->flags & PF_RANDOMIZE)
808                                 load_bias = 0;
809                         else
810                                 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
811 #else
812                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
813 #endif
814                 }
815
816                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
817                                 elf_prot, elf_flags, 0);
818                 if (BAD_ADDR(error)) {
819                         send_sig(SIGKILL, current, 0);
820                         retval = IS_ERR((void *)error) ?
821                                 PTR_ERR((void*)error) : -EINVAL;
822                         goto out_free_dentry;
823                 }
824
825                 if (!load_addr_set) {
826                         load_addr_set = 1;
827                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
828                         if (loc->elf_ex.e_type == ET_DYN) {
829                                 load_bias += error -
830                                              ELF_PAGESTART(load_bias + vaddr);
831                                 load_addr += load_bias;
832                                 reloc_func_desc = load_bias;
833                         }
834                 }
835                 k = elf_ppnt->p_vaddr;
836                 if (k < start_code)
837                         start_code = k;
838                 if (start_data < k)
839                         start_data = k;
840
841                 /*
842                  * Check to see if the section's size will overflow the
843                  * allowed task size. Note that p_filesz must always be
844                  * <= p_memsz so it is only necessary to check p_memsz.
845                  */
846                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
847                     elf_ppnt->p_memsz > TASK_SIZE ||
848                     TASK_SIZE - elf_ppnt->p_memsz < k) {
849                         /* set_brk can never work. Avoid overflows. */
850                         send_sig(SIGKILL, current, 0);
851                         retval = -EINVAL;
852                         goto out_free_dentry;
853                 }
854
855                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
856
857                 if (k > elf_bss)
858                         elf_bss = k;
859                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
860                         end_code = k;
861                 if (end_data < k)
862                         end_data = k;
863                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
864                 if (k > elf_brk)
865                         elf_brk = k;
866         }
867
868         loc->elf_ex.e_entry += load_bias;
869         elf_bss += load_bias;
870         elf_brk += load_bias;
871         start_code += load_bias;
872         end_code += load_bias;
873         start_data += load_bias;
874         end_data += load_bias;
875
876         /* Calling set_brk effectively mmaps the pages that we need
877          * for the bss and break sections.  We must do this before
878          * mapping in the interpreter, to make sure it doesn't wind
879          * up getting placed where the bss needs to go.
880          */
881         retval = set_brk(elf_bss, elf_brk);
882         if (retval) {
883                 send_sig(SIGKILL, current, 0);
884                 goto out_free_dentry;
885         }
886         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
887                 send_sig(SIGSEGV, current, 0);
888                 retval = -EFAULT; /* Nobody gets to see this, but.. */
889                 goto out_free_dentry;
890         }
891
892         if (elf_interpreter) {
893                 unsigned long interp_map_addr = 0;
894
895                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
896                                             interpreter,
897                                             &interp_map_addr,
898                                             load_bias);
899                 if (!IS_ERR((void *)elf_entry)) {
900                         /*
901                          * load_elf_interp() returns relocation
902                          * adjustment
903                          */
904                         interp_load_addr = elf_entry;
905                         elf_entry += loc->interp_elf_ex.e_entry;
906                 }
907                 if (BAD_ADDR(elf_entry)) {
908                         force_sig(SIGSEGV, current);
909                         retval = IS_ERR((void *)elf_entry) ?
910                                         (int)elf_entry : -EINVAL;
911                         goto out_free_dentry;
912                 }
913                 reloc_func_desc = interp_load_addr;
914
915                 allow_write_access(interpreter);
916                 fput(interpreter);
917                 kfree(elf_interpreter);
918         } else {
919                 elf_entry = loc->elf_ex.e_entry;
920                 if (BAD_ADDR(elf_entry)) {
921                         force_sig(SIGSEGV, current);
922                         retval = -EINVAL;
923                         goto out_free_dentry;
924                 }
925         }
926
927         kfree(elf_phdata);
928
929         set_binfmt(&elf_format);
930
931 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
932         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
933         if (retval < 0) {
934                 send_sig(SIGKILL, current, 0);
935                 goto out;
936         }
937 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
938
939         install_exec_creds(bprm);
940         retval = create_elf_tables(bprm, &loc->elf_ex,
941                           load_addr, interp_load_addr);
942         if (retval < 0) {
943                 send_sig(SIGKILL, current, 0);
944                 goto out;
945         }
946         /* N.B. passed_fileno might not be initialized? */
947         current->mm->end_code = end_code;
948         current->mm->start_code = start_code;
949         current->mm->start_data = start_data;
950         current->mm->end_data = end_data;
951         current->mm->start_stack = bprm->p;
952
953 #ifdef arch_randomize_brk
954         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
955                 current->mm->brk = current->mm->start_brk =
956                         arch_randomize_brk(current->mm);
957 #ifdef CONFIG_COMPAT_BRK
958                 current->brk_randomized = 1;
959 #endif
960         }
961 #endif
962
963         if (current->personality & MMAP_PAGE_ZERO) {
964                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
965                    and some applications "depend" upon this behavior.
966                    Since we do not have the power to recompile these, we
967                    emulate the SVr4 behavior. Sigh. */
968                 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
969                                 MAP_FIXED | MAP_PRIVATE, 0);
970         }
971
972 #ifdef ELF_PLAT_INIT
973         /*
974          * The ABI may specify that certain registers be set up in special
975          * ways (on i386 %edx is the address of a DT_FINI function, for
976          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
977          * that the e_entry field is the address of the function descriptor
978          * for the startup routine, rather than the address of the startup
979          * routine itself.  This macro performs whatever initialization to
980          * the regs structure is required as well as any relocations to the
981          * function descriptor entries when executing dynamically links apps.
982          */
983         ELF_PLAT_INIT(regs, reloc_func_desc);
984 #endif
985
986         start_thread(regs, elf_entry, bprm->p);
987         retval = 0;
988 out:
989         kfree(loc);
990 out_ret:
991         return retval;
992
993         /* error cleanup */
994 out_free_dentry:
995         allow_write_access(interpreter);
996         if (interpreter)
997                 fput(interpreter);
998 out_free_interp:
999         kfree(elf_interpreter);
1000 out_free_ph:
1001         kfree(elf_phdata);
1002         goto out;
1003 }
1004
1005 /* This is really simpleminded and specialized - we are loading an
1006    a.out library that is given an ELF header. */
1007 static int load_elf_library(struct file *file)
1008 {
1009         struct elf_phdr *elf_phdata;
1010         struct elf_phdr *eppnt;
1011         unsigned long elf_bss, bss, len;
1012         int retval, error, i, j;
1013         struct elfhdr elf_ex;
1014
1015         error = -ENOEXEC;
1016         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1017         if (retval != sizeof(elf_ex))
1018                 goto out;
1019
1020         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1021                 goto out;
1022
1023         /* First of all, some simple consistency checks */
1024         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1025             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1026                 goto out;
1027
1028         /* Now read in all of the header information */
1029
1030         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1031         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1032
1033         error = -ENOMEM;
1034         elf_phdata = kmalloc(j, GFP_KERNEL);
1035         if (!elf_phdata)
1036                 goto out;
1037
1038         eppnt = elf_phdata;
1039         error = -ENOEXEC;
1040         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1041         if (retval != j)
1042                 goto out_free_ph;
1043
1044         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1045                 if ((eppnt + i)->p_type == PT_LOAD)
1046                         j++;
1047         if (j != 1)
1048                 goto out_free_ph;
1049
1050         while (eppnt->p_type != PT_LOAD)
1051                 eppnt++;
1052
1053         /* Now use mmap to map the library into memory. */
1054         error = vm_mmap(file,
1055                         ELF_PAGESTART(eppnt->p_vaddr),
1056                         (eppnt->p_filesz +
1057                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1058                         PROT_READ | PROT_WRITE | PROT_EXEC,
1059                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1060                         (eppnt->p_offset -
1061                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1062         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1063                 goto out_free_ph;
1064
1065         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1066         if (padzero(elf_bss)) {
1067                 error = -EFAULT;
1068                 goto out_free_ph;
1069         }
1070
1071         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1072                             ELF_MIN_ALIGN - 1);
1073         bss = eppnt->p_memsz + eppnt->p_vaddr;
1074         if (bss > len)
1075                 vm_brk(len, bss - len);
1076         error = 0;
1077
1078 out_free_ph:
1079         kfree(elf_phdata);
1080 out:
1081         return error;
1082 }
1083
1084 #ifdef CONFIG_ELF_CORE
1085 /*
1086  * ELF core dumper
1087  *
1088  * Modelled on fs/exec.c:aout_core_dump()
1089  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1090  */
1091
1092 /*
1093  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1094  * that are useful for post-mortem analysis are included in every core dump.
1095  * In that way we ensure that the core dump is fully interpretable later
1096  * without matching up the same kernel and hardware config to see what PC values
1097  * meant. These special mappings include - vDSO, vsyscall, and other
1098  * architecture specific mappings
1099  */
1100 static bool always_dump_vma(struct vm_area_struct *vma)
1101 {
1102         /* Any vsyscall mappings? */
1103         if (vma == get_gate_vma(vma->vm_mm))
1104                 return true;
1105         /*
1106          * arch_vma_name() returns non-NULL for special architecture mappings,
1107          * such as vDSO sections.
1108          */
1109         if (arch_vma_name(vma))
1110                 return true;
1111
1112         return false;
1113 }
1114
1115 /*
1116  * Decide what to dump of a segment, part, all or none.
1117  */
1118 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1119                                    unsigned long mm_flags)
1120 {
1121 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1122
1123         /* always dump the vdso and vsyscall sections */
1124         if (always_dump_vma(vma))
1125                 goto whole;
1126
1127         if (vma->vm_flags & VM_DONTDUMP)
1128                 return 0;
1129
1130         /* Hugetlb memory check */
1131         if (vma->vm_flags & VM_HUGETLB) {
1132                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1133                         goto whole;
1134                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1135                         goto whole;
1136         }
1137
1138         /* Do not dump I/O mapped devices or special mappings */
1139         if (vma->vm_flags & VM_IO)
1140                 return 0;
1141
1142         /* By default, dump shared memory if mapped from an anonymous file. */
1143         if (vma->vm_flags & VM_SHARED) {
1144                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1145                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1146                         goto whole;
1147                 return 0;
1148         }
1149
1150         /* Dump segments that have been written to.  */
1151         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1152                 goto whole;
1153         if (vma->vm_file == NULL)
1154                 return 0;
1155
1156         if (FILTER(MAPPED_PRIVATE))
1157                 goto whole;
1158
1159         /*
1160          * If this looks like the beginning of a DSO or executable mapping,
1161          * check for an ELF header.  If we find one, dump the first page to
1162          * aid in determining what was mapped here.
1163          */
1164         if (FILTER(ELF_HEADERS) &&
1165             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1166                 u32 __user *header = (u32 __user *) vma->vm_start;
1167                 u32 word;
1168                 mm_segment_t fs = get_fs();
1169                 /*
1170                  * Doing it this way gets the constant folded by GCC.
1171                  */
1172                 union {
1173                         u32 cmp;
1174                         char elfmag[SELFMAG];
1175                 } magic;
1176                 BUILD_BUG_ON(SELFMAG != sizeof word);
1177                 magic.elfmag[EI_MAG0] = ELFMAG0;
1178                 magic.elfmag[EI_MAG1] = ELFMAG1;
1179                 magic.elfmag[EI_MAG2] = ELFMAG2;
1180                 magic.elfmag[EI_MAG3] = ELFMAG3;
1181                 /*
1182                  * Switch to the user "segment" for get_user(),
1183                  * then put back what elf_core_dump() had in place.
1184                  */
1185                 set_fs(USER_DS);
1186                 if (unlikely(get_user(word, header)))
1187                         word = 0;
1188                 set_fs(fs);
1189                 if (word == magic.cmp)
1190                         return PAGE_SIZE;
1191         }
1192
1193 #undef  FILTER
1194
1195         return 0;
1196
1197 whole:
1198         return vma->vm_end - vma->vm_start;
1199 }
1200
/*
 * In-memory description of one ELF note, before it is serialized into
 * the core file by writenote().
 */
struct memelfnote
{
        const char *name;       /* NUL-terminated note name, e.g. "CORE" */
        int type;               /* note type, written as n_type */
        unsigned int datasz;    /* size of the descriptor in bytes */
        void *data;             /* descriptor payload, datasz bytes */
};
1209
1210 static int notesize(struct memelfnote *en)
1211 {
1212         int sz;
1213
1214         sz = sizeof(struct elf_note);
1215         sz += roundup(strlen(en->name) + 1, 4);
1216         sz += roundup(en->datasz, 4);
1217
1218         return sz;
1219 }
1220
1221 #define DUMP_WRITE(addr, nr, foffset)   \
1222         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1223
1224 static int alignfile(struct file *file, loff_t *foffset)
1225 {
1226         static const char buf[4] = { 0, };
1227         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1228         return 1;
1229 }
1230
1231 static int writenote(struct memelfnote *men, struct file *file,
1232                         loff_t *foffset)
1233 {
1234         struct elf_note en;
1235         en.n_namesz = strlen(men->name) + 1;
1236         en.n_descsz = men->datasz;
1237         en.n_type = men->type;
1238
1239         DUMP_WRITE(&en, sizeof(en), foffset);
1240         DUMP_WRITE(men->name, en.n_namesz, foffset);
1241         if (!alignfile(file, foffset))
1242                 return 0;
1243         DUMP_WRITE(men->data, men->datasz, foffset);
1244         if (!alignfile(file, foffset))
1245                 return 0;
1246
1247         return 1;
1248 }
1249 #undef DUMP_WRITE
1250
1251 static void fill_elf_header(struct elfhdr *elf, int segs,
1252                             u16 machine, u32 flags)
1253 {
1254         memset(elf, 0, sizeof(*elf));
1255
1256         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1257         elf->e_ident[EI_CLASS] = ELF_CLASS;
1258         elf->e_ident[EI_DATA] = ELF_DATA;
1259         elf->e_ident[EI_VERSION] = EV_CURRENT;
1260         elf->e_ident[EI_OSABI] = ELF_OSABI;
1261
1262         elf->e_type = ET_CORE;
1263         elf->e_machine = machine;
1264         elf->e_version = EV_CURRENT;
1265         elf->e_phoff = sizeof(struct elfhdr);
1266         elf->e_flags = flags;
1267         elf->e_ehsize = sizeof(struct elfhdr);
1268         elf->e_phentsize = sizeof(struct elf_phdr);
1269         elf->e_phnum = segs;
1270
1271         return;
1272 }
1273
1274 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1275 {
1276         phdr->p_type = PT_NOTE;
1277         phdr->p_offset = offset;
1278         phdr->p_vaddr = 0;
1279         phdr->p_paddr = 0;
1280         phdr->p_filesz = sz;
1281         phdr->p_memsz = 0;
1282         phdr->p_flags = 0;
1283         phdr->p_align = 0;
1284         return;
1285 }
1286
1287 static void fill_note(struct memelfnote *note, const char *name, int type, 
1288                 unsigned int sz, void *data)
1289 {
1290         note->name = name;
1291         note->type = type;
1292         note->datasz = sz;
1293         note->data = data;
1294         return;
1295 }
1296
1297 /*
1298  * fill up all the fields in prstatus from the given task struct, except
1299  * registers which need to be filled up separately.
1300  */
1301 static void fill_prstatus(struct elf_prstatus *prstatus,
1302                 struct task_struct *p, long signr)
1303 {
1304         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1305         prstatus->pr_sigpend = p->pending.signal.sig[0];
1306         prstatus->pr_sighold = p->blocked.sig[0];
1307         rcu_read_lock();
1308         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1309         rcu_read_unlock();
1310         prstatus->pr_pid = task_pid_vnr(p);
1311         prstatus->pr_pgrp = task_pgrp_vnr(p);
1312         prstatus->pr_sid = task_session_vnr(p);
1313         if (thread_group_leader(p)) {
1314                 struct task_cputime cputime;
1315
1316                 /*
1317                  * This is the record for the group leader.  It shows the
1318                  * group-wide total, not its individual thread total.
1319                  */
1320                 thread_group_cputime(p, &cputime);
1321                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1322                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1323         } else {
1324                 cputime_t utime, stime;
1325
1326                 task_cputime(p, &utime, &stime);
1327                 cputime_to_timeval(utime, &prstatus->pr_utime);
1328                 cputime_to_timeval(stime, &prstatus->pr_stime);
1329         }
1330         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1331         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1332 }
1333
1334 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1335                        struct mm_struct *mm)
1336 {
1337         const struct cred *cred;
1338         unsigned int i, len;
1339         
1340         /* first copy the parameters from user space */
1341         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1342
1343         len = mm->arg_end - mm->arg_start;
1344         if (len >= ELF_PRARGSZ)
1345                 len = ELF_PRARGSZ-1;
1346         if (copy_from_user(&psinfo->pr_psargs,
1347                            (const char __user *)mm->arg_start, len))
1348                 return -EFAULT;
1349         for(i = 0; i < len; i++)
1350                 if (psinfo->pr_psargs[i] == 0)
1351                         psinfo->pr_psargs[i] = ' ';
1352         psinfo->pr_psargs[len] = 0;
1353
1354         rcu_read_lock();
1355         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1356         rcu_read_unlock();
1357         psinfo->pr_pid = task_pid_vnr(p);
1358         psinfo->pr_pgrp = task_pgrp_vnr(p);
1359         psinfo->pr_sid = task_session_vnr(p);
1360
1361         i = p->state ? ffz(~p->state) + 1 : 0;
1362         psinfo->pr_state = i;
1363         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1364         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1365         psinfo->pr_nice = task_nice(p);
1366         psinfo->pr_flag = p->flags;
1367         rcu_read_lock();
1368         cred = __task_cred(p);
1369         SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1370         SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1371         rcu_read_unlock();
1372         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1373         
1374         return 0;
1375 }
1376
1377 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1378 {
1379         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1380         int i = 0;
1381         do
1382                 i += 2;
1383         while (auxv[i - 2] != AT_NULL);
1384         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1385 }
1386
1387 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1388                 siginfo_t *siginfo)
1389 {
1390         mm_segment_t old_fs = get_fs();
1391         set_fs(KERNEL_DS);
1392         copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1393         set_fs(old_fs);
1394         fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1395 }
1396
1397 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1398 /*
1399  * Format of NT_FILE note:
1400  *
1401  * long count     -- how many files are mapped
1402  * long page_size -- units for file_ofs
1403  * array of [COUNT] elements of
1404  *   long start
1405  *   long end
1406  *   long file_ofs
1407  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1408  */
1409 static void fill_files_note(struct memelfnote *note)
1410 {
1411         struct vm_area_struct *vma;
1412         unsigned count, size, names_ofs, remaining, n;
1413         user_long_t *data;
1414         user_long_t *start_end_ofs;
1415         char *name_base, *name_curpos;
1416
1417         /* *Estimated* file count and total data size needed */
1418         count = current->mm->map_count;
1419         size = count * 64;
1420
1421         names_ofs = (2 + 3 * count) * sizeof(data[0]);
1422  alloc:
1423         if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1424                 goto err;
1425         size = round_up(size, PAGE_SIZE);
1426         data = vmalloc(size);
1427         if (!data)
1428                 goto err;
1429
1430         start_end_ofs = data + 2;
1431         name_base = name_curpos = ((char *)data) + names_ofs;
1432         remaining = size - names_ofs;
1433         count = 0;
1434         for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1435                 struct file *file;
1436                 const char *filename;
1437
1438                 file = vma->vm_file;
1439                 if (!file)
1440                         continue;
1441                 filename = d_path(&file->f_path, name_curpos, remaining);
1442                 if (IS_ERR(filename)) {
1443                         if (PTR_ERR(filename) == -ENAMETOOLONG) {
1444                                 vfree(data);
1445                                 size = size * 5 / 4;
1446                                 goto alloc;
1447                         }
1448                         continue;
1449                 }
1450
1451                 /* d_path() fills at the end, move name down */
1452                 /* n = strlen(filename) + 1: */
1453                 n = (name_curpos + remaining) - filename;
1454                 remaining = filename - name_curpos;
1455                 memmove(name_curpos, filename, n);
1456                 name_curpos += n;
1457
1458                 *start_end_ofs++ = vma->vm_start;
1459                 *start_end_ofs++ = vma->vm_end;
1460                 *start_end_ofs++ = vma->vm_pgoff;
1461                 count++;
1462         }
1463
1464         /* Now we know exact count of files, can store it */
1465         data[0] = count;
1466         data[1] = PAGE_SIZE;
1467         /*
1468          * Count usually is less than current->mm->map_count,
1469          * we need to move filenames down.
1470          */
1471         n = current->mm->map_count - count;
1472         if (n != 0) {
1473                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1474                 memmove(name_base - shift_bytes, name_base,
1475                         name_curpos - name_base);
1476                 name_curpos -= shift_bytes;
1477         }
1478
1479         size = name_curpos - (char *)data;
1480         fill_note(note, "CORE", NT_FILE, size, data);
1481  err: ;
1482 }
1483
1484 #ifdef CORE_DUMP_USE_REGSET
1485 #include <linux/regset.h>
1486
1487 struct elf_thread_core_info {
1488         struct elf_thread_core_info *next;
1489         struct task_struct *task;
1490         struct elf_prstatus prstatus;
1491         struct memelfnote notes[0];
1492 };
1493
1494 struct elf_note_info {
1495         struct elf_thread_core_info *thread;
1496         struct memelfnote psinfo;
1497         struct memelfnote signote;
1498         struct memelfnote auxv;
1499         struct memelfnote files;
1500         user_siginfo_t csigdata;
1501         size_t size;
1502         int thread_notes;
1503 };
1504
1505 /*
1506  * When a regset has a writeback hook, we call it on each thread before
1507  * dumping user memory.  On register window machines, this makes sure the
1508  * user memory backing the register data is up to date before we read it.
1509  */
1510 static void do_thread_regset_writeback(struct task_struct *task,
1511                                        const struct user_regset *regset)
1512 {
1513         if (regset->writeback)
1514                 regset->writeback(task, regset, 1);
1515 }
1516
/*
 * Default accessors for struct elf_prstatus.  Each one is only defined
 * here when nothing earlier has already provided its own definition.
 */

/* Size of the register area inside prstatus. */
#if !defined(PR_REG_SIZE)
#define PR_REG_SIZE(S) sizeof(S)
#endif

/* Size of the whole prstatus as written into the note. */
#if !defined(PRSTATUS_SIZE)
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

/* Address of the register area inside prstatus. */
#if !defined(PR_REG_PTR)
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

/* Record whether floating point state accompanies this prstatus. */
#if !defined(SET_PR_FPVALID)
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1532
1533 static int fill_thread_core_info(struct elf_thread_core_info *t,
1534                                  const struct user_regset_view *view,
1535                                  long signr, size_t *total)
1536 {
1537         unsigned int i;
1538
1539         /*
1540          * NT_PRSTATUS is the one special case, because the regset data
1541          * goes into the pr_reg field inside the note contents, rather
1542          * than being the whole note contents.  We fill the reset in here.
1543          * We assume that regset 0 is NT_PRSTATUS.
1544          */
1545         fill_prstatus(&t->prstatus, t->task, signr);
1546         (void) view->regsets[0].get(t->task, &view->regsets[0],
1547                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1548                                     PR_REG_PTR(&t->prstatus), NULL);
1549
1550         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1551                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1552         *total += notesize(&t->notes[0]);
1553
1554         do_thread_regset_writeback(t->task, &view->regsets[0]);
1555
1556         /*
1557          * Each other regset might generate a note too.  For each regset
1558          * that has no core_note_type or is inactive, we leave t->notes[i]
1559          * all zero and we'll know to skip writing it later.
1560          */
1561         for (i = 1; i < view->n; ++i) {
1562                 const struct user_regset *regset = &view->regsets[i];
1563                 do_thread_regset_writeback(t->task, regset);
1564                 if (regset->core_note_type && regset->get &&
1565                     (!regset->active || regset->active(t->task, regset))) {
1566                         int ret;
1567                         size_t size = regset->n * regset->size;
1568                         void *data = kmalloc(size, GFP_KERNEL);
1569                         if (unlikely(!data))
1570                                 return 0;
1571                         ret = regset->get(t->task, regset,
1572                                           0, size, data, NULL);
1573                         if (unlikely(ret))
1574                                 kfree(data);
1575                         else {
1576                                 if (regset->core_note_type != NT_PRFPREG)
1577                                         fill_note(&t->notes[i], "LINUX",
1578                                                   regset->core_note_type,
1579                                                   size, data);
1580                                 else {
1581                                         SET_PR_FPVALID(&t->prstatus, 1);
1582                                         fill_note(&t->notes[i], "CORE",
1583                                                   NT_PRFPREG, size, data);
1584                                 }
1585                                 *total += notesize(&t->notes[i]);
1586                         }
1587                 }
1588         }
1589
1590         return 1;
1591 }
1592
1593 static int fill_note_info(struct elfhdr *elf, int phdrs,
1594                           struct elf_note_info *info,
1595                           siginfo_t *siginfo, struct pt_regs *regs)
1596 {
1597         struct task_struct *dump_task = current;
1598         const struct user_regset_view *view = task_user_regset_view(dump_task);
1599         struct elf_thread_core_info *t;
1600         struct elf_prpsinfo *psinfo;
1601         struct core_thread *ct;
1602         unsigned int i;
1603
1604         info->size = 0;
1605         info->thread = NULL;
1606
1607         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1608         if (psinfo == NULL) {
1609                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1610                 return 0;
1611         }
1612
1613         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1614
1615         /*
1616          * Figure out how many notes we're going to need for each thread.
1617          */
1618         info->thread_notes = 0;
1619         for (i = 0; i < view->n; ++i)
1620                 if (view->regsets[i].core_note_type != 0)
1621                         ++info->thread_notes;
1622
1623         /*
1624          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1625          * since it is our one special case.
1626          */
1627         if (unlikely(info->thread_notes == 0) ||
1628             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1629                 WARN_ON(1);
1630                 return 0;
1631         }
1632
1633         /*
1634          * Initialize the ELF file header.
1635          */
1636         fill_elf_header(elf, phdrs,
1637                         view->e_machine, view->e_flags);
1638
1639         /*
1640          * Allocate a structure for each thread.
1641          */
1642         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1643                 t = kzalloc(offsetof(struct elf_thread_core_info,
1644                                      notes[info->thread_notes]),
1645                             GFP_KERNEL);
1646                 if (unlikely(!t))
1647                         return 0;
1648
1649                 t->task = ct->task;
1650                 if (ct->task == dump_task || !info->thread) {
1651                         t->next = info->thread;
1652                         info->thread = t;
1653                 } else {
1654                         /*
1655                          * Make sure to keep the original task at
1656                          * the head of the list.
1657                          */
1658                         t->next = info->thread->next;
1659                         info->thread->next = t;
1660                 }
1661         }
1662
1663         /*
1664          * Now fill in each thread's information.
1665          */
1666         for (t = info->thread; t != NULL; t = t->next)
1667                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1668                         return 0;
1669
1670         /*
1671          * Fill in the two process-wide notes.
1672          */
1673         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1674         info->size += notesize(&info->psinfo);
1675
1676         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1677         info->size += notesize(&info->signote);
1678
1679         fill_auxv_note(&info->auxv, current->mm);
1680         info->size += notesize(&info->auxv);
1681
1682         fill_files_note(&info->files);
1683         info->size += notesize(&info->files);
1684
1685         return 1;
1686 }
1687
1688 static size_t get_note_info_size(struct elf_note_info *info)
1689 {
1690         return info->size;
1691 }
1692
1693 /*
1694  * Write all the notes for each thread.  When writing the first thread, the
1695  * process-wide notes are interleaved after the first thread-specific note.
1696  */
1697 static int write_note_info(struct elf_note_info *info,
1698                            struct file *file, loff_t *foffset)
1699 {
1700         bool first = 1;
1701         struct elf_thread_core_info *t = info->thread;
1702
1703         do {
1704                 int i;
1705
1706                 if (!writenote(&t->notes[0], file, foffset))
1707                         return 0;
1708
1709                 if (first && !writenote(&info->psinfo, file, foffset))
1710                         return 0;
1711                 if (first && !writenote(&info->signote, file, foffset))
1712                         return 0;
1713                 if (first && !writenote(&info->auxv, file, foffset))
1714                         return 0;
1715                 if (first && !writenote(&info->files, file, foffset))
1716                         return 0;
1717
1718                 for (i = 1; i < info->thread_notes; ++i)
1719                         if (t->notes[i].data &&
1720                             !writenote(&t->notes[i], file, foffset))
1721                                 return 0;
1722
1723                 first = 0;
1724                 t = t->next;
1725         } while (t);
1726
1727         return 1;
1728 }
1729
1730 static void free_note_info(struct elf_note_info *info)
1731 {
1732         struct elf_thread_core_info *threads = info->thread;
1733         while (threads) {
1734                 unsigned int i;
1735                 struct elf_thread_core_info *t = threads;
1736                 threads = t->next;
1737                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1738                 for (i = 1; i < info->thread_notes; ++i)
1739                         kfree(t->notes[i].data);
1740                 kfree(t);
1741         }
1742         kfree(info->psinfo.data);
1743         vfree(info->files.data);
1744 }
1745
1746 #else
1747
/*
 * Here is the structure in which status of each thread is captured.
 * One instance is allocated per additional thread by fill_note_info() and
 * linked onto elf_note_info.thread_list.
 */
struct elf_thread_status
{
        struct list_head list;          /* link in elf_note_info.thread_list */
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        elf_fpregset_t fpu;             /* NT_PRFPREG */
        struct task_struct *thread;     /* task this record describes */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
#endif
        struct memelfnote notes[3];     /* notes referring to the buffers above */
        int num_notes;                  /* count set by elf_dump_thread_status() */
};
1761
1762 /*
1763  * In order to add the specific thread information for the elf file format,
1764  * we need to keep a linked list of every threads pr_status and then create
1765  * a single section for them in the final core file.
1766  */
1767 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1768 {
1769         int sz = 0;
1770         struct task_struct *p = t->thread;
1771         t->num_notes = 0;
1772
1773         fill_prstatus(&t->prstatus, p, signr);
1774         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1775         
1776         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1777                   &(t->prstatus));
1778         t->num_notes++;
1779         sz += notesize(&t->notes[0]);
1780
1781         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1782                                                                 &t->fpu))) {
1783                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1784                           &(t->fpu));
1785                 t->num_notes++;
1786                 sz += notesize(&t->notes[1]);
1787         }
1788
1789 #ifdef ELF_CORE_COPY_XFPREGS
1790         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1791                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1792                           sizeof(t->xfpu), &t->xfpu);
1793                 t->num_notes++;
1794                 sz += notesize(&t->notes[2]);
1795         }
1796 #endif  
1797         return sz;
1798 }
1799
/*
 * Bookkeeping for the notes of one core dump (non-regset variant).
 * The pointer members are allocated by elf_note_info_init() and released
 * by free_note_info().
 */
struct elf_note_info {
        struct memelfnote *notes;       /* array of 8 process-wide notes */
        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
        struct list_head thread_list;   /* list of struct elf_thread_status */
        elf_fpregset_t *fpu;            /* NT_PRFPREG payload for current */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t *xfpu;          /* ELF_CORE_XFPREG_TYPE payload for current */
#endif
        user_siginfo_t csigdata;        /* buffer handed to fill_siginfo_note() */
        int thread_status_size;         /* summed sizes from elf_dump_thread_status() */
        int numnote;                    /* number of entries of notes[] in use */
};
1813
1814 static int elf_note_info_init(struct elf_note_info *info)
1815 {
1816         memset(info, 0, sizeof(*info));
1817         INIT_LIST_HEAD(&info->thread_list);
1818
1819         /* Allocate space for ELF notes */
1820         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1821         if (!info->notes)
1822                 return 0;
1823         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1824         if (!info->psinfo)
1825                 return 0;
1826         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1827         if (!info->prstatus)
1828                 return 0;
1829         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1830         if (!info->fpu)
1831                 return 0;
1832 #ifdef ELF_CORE_COPY_XFPREGS
1833         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1834         if (!info->xfpu)
1835                 return 0;
1836 #endif
1837         return 1;
1838 }
1839
1840 static int fill_note_info(struct elfhdr *elf, int phdrs,
1841                           struct elf_note_info *info,
1842                           siginfo_t *siginfo, struct pt_regs *regs)
1843 {
1844         struct list_head *t;
1845
1846         if (!elf_note_info_init(info))
1847                 return 0;
1848
1849         if (siginfo->si_signo) {
1850                 struct core_thread *ct;
1851                 struct elf_thread_status *ets;
1852
1853                 for (ct = current->mm->core_state->dumper.next;
1854                                                 ct; ct = ct->next) {
1855                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1856                         if (!ets)
1857                                 return 0;
1858
1859                         ets->thread = ct->task;
1860                         list_add(&ets->list, &info->thread_list);
1861                 }
1862
1863                 list_for_each(t, &info->thread_list) {
1864                         int sz;
1865
1866                         ets = list_entry(t, struct elf_thread_status, list);
1867                         sz = elf_dump_thread_status(siginfo->si_signo, ets);
1868                         info->thread_status_size += sz;
1869                 }
1870         }
1871         /* now collect the dump for the current */
1872         memset(info->prstatus, 0, sizeof(*info->prstatus));
1873         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1874         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1875
1876         /* Set up header */
1877         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1878
1879         /*
1880          * Set up the notes in similar form to SVR4 core dumps made
1881          * with info from their /proc.
1882          */
1883
1884         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1885                   sizeof(*info->prstatus), info->prstatus);
1886         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1887         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1888                   sizeof(*info->psinfo), info->psinfo);
1889
1890         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1891         fill_auxv_note(info->notes + 3, current->mm);
1892         fill_files_note(info->notes + 4);
1893
1894         info->numnote = 5;
1895
1896         /* Try to dump the FPU. */
1897         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1898                                                                info->fpu);
1899         if (info->prstatus->pr_fpvalid)
1900                 fill_note(info->notes + info->numnote++,
1901                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1902 #ifdef ELF_CORE_COPY_XFPREGS
1903         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1904                 fill_note(info->notes + info->numnote++,
1905                           "LINUX", ELF_CORE_XFPREG_TYPE,
1906                           sizeof(*info->xfpu), info->xfpu);
1907 #endif
1908
1909         return 1;
1910 }
1911
1912 static size_t get_note_info_size(struct elf_note_info *info)
1913 {
1914         int sz = 0;
1915         int i;
1916
1917         for (i = 0; i < info->numnote; i++)
1918                 sz += notesize(info->notes + i);
1919
1920         sz += info->thread_status_size;
1921
1922         return sz;
1923 }
1924
1925 static int write_note_info(struct elf_note_info *info,
1926                            struct file *file, loff_t *foffset)
1927 {
1928         int i;
1929         struct list_head *t;
1930
1931         for (i = 0; i < info->numnote; i++)
1932                 if (!writenote(info->notes + i, file, foffset))
1933                         return 0;
1934
1935         /* write out the thread status notes section */
1936         list_for_each(t, &info->thread_list) {
1937                 struct elf_thread_status *tmp =
1938                                 list_entry(t, struct elf_thread_status, list);
1939
1940                 for (i = 0; i < tmp->num_notes; i++)
1941                         if (!writenote(&tmp->notes[i], file, foffset))
1942                                 return 0;
1943         }
1944
1945         return 1;
1946 }
1947
1948 static void free_note_info(struct elf_note_info *info)
1949 {
1950         while (!list_empty(&info->thread_list)) {
1951                 struct list_head *tmp = info->thread_list.next;
1952                 list_del(tmp);
1953                 kfree(list_entry(tmp, struct elf_thread_status, list));
1954         }
1955
1956         /* Free data allocated by fill_files_note(): */
1957         vfree(info->notes[4].data);
1958
1959         kfree(info->prstatus);
1960         kfree(info->psinfo);
1961         kfree(info->notes);
1962         kfree(info->fpu);
1963 #ifdef ELF_CORE_COPY_XFPREGS
1964         kfree(info->xfpu);
1965 #endif
1966 }
1967
1968 #endif
1969
1970 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1971                                         struct vm_area_struct *gate_vma)
1972 {
1973         struct vm_area_struct *ret = tsk->mm->mmap;
1974
1975         if (ret)
1976                 return ret;
1977         return gate_vma;
1978 }
1979 /*
1980  * Helper function for iterating across a vma list.  It ensures that the caller
1981  * will visit `gate_vma' prior to terminating the search.
1982  */
1983 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1984                                         struct vm_area_struct *gate_vma)
1985 {
1986         struct vm_area_struct *ret;
1987
1988         ret = this_vma->vm_next;
1989         if (ret)
1990                 return ret;
1991         if (this_vma == gate_vma)
1992                 return NULL;
1993         return gate_vma;
1994 }
1995
1996 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1997                              elf_addr_t e_shoff, int segs)
1998 {
1999         elf->e_shoff = e_shoff;
2000         elf->e_shentsize = sizeof(*shdr4extnum);
2001         elf->e_shnum = 1;
2002         elf->e_shstrndx = SHN_UNDEF;
2003
2004         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2005
2006         shdr4extnum->sh_type = SHT_NULL;
2007         shdr4extnum->sh_size = elf->e_shnum;
2008         shdr4extnum->sh_link = elf->e_shstrndx;
2009         shdr4extnum->sh_info = segs;
2010 }
2011
2012 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2013                                      unsigned long mm_flags)
2014 {
2015         struct vm_area_struct *vma;
2016         size_t size = 0;
2017
2018         for (vma = first_vma(current, gate_vma); vma != NULL;
2019              vma = next_vma(vma, gate_vma))
2020                 size += vma_dump_size(vma, mm_flags);
2021         return size;
2022 }
2023
/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 *
 * Returns 0 if the ELF header could not be allocated or the note
 * information could not be gathered, and 1 otherwise -- including when a
 * later write fails or hits cprm->limit.
 */
static int elf_core_dump(struct coredump_params *cprm)
{
        int has_dumped = 0;
        mm_segment_t fs;
        int segs;
        size_t size = 0;        /* bytes accounted against cprm->limit */
        struct vm_area_struct *vma, *gate_vma;
        struct elfhdr *elf = NULL;
        loff_t offset = 0, dataoff, foffset;
        /*
         * NOTE(review): info is not initialised here, yet free_note_info()
         * runs on it at the cleanup label even when fill_note_info() fails;
         * fill_note_info() must therefore leave it in a freeable state on
         * every return path.
         */
        struct elf_note_info info;
        struct elf_phdr *phdr4note = NULL;
        struct elf_shdr *shdr4extnum = NULL;
        Elf_Half e_phnum;
        elf_addr_t e_shoff;

        /*
         * We no longer stop all VM operations.
         *
         * This is because those processes that could possibly change map_count
         * or the mmap / vma pages are now blocked in do_exit on current
         * finishing this core dump.
         *
         * Only ptrace can touch these memory addresses, but it doesn't change
         * the map_count or the pages allocated. So no possibility of crashing
         * exists while dumping the mm->vm_next areas to the core file.
         */

        /* alloc memory for large data structures: too large to be on stack */
        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
        if (!elf)
                goto out;
        /*
         * The number of segs is recorded in the ELF header as a 16-bit value.
         * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
         */
        segs = current->mm->map_count;
        segs += elf_core_extra_phdrs();

        gate_vma = get_gate_vma(current->mm);
        if (gate_vma != NULL)
                segs++;

        /* for notes section */
        segs++;

        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
         * this, kernel supports extended numbering. Have a look at
         * include/linux/elf.h for further information. */
        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

        /*
         * Collect all the non-memory information about the process for the
         * notes.  This also sets up the file header.
         */
        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
                goto cleanup;

        /* From here on the dump counts as done, even if a write fails. */
        has_dumped = 1;
        current->flags |= PF_DUMPCORE;

        /* Saved here and restored at end_coredump. */
        fs = get_fs();
        set_fs(KERNEL_DS);

        /* First pass: compute file offsets without writing anything. */
        offset += sizeof(*elf);                         /* Elf header */
        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
        foffset = offset;

        /* Write notes phdr entry */
        {
                size_t sz = get_note_info_size(&info);

                sz += elf_coredump_extra_notes_size();

                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
                if (!phdr4note)
                        goto end_coredump;

                fill_elf_note_phdr(phdr4note, sz, offset);
                offset += sz;
        }

        /* Memory contents start at the next ELF_EXEC_PAGESIZE boundary. */
        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

        /* The extended-numbering section header, if any, follows the data. */
        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
        offset += elf_core_extra_data_size();
        e_shoff = offset;

        if (e_phnum == PN_XNUM) {
                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
                if (!shdr4extnum)
                        goto end_coredump;
                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
        }

        /* Rewind: offset now tracks where each segment's data will land. */
        offset = dataoff;

        /* Second pass: write everything, checking cprm->limit as we go. */
        size += sizeof(*elf);
        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
                goto end_coredump;

        size += sizeof(*phdr4note);
        if (size > cprm->limit
            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
                goto end_coredump;

        /* Write program headers for segments dump */
        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                struct elf_phdr phdr;

                phdr.p_type = PT_LOAD;
                phdr.p_offset = offset;
                phdr.p_vaddr = vma->vm_start;
                phdr.p_paddr = 0;
                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
                phdr.p_memsz = vma->vm_end - vma->vm_start;
                offset += phdr.p_filesz;
                /* Translate the vma protection bits into segment flags. */
                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
                if (vma->vm_flags & VM_WRITE)
                        phdr.p_flags |= PF_W;
                if (vma->vm_flags & VM_EXEC)
                        phdr.p_flags |= PF_X;
                phdr.p_align = ELF_EXEC_PAGESIZE;

                size += sizeof(phdr);
                if (size > cprm->limit
                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
                        goto end_coredump;
        }

        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
                goto end_coredump;

        /* write out the notes section */
        if (!write_note_info(&info, cprm->file, &foffset))
                goto end_coredump;

        /* Unlike the other writers here, a non-zero result aborts the dump. */
        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
                goto end_coredump;

        /* Align to page */
        if (!dump_seek(cprm->file, dataoff - foffset))
                goto end_coredump;

        /* Dump the memory contents, one page at a time. */
        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                unsigned long addr;
                unsigned long end;

                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);

                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
                        struct page *page;
                        int stop;

                        page = get_dump_page(addr);
                        if (page) {
                                void *kaddr = kmap(page);
                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
                                        !dump_write(cprm->file, kaddr,
                                                    PAGE_SIZE);
                                kunmap(page);
                                page_cache_release(page);
                        } else
                                /* No page to dump: seek past it instead. */
                                stop = !dump_seek(cprm->file, PAGE_SIZE);
                        if (stop)
                                goto end_coredump;
                }
        }

        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
                goto end_coredump;

        if (e_phnum == PN_XNUM) {
                size += sizeof(*shdr4extnum);
                if (size > cprm->limit
                    || !dump_write(cprm->file, shdr4extnum,
                                   sizeof(*shdr4extnum)))
                        goto end_coredump;
        }

end_coredump:
        set_fs(fs);

cleanup:
        free_note_info(&info);
        kfree(shdr4extnum);
        kfree(phdr4note);
        kfree(elf);
out:
        return has_dumped;
}
2223
2224 #endif          /* CONFIG_ELF_CORE */
2225
/* Register the ELF binary format handler.  Always returns 0. */
static int __init init_elf_binfmt(void)
{
        register_binfmt(&elf_format);
        return 0;
}
2231
/* Undo init_elf_binfmt(). */
static void __exit exit_elf_binfmt(void)
{
        /* Unregister the ELF loader. */
        unregister_binfmt(&elf_format);
}
2237
/* Hook the init and exit routines above into the kernel and declare the license. */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");