/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmer's Guide: ANSI C and Programming
 * Support Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

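/*
 * Worked example for the macros above, assuming ELF_MIN_ALIGN == 4096
 * (0x1000):
 *   ELF_PAGESTART(0x12345)  == 0x12000  (round down to page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345    (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000  (round up to the next page)
 *   ELF_PAGEALIGN(0x12000)  == 0x12000  (already aligned; unchanged)
 */
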
static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  They would
   otherwise contain junk from the file that should
   not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

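/*
 * Illustrative arithmetic for padzero(), assuming ELF_MIN_ALIGN == 4096:
 * for elf_bss == 0x12345, ELF_PAGEOFFSET(elf_bss) == 0x345, so
 * nbyte == 0x1000 - 0x345 == 0xcbb, and the 0xcbb bytes from 0x12345
 * up to the page boundary at 0x13000 are cleared.
 */
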
/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

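/*
 * Example for the (common) grows-down case: with sp == 0x7ffff000,
 * STACK_ALLOC(sp, 16) moves sp down to 0x7fffeff0 and yields that
 * address, so the 16 bytes live at 0x7fffeff0..0x7ffff000.
 * STACK_ROUND(sp, items) reserves room for 'items' pointer-sized
 * slots and rounds the result down to a 16-byte boundary before the
 * argc/argv/envp/auxv block is written.
 */
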
#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

/*
 * Use get_random_int() to implement AT_RANDOM while avoiding depletion
 * of the entropy pool.
 */
static void get_atrandom_bytes(unsigned char *buf, size_t nbytes)
{
        unsigned char *p = buf;

        while (nbytes) {
                unsigned int random_variable;
                size_t chunk = min(nbytes, sizeof(random_variable));

                random_variable = get_random_int();
                memcpy(p, &random_variable, chunk);
                p += chunk;
                nbytes -= chunk;
        }
}

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

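/*
 * Sketch of the initial process stack that create_elf_tables() lays
 * out (grows-down case; lowest address first):
 *
 *   sp ->  argc
 *          argv[0..argc-1], NULL
 *          envp[0..envc-1], NULL
 *          auxv (id, value) pairs, terminated by an AT_NULL entry
 *          platform string(s), the 16 AT_RANDOM bytes, and the
 *          argument/environment strings (copied by the generic
 *          execve code) live above this, toward STACK_TOP.
 */
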
#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

#endif /* !elf_map */

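/*
 * Worked example for elf_map(), assuming ELF_MIN_ALIGN == 4096: a
 * segment with p_vaddr == 0x400234, p_offset == 0x234 and
 * p_filesz == 0x1000 gives size == ELF_PAGEALIGN(0x1000 + 0x234)
 * == 0x2000 and off == 0.  The file offset is pulled back by the
 * segment's in-page offset (0x234) so that, with the map address
 * rounded down to a page boundary, byte p_offset of the file still
 * lands at p_vaddr.
 */
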
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

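/*
 * Example: with two PT_LOAD entries, the first at p_vaddr 0x400000
 * and the last at p_vaddr 0x600e10 with p_memsz 0x3f0, the span is
 * 0x600e10 + 0x3f0 - ELF_PAGESTART(0x400000) == 0x201200 bytes
 * (assuming ELF_MIN_ALIGN == 4096).
 */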

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

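/*
 * Example with 4 KiB pages (PAGE_SHIFT == 12): STACK_RND_MASK is
 * 0x7ff, so random_variable covers 0..0x7ff pages, i.e. up to
 * 0x7ff000 bytes (just under 8 MiB) of offset below (or above, if
 * the stack grows up) the page-aligned stack top.
 */
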
static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NUL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, This is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl or explicit setting of
                         * personality flags.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel
 * mappings that are useful for post-mortem analysis are included in
 * every core dump.  That way the core dump remains fully interpretable
 * later without having to match up the same kernel and hardware config
 * to see what the PC values meant.  These special mappings include the
 * vDSO, vsyscall, and other architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}

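/*
 * Sizing example: for a note named "CORE" carrying an elf_prstatus
 * payload, sz is sizeof(struct elf_note) (three 32-bit words, i.e.
 * 12 bytes) + roundup(5, 4) == 8 for the name "CORE\0", plus the
 * payload size rounded up to a 4-byte boundary.
 */
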
static int alignfile(struct coredump_params *cprm)
{
        static const char buf[4] = { 0, };
        return dump_emit(cprm, buf, roundup(cprm->written, 4) - cprm->written);
}

static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        if (!dump_emit(cprm, &en, sizeof(en)))
                return 0;
        if (!dump_emit(cprm, men->name, en.n_namesz))
                return 0;
        if (!alignfile(cprm))
                return 0;
        if (!dump_emit(cprm, men->data, men->datasz))
                return 0;
        if (!alignfile(cprm))
                return 0;

        return 1;
}

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_t utime, stime;

                task_cputime(p, &utime, &stime);
                cputime_to_timeval(utime, &prstatus->pr_utime);
                cputime_to_timeval(stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
                siginfo_t *siginfo)
{
        mm_segment_t old_fs = get_fs();
        set_fs(KERNEL_DS);
        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
        set_fs(old_fs);
        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
1432 static int fill_files_note(struct memelfnote *note)
1433 {
1434         struct vm_area_struct *vma;
1435         unsigned count, size, names_ofs, remaining, n;
1436         user_long_t *data;
1437         user_long_t *start_end_ofs;
1438         char *name_base, *name_curpos;
1439
1440         /* *Estimated* file count and total data size needed */
1441         count = current->mm->map_count;
1442         size = count * 64;
1443
1444         names_ofs = (2 + 3 * count) * sizeof(data[0]);
1445  alloc:
1446         if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1447                 return -EINVAL;
1448         size = round_up(size, PAGE_SIZE);
1449         data = vmalloc(size);
1450         if (!data)
1451                 return -ENOMEM;
1452
1453         start_end_ofs = data + 2;
1454         name_base = name_curpos = ((char *)data) + names_ofs;
1455         remaining = size - names_ofs;
1456         count = 0;
1457         for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1458                 struct file *file;
1459                 const char *filename;
1460
1461                 file = vma->vm_file;
1462                 if (!file)
1463                         continue;
1464                 filename = d_path(&file->f_path, name_curpos, remaining);
1465                 if (IS_ERR(filename)) {
1466                         if (PTR_ERR(filename) == -ENAMETOOLONG) {
1467                                 vfree(data);
1468                                 size = size * 5 / 4;
1469                                 goto alloc;
1470                         }
1471                         continue;
1472                 }
1473
1474                 /* d_path() fills at the end, move name down */
1475                 /* n = strlen(filename) + 1: */
1476                 n = (name_curpos + remaining) - filename;
1477                 remaining = filename - name_curpos;
1478                 memmove(name_curpos, filename, n);
1479                 name_curpos += n;
1480
1481                 *start_end_ofs++ = vma->vm_start;
1482                 *start_end_ofs++ = vma->vm_end;
1483                 *start_end_ofs++ = vma->vm_pgoff;
1484                 count++;
1485         }
1486
1487         /* Now we know the exact count of files, so store it */
1488         data[0] = count;
1489         data[1] = PAGE_SIZE;
1490         /*
1491          * The count is usually less than current->mm->map_count,
1492          * so we need to move the filenames down.
1493          */
1494         n = current->mm->map_count - count;
1495         if (n != 0) {
1496                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1497                 memmove(name_base - shift_bytes, name_base,
1498                         name_curpos - name_base);
1499                 name_curpos -= shift_bytes;
1500         }
1501
1502         size = name_curpos - (char *)data;
1503         fill_note(note, "CORE", NT_FILE, size, data);
1504         return 0;
1505 }
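/*
 * Illustrative example (not part of the kernel build): decoding an NT_FILE
 * descriptor from userspace, given a pointer to the note's desc data as a
 * debugger or "readelf -n" style consumer would see it.  Here desc is an
 * assumed input, and file_ofs is in page_size units as documented above:
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	static void print_nt_file(const long *desc)
 *	{
 *		long i, count = desc[0], page_size = desc[1];
 *		const long *ent = desc + 2;
 *		const char *name = (const char *)(ent + 3 * count);
 *
 *		for (i = 0; i < count; i++, ent += 3) {
 *			printf("%#lx-%#lx @ %#lx: %s\n", ent[0], ent[1],
 *			       ent[2] * page_size, name);
 *			name += strlen(name) + 1;
 *		}
 *	}
 */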
1506
1507 #ifdef CORE_DUMP_USE_REGSET
1508 #include <linux/regset.h>
1509
1510 struct elf_thread_core_info {
1511         struct elf_thread_core_info *next;
1512         struct task_struct *task;
1513         struct elf_prstatus prstatus;
1514         struct memelfnote notes[0];
1515 };
1516
1517 struct elf_note_info {
1518         struct elf_thread_core_info *thread;
1519         struct memelfnote psinfo;
1520         struct memelfnote signote;
1521         struct memelfnote auxv;
1522         struct memelfnote files;
1523         user_siginfo_t csigdata;
1524         size_t size;
1525         int thread_notes;
1526 };
1527
1528 /*
1529  * When a regset has a writeback hook, we call it on each thread before
1530  * dumping user memory.  On register window machines, this makes sure the
1531  * user memory backing the register data is up to date before we read it.
1532  */
1533 static void do_thread_regset_writeback(struct task_struct *task,
1534                                        const struct user_regset *regset)
1535 {
1536         if (regset->writeback)
1537                 regset->writeback(task, regset, 1);
1538 }
1539
1540 #ifndef PR_REG_SIZE
1541 #define PR_REG_SIZE(S) sizeof(S)
1542 #endif
1543
1544 #ifndef PRSTATUS_SIZE
1545 #define PRSTATUS_SIZE(S) sizeof(S)
1546 #endif
1547
1548 #ifndef PR_REG_PTR
1549 #define PR_REG_PTR(S) (&((S)->pr_reg))
1550 #endif
1551
1552 #ifndef SET_PR_FPVALID
1553 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1554 #endif
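/*
 * The four macros above default to the native struct layouts.  An
 * architecture can predefine them when the prstatus it must emit for a
 * dumped task (in a compat dump, for instance) differs from the kernel's
 * own struct elf_prstatus.
 */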
1555
1556 static int fill_thread_core_info(struct elf_thread_core_info *t,
1557                                  const struct user_regset_view *view,
1558                                  long signr, size_t *total)
1559 {
1560         unsigned int i;
1561
1562         /*
1563          * NT_PRSTATUS is the one special case, because the regset data
1564          * goes into the pr_reg field inside the note contents, rather
1565          * than being the whole note contents.  We fill the rest in here.
1566          * We assume that regset 0 is NT_PRSTATUS.
1567          */
1568         fill_prstatus(&t->prstatus, t->task, signr);
1569         (void) view->regsets[0].get(t->task, &view->regsets[0],
1570                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1571                                     PR_REG_PTR(&t->prstatus), NULL);
1572
1573         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1574                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1575         *total += notesize(&t->notes[0]);
1576
1577         do_thread_regset_writeback(t->task, &view->regsets[0]);
1578
1579         /*
1580          * Every other regset might generate a note too.  For each regset
1581          * that has no core_note_type or is inactive, we leave t->notes[i]
1582          * all zero and we'll know to skip writing it later.
1583          */
1584         for (i = 1; i < view->n; ++i) {
1585                 const struct user_regset *regset = &view->regsets[i];
1586                 do_thread_regset_writeback(t->task, regset);
1587                 if (regset->core_note_type && regset->get &&
1588                     (!regset->active || regset->active(t->task, regset))) {
1589                         int ret;
1590                         size_t size = regset->n * regset->size;
1591                         void *data = kmalloc(size, GFP_KERNEL);
1592                         if (unlikely(!data))
1593                                 return 0;
1594                         ret = regset->get(t->task, regset,
1595                                           0, size, data, NULL);
1596                         if (unlikely(ret)) {
1597                                 kfree(data);
1598                         } else {
1599                                 if (regset->core_note_type != NT_PRFPREG)
1600                                         fill_note(&t->notes[i], "LINUX",
1601                                                   regset->core_note_type,
1602                                                   size, data);
1603                                 else {
1604                                         SET_PR_FPVALID(&t->prstatus, 1);
1605                                         fill_note(&t->notes[i], "CORE",
1606                                                   NT_PRFPREG, size, data);
1607                                 }
1608                                 *total += notesize(&t->notes[i]);
1609                         }
1610                 }
1611         }
1612
1613         return 1;
1614 }
1615
1616 static int fill_note_info(struct elfhdr *elf, int phdrs,
1617                           struct elf_note_info *info,
1618                           siginfo_t *siginfo, struct pt_regs *regs)
1619 {
1620         struct task_struct *dump_task = current;
1621         const struct user_regset_view *view = task_user_regset_view(dump_task);
1622         struct elf_thread_core_info *t;
1623         struct elf_prpsinfo *psinfo;
1624         struct core_thread *ct;
1625         unsigned int i;
1626
1627         info->size = 0;
1628         info->thread = NULL;
1629
1630         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1631         if (psinfo == NULL) {
1632                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1633                 return 0;
1634         }
1635
1636         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1637
1638         /*
1639          * Figure out how many notes we're going to need for each thread.
1640          */
1641         info->thread_notes = 0;
1642         for (i = 0; i < view->n; ++i)
1643                 if (view->regsets[i].core_note_type != 0)
1644                         ++info->thread_notes;
1645
1646         /*
1647          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1648          * since it is our one special case.
1649          */
1650         if (unlikely(info->thread_notes == 0) ||
1651             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1652                 WARN_ON(1);
1653                 return 0;
1654         }
1655
1656         /*
1657          * Initialize the ELF file header.
1658          */
1659         fill_elf_header(elf, phdrs,
1660                         view->e_machine, view->e_flags);
1661
1662         /*
1663          * Allocate a structure for each thread.
1664          */
1665         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1666                 t = kzalloc(offsetof(struct elf_thread_core_info,
1667                                      notes[info->thread_notes]),
1668                             GFP_KERNEL);
1669                 if (unlikely(!t))
1670                         return 0;
1671
1672                 t->task = ct->task;
1673                 if (ct->task == dump_task || !info->thread) {
1674                         t->next = info->thread;
1675                         info->thread = t;
1676                 } else {
1677                         /*
1678                          * Make sure to keep the original task at
1679                          * the head of the list.
1680                          */
1681                         t->next = info->thread->next;
1682                         info->thread->next = t;
1683                 }
1684         }
1685
1686         /*
1687          * Now fill in each thread's information.
1688          */
1689         for (t = info->thread; t != NULL; t = t->next)
1690                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1691                         return 0;
1692
1693         /*
1694          * Fill in the two process-wide notes.
1695          */
1696         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1697         info->size += notesize(&info->psinfo);
1698
1699         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1700         info->size += notesize(&info->signote);
1701
1702         fill_auxv_note(&info->auxv, current->mm);
1703         info->size += notesize(&info->auxv);
1704
1705         if (fill_files_note(&info->files) == 0)
1706                 info->size += notesize(&info->files);
1707
1708         return 1;
1709 }
1710
1711 static size_t get_note_info_size(struct elf_note_info *info)
1712 {
1713         return info->size;
1714 }
1715
1716 /*
1717  * Write all the notes for each thread.  When writing the first thread, the
1718  * process-wide notes are interleaved after the first thread-specific note.
1719  */
1720 static int write_note_info(struct elf_note_info *info,
1721                            struct coredump_params *cprm)
1722 {
1723         bool first = true;
1724         struct elf_thread_core_info *t = info->thread;
1725
1726         do {
1727                 int i;
1728
1729                 if (!writenote(&t->notes[0], cprm))
1730                         return 0;
1731
1732                 if (first && !writenote(&info->psinfo, cprm))
1733                         return 0;
1734                 if (first && !writenote(&info->signote, cprm))
1735                         return 0;
1736                 if (first && !writenote(&info->auxv, cprm))
1737                         return 0;
1738                 if (first && info->files.data &&
                    !writenote(&info->files, cprm))
1739                         return 0;
1740
1741                 for (i = 1; i < info->thread_notes; ++i)
1742                         if (t->notes[i].data &&
1743                             !writenote(&t->notes[i], cprm))
1744                                 return 0;
1745
1746                 first = false;
1747                 t = t->next;
1748         } while (t);
1749
1750         return 1;
1751 }
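/*
 * Illustrative example (not part of the kernel build): each note emitted by
 * writenote() is an Elf_Nhdr followed by the name and the desc, each padded
 * to 4 bytes.  A consumer holding the PT_NOTE segment in a buffer can walk
 * it like this (64-bit note layout assumed for the sketch):
 *
 *	#include <elf.h>
 *	#include <stdio.h>
 *
 *	#define NOTE_ALIGN(x) (((x) + 3) & ~3UL)
 *
 *	static void walk_notes(const unsigned char *p, size_t len)
 *	{
 *		while (len >= sizeof(Elf64_Nhdr)) {
 *			const Elf64_Nhdr *nh = (const Elf64_Nhdr *)p;
 *			size_t sz = sizeof(*nh) + NOTE_ALIGN(nh->n_namesz) +
 *				    NOTE_ALIGN(nh->n_descsz);
 *
 *			if (sz > len)
 *				break;
 *			printf("note \"%s\" type %u, %u desc bytes\n",
 *			       (const char *)(nh + 1), nh->n_type,
 *			       nh->n_descsz);
 *			p += sz;
 *			len -= sz;
 *		}
 *	}
 */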
1752
1753 static void free_note_info(struct elf_note_info *info)
1754 {
1755         struct elf_thread_core_info *threads = info->thread;
1756         while (threads) {
1757                 unsigned int i;
1758                 struct elf_thread_core_info *t = threads;
1759                 threads = t->next;
1760                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1761                 for (i = 1; i < info->thread_notes; ++i)
1762                         kfree(t->notes[i].data);
1763                 kfree(t);
1764         }
1765         kfree(info->psinfo.data);
1766         vfree(info->files.data);
1767 }
1768
1769 #else
1770
1771 /* Here is the structure in which status of each thread is captured. */
1772 struct elf_thread_status
1773 {
1774         struct list_head list;
1775         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1776         elf_fpregset_t fpu;             /* NT_PRFPREG */
1777         struct task_struct *thread;
1778 #ifdef ELF_CORE_COPY_XFPREGS
1779         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1780 #endif
1781         struct memelfnote notes[3];
1782         int num_notes;
1783 };
1784
1785 /*
1786  * In order to add the specific thread information for the elf file format,
1787  * we need to keep a linked list of every thread's pr_status and then create
1788  * a single section for them in the final core file.
1789  */
1790 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1791 {
1792         int sz = 0;
1793         struct task_struct *p = t->thread;
1794         t->num_notes = 0;
1795
1796         fill_prstatus(&t->prstatus, p, signr);
1797         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1798         
1799         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1800                   &(t->prstatus));
1801         t->num_notes++;
1802         sz += notesize(&t->notes[0]);
1803
1804         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1805                                                                 &t->fpu))) {
1806                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1807                           &(t->fpu));
1808                 t->num_notes++;
1809                 sz += notesize(&t->notes[1]);
1810         }
1811
1812 #ifdef ELF_CORE_COPY_XFPREGS
1813         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1814                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1815                           sizeof(t->xfpu), &t->xfpu);
1816                 t->num_notes++;
1817                 sz += notesize(&t->notes[2]);
1818         }
1819 #endif  
1820         return sz;
1821 }
1822
1823 struct elf_note_info {
1824         struct memelfnote *notes;
        struct memelfnote *notes_files;  /* NT_FILE entry in notes[], if any */
1825         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1826         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1827         struct list_head thread_list;
1828         elf_fpregset_t *fpu;
1829 #ifdef ELF_CORE_COPY_XFPREGS
1830         elf_fpxregset_t *xfpu;
1831 #endif
1832         user_siginfo_t csigdata;
1833         int thread_status_size;
1834         int numnote;
1835 };
1836
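/*
 * A partial allocation failure below is safe: fill_note_info() then returns
 * 0 and elf_core_dump()'s cleanup path calls free_note_info(), where
 * kfree(NULL) and vfree(NULL) are no-ops for the never-allocated members.
 */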
1837 static int elf_note_info_init(struct elf_note_info *info)
1838 {
1839         memset(info, 0, sizeof(*info));
1840         INIT_LIST_HEAD(&info->thread_list);
1841
1842         /* Allocate space for ELF notes */
1843         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1844         if (!info->notes)
1845                 return 0;
1846         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1847         if (!info->psinfo)
1848                 return 0;
1849         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1850         if (!info->prstatus)
1851                 return 0;
1852         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1853         if (!info->fpu)
1854                 return 0;
1855 #ifdef ELF_CORE_COPY_XFPREGS
1856         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1857         if (!info->xfpu)
1858                 return 0;
1859 #endif
1860         return 1;
1861 }
1862
1863 static int fill_note_info(struct elfhdr *elf, int phdrs,
1864                           struct elf_note_info *info,
1865                           siginfo_t *siginfo, struct pt_regs *regs)
1866 {
1867         struct list_head *t;
1868
1869         if (!elf_note_info_init(info))
1870                 return 0;
1871
1872         if (siginfo->si_signo) {
1873                 struct core_thread *ct;
1874                 struct elf_thread_status *ets;
1875
1876                 for (ct = current->mm->core_state->dumper.next;
1877                                                 ct; ct = ct->next) {
1878                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1879                         if (!ets)
1880                                 return 0;
1881
1882                         ets->thread = ct->task;
1883                         list_add(&ets->list, &info->thread_list);
1884                 }
1885
1886                 list_for_each(t, &info->thread_list) {
1887                         int sz;
1888
1889                         ets = list_entry(t, struct elf_thread_status, list);
1890                         sz = elf_dump_thread_status(siginfo->si_signo, ets);
1891                         info->thread_status_size += sz;
1892                 }
1893         }
1894         /* now collect the dump for the current task */
1895         memset(info->prstatus, 0, sizeof(*info->prstatus));
1896         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1897         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1898
1899         /* Set up header */
1900         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1901
1902         /*
1903          * Set up the notes in similar form to SVR4 core dumps made
1904          * with info from their /proc.
1905          */
1906
1907         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1908                   sizeof(*info->prstatus), info->prstatus);
1909         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1910         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1911                   sizeof(*info->psinfo), info->psinfo);
1912
1913         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1914         fill_auxv_note(info->notes + 3, current->mm);
1915         info->numnote = 4;
1916
1917         /* NT_FILE is optional: skip its note if it cannot be built */
        if (fill_files_note(info->notes + info->numnote) == 0) {
                info->notes_files = info->notes + info->numnote;
                info->numnote++;
        }
1918
1919         /* Try to dump the FPU. */
1920         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1921                                                                info->fpu);
1922         if (info->prstatus->pr_fpvalid)
1923                 fill_note(info->notes + info->numnote++,
1924                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1925 #ifdef ELF_CORE_COPY_XFPREGS
1926         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1927                 fill_note(info->notes + info->numnote++,
1928                           "LINUX", ELF_CORE_XFPREG_TYPE,
1929                           sizeof(*info->xfpu), info->xfpu);
1930 #endif
1931
1932         return 1;
1933 }
1934
1935 static size_t get_note_info_size(struct elf_note_info *info)
1936 {
1937         int sz = 0;
1938         int i;
1939
1940         for (i = 0; i < info->numnote; i++)
1941                 sz += notesize(info->notes + i);
1942
1943         sz += info->thread_status_size;
1944
1945         return sz;
1946 }
1947
1948 static int write_note_info(struct elf_note_info *info,
1949                            struct coredump_params *cprm)
1950 {
1951         int i;
1952         struct list_head *t;
1953
1954         for (i = 0; i < info->numnote; i++)
1955                 if (!writenote(info->notes + i, cprm))
1956                         return 0;
1957
1958         /* write out the thread status notes section */
1959         list_for_each(t, &info->thread_list) {
1960                 struct elf_thread_status *tmp =
1961                                 list_entry(t, struct elf_thread_status, list);
1962
1963                 for (i = 0; i < tmp->num_notes; i++)
1964                         if (!writenote(&tmp->notes[i], cprm))
1965                                 return 0;
1966         }
1967
1968         return 1;
1969 }
1970
1971 static void free_note_info(struct elf_note_info *info)
1972 {
1973         while (!list_empty(&info->thread_list)) {
1974                 struct list_head *tmp = info->thread_list.next;
1975                 list_del(tmp);
1976                 kfree(list_entry(tmp, struct elf_thread_status, list));
1977         }
1978
1979         /* Free data allocated by fill_files_note(), if any: */
1980         if (info->notes_files)
                vfree(info->notes_files->data);
1981
1982         kfree(info->prstatus);
1983         kfree(info->psinfo);
1984         kfree(info->notes);
1985         kfree(info->fpu);
1986 #ifdef ELF_CORE_COPY_XFPREGS
1987         kfree(info->xfpu);
1988 #endif
1989 }
1990
1991 #endif
1992
1993 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1994                                         struct vm_area_struct *gate_vma)
1995 {
1996         struct vm_area_struct *ret = tsk->mm->mmap;
1997
1998         if (ret)
1999                 return ret;
2000         return gate_vma;
2001 }
2002 /*
2003  * Helper function for iterating across a vma list.  It ensures that the caller
2004  * will visit `gate_vma' prior to terminating the search.
2005  */
2006 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2007                                         struct vm_area_struct *gate_vma)
2008 {
2009         struct vm_area_struct *ret;
2010
2011         ret = this_vma->vm_next;
2012         if (ret)
2013                 return ret;
2014         if (this_vma == gate_vma)
2015                 return NULL;
2016         return gate_vma;
2017 }
2018
2019 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2020                              elf_addr_t e_shoff, int segs)
2021 {
2022         elf->e_shoff = e_shoff;
2023         elf->e_shentsize = sizeof(*shdr4extnum);
2024         elf->e_shnum = 1;
2025         elf->e_shstrndx = SHN_UNDEF;
2026
2027         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2028
2029         shdr4extnum->sh_type = SHT_NULL;
2030         shdr4extnum->sh_size = elf->e_shnum;
2031         shdr4extnum->sh_link = elf->e_shstrndx;
2032         shdr4extnum->sh_info = segs;
2033 }
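/*
 * Illustrative example (not part of the kernel build): the reader's side of
 * the extended numbering set up above.  When e_phnum holds PN_XNUM, the
 * real count is recovered from section header 0, here sh0, assumed to have
 * been read from e_shoff:
 *
 *	#include <elf.h>
 *
 *	static unsigned long real_phnum(const Elf64_Ehdr *eh,
 *					const Elf64_Shdr *sh0)
 *	{
 *		if (eh->e_phnum != PN_XNUM)
 *			return eh->e_phnum;
 *		return sh0->sh_info;
 *	}
 */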
2034
2035 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2036                                      unsigned long mm_flags)
2037 {
2038         struct vm_area_struct *vma;
2039         size_t size = 0;
2040
2041         for (vma = first_vma(current, gate_vma); vma != NULL;
2042              vma = next_vma(vma, gate_vma))
2043                 size += vma_dump_size(vma, mm_flags);
2044         return size;
2045 }
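/*
 * Layout of the resulting core file, in file-offset order:
 *
 *	ELF header
 *	program headers (one PT_NOTE, one PT_LOAD per vma, plus extras)
 *	note data
 *	padding up to ELF_EXEC_PAGESIZE
 *	vma contents, in program-header order
 *	extra arch data, if any
 *	a single section header, present only when e_phnum == PN_XNUM
 */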
2046
2047 /*
2048  * Actual dumper
2049  *
2050  * This is a two-pass process; first we find the offsets of the bits,
2051  * and then they are actually written out.  If we run out of core limit
2052  * we just truncate.
2053  */
2054 static int elf_core_dump(struct coredump_params *cprm)
2055 {
2056         int has_dumped = 0;
2057         mm_segment_t fs;
2058         int segs;
2059         struct vm_area_struct *vma, *gate_vma;
2060         struct elfhdr *elf = NULL;
2061         loff_t offset = 0, dataoff;
2062         struct elf_note_info info = { };
2063         struct elf_phdr *phdr4note = NULL;
2064         struct elf_shdr *shdr4extnum = NULL;
2065         Elf_Half e_phnum;
2066         elf_addr_t e_shoff;
2067
2068         /*
2069          * We no longer stop all VM operations.
2070          * 
2071          * This is because those processes that could possibly change map_count
2072          * or the mmap / vma pages are now blocked in do_exit on current
2073          * finishing this core dump.
2074          *
2075          * Only ptrace can touch these memory addresses, but it doesn't change
2076          * the map_count or the pages allocated. So no possibility of crashing
2077          * exists while dumping the mm->vm_next areas to the core file.
2078          */
2079   
2080         /* alloc memory for large data structures: too large to be on stack */
2081         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2082         if (!elf)
2083                 goto out;
2084         /*
2085          * The number of segs is recorded in the ELF header as a 16-bit value.
2086          * Please check the DEFAULT_MAX_MAP_COUNT definition when you modify here.
2087          */
2088         segs = current->mm->map_count;
2089         segs += elf_core_extra_phdrs();
2090
2091         gate_vma = get_gate_vma(current->mm);
2092         if (gate_vma != NULL)
2093                 segs++;
2094
2095         /* for notes section */
2096         segs++;
2097
2098         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2099          * this, the kernel supports extended numbering. Have a look at
2100          * include/linux/elf.h for further information. */
2101         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2102
2103         /*
2104          * Collect all the non-memory information about the process for the
2105          * notes.  This also sets up the file header.
2106          */
2107         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2108                 goto cleanup;
2109
2110         has_dumped = 1;
2111
2112         fs = get_fs();
2113         set_fs(KERNEL_DS);
2114
2115         offset += sizeof(*elf);                         /* Elf header */
2116         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2117
2118         /* Write notes phdr entry */
2119         {
2120                 size_t sz = get_note_info_size(&info);
2121
2122                 sz += elf_coredump_extra_notes_size();
2123
2124                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2125                 if (!phdr4note)
2126                         goto end_coredump;
2127
2128                 fill_elf_note_phdr(phdr4note, sz, offset);
2129                 offset += sz;
2130         }
2131
2132         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2133
2134         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2135         offset += elf_core_extra_data_size();
2136         e_shoff = offset;
2137
2138         if (e_phnum == PN_XNUM) {
2139                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2140                 if (!shdr4extnum)
2141                         goto end_coredump;
2142                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2143         }
2144
2145         offset = dataoff;
2146
2147         if (!dump_emit(cprm, elf, sizeof(*elf)))
2148                 goto end_coredump;
2149
2150         if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2151                 goto end_coredump;
2152
2153         /* Write program headers for segments dump */
2154         for (vma = first_vma(current, gate_vma); vma != NULL;
2155                         vma = next_vma(vma, gate_vma)) {
2156                 struct elf_phdr phdr;
2157
2158                 phdr.p_type = PT_LOAD;
2159                 phdr.p_offset = offset;
2160                 phdr.p_vaddr = vma->vm_start;
2161                 phdr.p_paddr = 0;
2162                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2163                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2164                 offset += phdr.p_filesz;
2165                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2166                 if (vma->vm_flags & VM_WRITE)
2167                         phdr.p_flags |= PF_W;
2168                 if (vma->vm_flags & VM_EXEC)
2169                         phdr.p_flags |= PF_X;
2170                 phdr.p_align = ELF_EXEC_PAGESIZE;
2171
2172                 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2173                         goto end_coredump;
2174         }
2175
2176         if (!elf_core_write_extra_phdrs(cprm, offset))
2177                 goto end_coredump;
2178
2179         /* write out the notes section */
2180         if (!write_note_info(&info, cprm))
2181                 goto end_coredump;
2182
2183         if (elf_coredump_extra_notes_write(cprm))
2184                 goto end_coredump;
2185
2186         /* Align to page */
2187         if (!dump_align(cprm, ELF_EXEC_PAGESIZE))
2188                 goto end_coredump;
2189
2190         for (vma = first_vma(current, gate_vma); vma != NULL;
2191                         vma = next_vma(vma, gate_vma)) {
2192                 unsigned long addr;
2193                 unsigned long end;
2194
2195                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2196
2197                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2198                         struct page *page;
2199                         int stop;
2200
2201                         page = get_dump_page(addr);
2202                         if (page) {
2203                                 void *kaddr = kmap(page);
2204                                 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2205                                 kunmap(page);
2206                                 page_cache_release(page);
2207                         } else
2208                                 stop = !dump_skip(cprm, PAGE_SIZE);
2209                         if (stop)
2210                                 goto end_coredump;
2211                 }
2212         }
2213
2214         if (!elf_core_write_extra_data(cprm))
2215                 goto end_coredump;
2216
2217         if (e_phnum == PN_XNUM)
2218                 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2219                         goto end_coredump;
2220
2221 end_coredump:
2222         set_fs(fs);
2223
2224 cleanup:
2225         free_note_info(&info);
2226         kfree(shdr4extnum);
2227         kfree(phdr4note);
2228         kfree(elf);
2229 out:
2230         return has_dumped;
2231 }
2232
2233 #endif          /* CONFIG_ELF_CORE */
2234
2235 static int __init init_elf_binfmt(void)
2236 {
2237         register_binfmt(&elf_format);
2238         return 0;
2239 }
2240
2241 static void __exit exit_elf_binfmt(void)
2242 {
2243         /* Remove the ELF loader. */
2244         unregister_binfmt(&elf_format);
2245 }
2246
2247 core_initcall(init_elf_binfmt);
2248 module_exit(exit_elf_binfmt);
2249 MODULE_LICENSE("GPL");