]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/binfmt_elf.c
fs/9p: xattr: add trusted and security namespaces
[karo-tx-linux.git] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/utsname.h>
35 #include <linux/coredump.h>
36 #include <linux/sched.h>
37 #include <asm/uaccess.h>
38 #include <asm/param.h>
39 #include <asm/page.h>
40
/*
 * Fallback definitions: when nothing earlier has defined these names as
 * macros, the "user" variants are simply the native types.
 */
#if !defined(user_long_t)
# define user_long_t long
#endif
#if !defined(user_siginfo_t)
# define user_siginfo_t siginfo_t
#endif
47
/* Forward declarations for the handlers and the mapping helper below. */
static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *file);
static unsigned long elf_map(struct file *filep, unsigned long addr,
				struct elf_phdr *eppnt, int prot, int type,
				unsigned long total_size);
52
/*
 * Core dumping is optional.  Without CONFIG_ELF_CORE the name
 * elf_core_dump expands to NULL, so the handler slot in elf_format is
 * empty and no dump is ever attempted.
 */
#ifndef CONFIG_ELF_CORE
#define elf_core_dump	NULL
#else
static int elf_core_dump(struct coredump_params *cprm);
#endif
62
/* ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE. */
#if ELF_EXEC_PAGESIZE <= PAGE_SIZE
#define ELF_MIN_ALIGN	PAGE_SIZE
#else
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#endif

/* Default value when nothing earlier has defined ELF_CORE_EFLAGS. */
#if !defined(ELF_CORE_EFLAGS)
#define ELF_CORE_EFLAGS	0
#endif

/* Round _v down to an ELF_MIN_ALIGN boundary. */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
/* Offset of _v within its ELF_MIN_ALIGN-sized page. */
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
/* Round _v up to an ELF_MIN_ALIGN boundary. */
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
76
77 static struct linux_binfmt elf_format = {
78         .module         = THIS_MODULE,
79         .load_binary    = load_elf_binary,
80         .load_shlib     = load_elf_library,
81         .core_dump      = elf_core_dump,
82         .min_coredump   = ELF_EXEC_PAGESIZE,
83 };
84
/* True when x, taken as an unsigned address, lies at or beyond TASK_SIZE. */
#define BAD_ADDR(x) (TASK_SIZE <= (unsigned long)(x))
86
87 static int set_brk(unsigned long start, unsigned long end)
88 {
89         start = ELF_PAGEALIGN(start);
90         end = ELF_PAGEALIGN(end);
91         if (end > start) {
92                 unsigned long addr;
93                 addr = vm_brk(start, end - start);
94                 if (BAD_ADDR(addr))
95                         return addr;
96         }
97         current->mm->start_brk = current->mm->brk = end;
98         return 0;
99 }
100
101 /* We need to explicitly zero any fractional pages
102    after the data section (i.e. bss).  This would
103    contain the junk from the file that should not
104    be in memory
105  */
106 static int padzero(unsigned long elf_bss)
107 {
108         unsigned long nbyte;
109
110         nbyte = ELF_PAGEOFFSET(elf_bss);
111         if (nbyte) {
112                 nbyte = ELF_MIN_ALIGN - nbyte;
113                 if (clear_user((void __user *) elf_bss, nbyte))
114                         return -EFAULT;
115         }
116         return 0;
117 }
118
/*
 * Helpers that hide the stack growth direction.
 *
 *   STACK_ADD(sp, items)   - address 'items' elf_addr_t slots further along
 *                            the stack from sp.
 *   STACK_ROUND(sp, items) - address after stepping over 'items' slots,
 *                            rounded to a 16-byte boundary.
 *   STACK_ALLOC(sp, len)   - reserve len bytes by updating sp in place and
 *                            yield the start of the reserved area.
 *
 * Every macro argument is parenthesised so that compound expressions such
 * as STACK_ROUND(sp, a + b) expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
133
#if !defined(ELF_BASE_PLATFORM)
/*
 * AT_BASE_PLATFORM names the "real" hardware/microarchitecture.  An
 * architecture may define ELF_BASE_PLATFORM (in asm/elf.h); the string is
 * then copied to the user stack in the same way as AT_PLATFORM.  The
 * default of NULL means no such entry is emitted.
 */
#define ELF_BASE_PLATFORM NULL
#endif
142
143 static int
144 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
145                 unsigned long load_addr, unsigned long interp_load_addr)
146 {
147         unsigned long p = bprm->p;
148         int argc = bprm->argc;
149         int envc = bprm->envc;
150         elf_addr_t __user *argv;
151         elf_addr_t __user *envp;
152         elf_addr_t __user *sp;
153         elf_addr_t __user *u_platform;
154         elf_addr_t __user *u_base_platform;
155         elf_addr_t __user *u_rand_bytes;
156         const char *k_platform = ELF_PLATFORM;
157         const char *k_base_platform = ELF_BASE_PLATFORM;
158         unsigned char k_rand_bytes[16];
159         int items;
160         elf_addr_t *elf_info;
161         int ei_index = 0;
162         const struct cred *cred = current_cred();
163         struct vm_area_struct *vma;
164
165         /*
166          * In some cases (e.g. Hyper-Threading), we want to avoid L1
167          * evictions by the processes running on the same package. One
168          * thing we can do is to shuffle the initial stack for them.
169          */
170
171         p = arch_align_stack(p);
172
173         /*
174          * If this architecture has a platform capability string, copy it
175          * to userspace.  In some cases (Sparc), this info is impossible
176          * for userspace to get any other way, in others (i386) it is
177          * merely difficult.
178          */
179         u_platform = NULL;
180         if (k_platform) {
181                 size_t len = strlen(k_platform) + 1;
182
183                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
184                 if (__copy_to_user(u_platform, k_platform, len))
185                         return -EFAULT;
186         }
187
188         /*
189          * If this architecture has a "base" platform capability
190          * string, copy it to userspace.
191          */
192         u_base_platform = NULL;
193         if (k_base_platform) {
194                 size_t len = strlen(k_base_platform) + 1;
195
196                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
197                 if (__copy_to_user(u_base_platform, k_base_platform, len))
198                         return -EFAULT;
199         }
200
201         /*
202          * Generate 16 random bytes for userspace PRNG seeding.
203          */
204         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
205         u_rand_bytes = (elf_addr_t __user *)
206                        STACK_ALLOC(p, sizeof(k_rand_bytes));
207         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
208                 return -EFAULT;
209
210         /* Create the ELF interpreter info */
211         elf_info = (elf_addr_t *)current->mm->saved_auxv;
212         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
213 #define NEW_AUX_ENT(id, val) \
214         do { \
215                 elf_info[ei_index++] = id; \
216                 elf_info[ei_index++] = val; \
217         } while (0)
218
219 #ifdef ARCH_DLINFO
220         /* 
221          * ARCH_DLINFO must come first so PPC can do its special alignment of
222          * AUXV.
223          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
224          * ARCH_DLINFO changes
225          */
226         ARCH_DLINFO;
227 #endif
228         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
229         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
230         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
231         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
232         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
233         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
234         NEW_AUX_ENT(AT_BASE, interp_load_addr);
235         NEW_AUX_ENT(AT_FLAGS, 0);
236         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
237         NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
238         NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
239         NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
240         NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
241         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
242         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
243         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
244         if (k_platform) {
245                 NEW_AUX_ENT(AT_PLATFORM,
246                             (elf_addr_t)(unsigned long)u_platform);
247         }
248         if (k_base_platform) {
249                 NEW_AUX_ENT(AT_BASE_PLATFORM,
250                             (elf_addr_t)(unsigned long)u_base_platform);
251         }
252         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
253                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
254         }
255 #undef NEW_AUX_ENT
256         /* AT_NULL is zero; clear the rest too */
257         memset(&elf_info[ei_index], 0,
258                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
259
260         /* And advance past the AT_NULL entry.  */
261         ei_index += 2;
262
263         sp = STACK_ADD(p, ei_index);
264
265         items = (argc + 1) + (envc + 1) + 1;
266         bprm->p = STACK_ROUND(sp, items);
267
268         /* Point sp at the lowest address on the stack */
269 #ifdef CONFIG_STACK_GROWSUP
270         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
271         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
272 #else
273         sp = (elf_addr_t __user *)bprm->p;
274 #endif
275
276
277         /*
278          * Grow the stack manually; some architectures have a limit on how
279          * far ahead a user-space access may be in order to grow the stack.
280          */
281         vma = find_extend_vma(current->mm, bprm->p);
282         if (!vma)
283                 return -EFAULT;
284
285         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
286         if (__put_user(argc, sp++))
287                 return -EFAULT;
288         argv = sp;
289         envp = argv + argc + 1;
290
291         /* Populate argv and envp */
292         p = current->mm->arg_end = current->mm->arg_start;
293         while (argc-- > 0) {
294                 size_t len;
295                 if (__put_user((elf_addr_t)p, argv++))
296                         return -EFAULT;
297                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
298                 if (!len || len > MAX_ARG_STRLEN)
299                         return -EINVAL;
300                 p += len;
301         }
302         if (__put_user(0, argv))
303                 return -EFAULT;
304         current->mm->arg_end = current->mm->env_start = p;
305         while (envc-- > 0) {
306                 size_t len;
307                 if (__put_user((elf_addr_t)p, envp++))
308                         return -EFAULT;
309                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
310                 if (!len || len > MAX_ARG_STRLEN)
311                         return -EINVAL;
312                 p += len;
313         }
314         if (__put_user(0, envp))
315                 return -EFAULT;
316         current->mm->env_end = p;
317
318         /* Put the elf_info on the stack in the right place.  */
319         sp = (elf_addr_t __user *)envp + 1;
320         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
321                 return -EFAULT;
322         return 0;
323 }
324
325 #ifndef elf_map
326
327 static unsigned long elf_map(struct file *filep, unsigned long addr,
328                 struct elf_phdr *eppnt, int prot, int type,
329                 unsigned long total_size)
330 {
331         unsigned long map_addr;
332         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
333         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
334         addr = ELF_PAGESTART(addr);
335         size = ELF_PAGEALIGN(size);
336
337         /* mmap() will return -EINVAL if given a zero size, but a
338          * segment with zero filesize is perfectly valid */
339         if (!size)
340                 return addr;
341
342         /*
343         * total_size is the size of the ELF (interpreter) image.
344         * The _first_ mmap needs to know the full size, otherwise
345         * randomization might put this image into an overlapping
346         * position with the ELF binary image. (since size < total_size)
347         * So we first map the 'big' image - and unmap the remainder at
348         * the end. (which unmap is needed for ELF images with holes.)
349         */
350         if (total_size) {
351                 total_size = ELF_PAGEALIGN(total_size);
352                 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
353                 if (!BAD_ADDR(map_addr))
354                         vm_munmap(map_addr+size, total_size-size);
355         } else
356                 map_addr = vm_mmap(filep, addr, size, prot, type, off);
357
358         return(map_addr);
359 }
360
361 #endif /* !elf_map */
362
363 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
364 {
365         int i, first_idx = -1, last_idx = -1;
366
367         for (i = 0; i < nr; i++) {
368                 if (cmds[i].p_type == PT_LOAD) {
369                         last_idx = i;
370                         if (first_idx == -1)
371                                 first_idx = i;
372                 }
373         }
374         if (first_idx == -1)
375                 return 0;
376
377         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
378                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
379 }
380
381
382 /* This is much more generalized than the library routine read function,
383    so we keep this separate.  Technically the library read function
384    is only provided so that we can read a.out libraries that have
385    an ELF header */
386
387 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
388                 struct file *interpreter, unsigned long *interp_map_addr,
389                 unsigned long no_base)
390 {
391         struct elf_phdr *elf_phdata;
392         struct elf_phdr *eppnt;
393         unsigned long load_addr = 0;
394         int load_addr_set = 0;
395         unsigned long last_bss = 0, elf_bss = 0;
396         unsigned long error = ~0UL;
397         unsigned long total_size;
398         int retval, i, size;
399
400         /* First of all, some simple consistency checks */
401         if (interp_elf_ex->e_type != ET_EXEC &&
402             interp_elf_ex->e_type != ET_DYN)
403                 goto out;
404         if (!elf_check_arch(interp_elf_ex))
405                 goto out;
406         if (!interpreter->f_op || !interpreter->f_op->mmap)
407                 goto out;
408
409         /*
410          * If the size of this structure has changed, then punt, since
411          * we will be doing the wrong thing.
412          */
413         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
414                 goto out;
415         if (interp_elf_ex->e_phnum < 1 ||
416                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
417                 goto out;
418
419         /* Now read in all of the header information */
420         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
421         if (size > ELF_MIN_ALIGN)
422                 goto out;
423         elf_phdata = kmalloc(size, GFP_KERNEL);
424         if (!elf_phdata)
425                 goto out;
426
427         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
428                              (char *)elf_phdata, size);
429         error = -EIO;
430         if (retval != size) {
431                 if (retval < 0)
432                         error = retval; 
433                 goto out_close;
434         }
435
436         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
437         if (!total_size) {
438                 error = -EINVAL;
439                 goto out_close;
440         }
441
442         eppnt = elf_phdata;
443         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
444                 if (eppnt->p_type == PT_LOAD) {
445                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
446                         int elf_prot = 0;
447                         unsigned long vaddr = 0;
448                         unsigned long k, map_addr;
449
450                         if (eppnt->p_flags & PF_R)
451                                 elf_prot = PROT_READ;
452                         if (eppnt->p_flags & PF_W)
453                                 elf_prot |= PROT_WRITE;
454                         if (eppnt->p_flags & PF_X)
455                                 elf_prot |= PROT_EXEC;
456                         vaddr = eppnt->p_vaddr;
457                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
458                                 elf_type |= MAP_FIXED;
459                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
460                                 load_addr = -vaddr;
461
462                         map_addr = elf_map(interpreter, load_addr + vaddr,
463                                         eppnt, elf_prot, elf_type, total_size);
464                         total_size = 0;
465                         if (!*interp_map_addr)
466                                 *interp_map_addr = map_addr;
467                         error = map_addr;
468                         if (BAD_ADDR(map_addr))
469                                 goto out_close;
470
471                         if (!load_addr_set &&
472                             interp_elf_ex->e_type == ET_DYN) {
473                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
474                                 load_addr_set = 1;
475                         }
476
477                         /*
478                          * Check to see if the section's size will overflow the
479                          * allowed task size. Note that p_filesz must always be
480                          * <= p_memsize so it's only necessary to check p_memsz.
481                          */
482                         k = load_addr + eppnt->p_vaddr;
483                         if (BAD_ADDR(k) ||
484                             eppnt->p_filesz > eppnt->p_memsz ||
485                             eppnt->p_memsz > TASK_SIZE ||
486                             TASK_SIZE - eppnt->p_memsz < k) {
487                                 error = -ENOMEM;
488                                 goto out_close;
489                         }
490
491                         /*
492                          * Find the end of the file mapping for this phdr, and
493                          * keep track of the largest address we see for this.
494                          */
495                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
496                         if (k > elf_bss)
497                                 elf_bss = k;
498
499                         /*
500                          * Do the same thing for the memory mapping - between
501                          * elf_bss and last_bss is the bss section.
502                          */
503                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
504                         if (k > last_bss)
505                                 last_bss = k;
506                 }
507         }
508
509         if (last_bss > elf_bss) {
510                 /*
511                  * Now fill out the bss section.  First pad the last page up
512                  * to the page boundary, and then perform a mmap to make sure
513                  * that there are zero-mapped pages up to and including the
514                  * last bss page.
515                  */
516                 if (padzero(elf_bss)) {
517                         error = -EFAULT;
518                         goto out_close;
519                 }
520
521                 /* What we have mapped so far */
522                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
523
524                 /* Map the last of the bss segment */
525                 error = vm_brk(elf_bss, last_bss - elf_bss);
526                 if (BAD_ADDR(error))
527                         goto out_close;
528         }
529
530         error = load_addr;
531
532 out_close:
533         kfree(elf_phdata);
534 out:
535         return error;
536 }
537
538 /*
539  * These are the functions used to load ELF style executables and shared
540  * libraries.  There is no binary dependent code anywhere else.
541  */
542
/* Interpreter type codes. */
#define INTERPRETER_NONE	0
#define INTERPRETER_ELF		2

/*
 * Default mask (in pages) applied to the random stack offset when nothing
 * earlier has defined STACK_RND_MASK: 8MB of VA.
 */
#if !defined(STACK_RND_MASK)
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))
#endif
549
550 static unsigned long randomize_stack_top(unsigned long stack_top)
551 {
552         unsigned int random_variable = 0;
553
554         if ((current->flags & PF_RANDOMIZE) &&
555                 !(current->personality & ADDR_NO_RANDOMIZE)) {
556                 random_variable = get_random_int() & STACK_RND_MASK;
557                 random_variable <<= PAGE_SHIFT;
558         }
559 #ifdef CONFIG_STACK_GROWSUP
560         return PAGE_ALIGN(stack_top) + random_variable;
561 #else
562         return PAGE_ALIGN(stack_top) - random_variable;
563 #endif
564 }
565
566 static int load_elf_binary(struct linux_binprm *bprm)
567 {
568         struct file *interpreter = NULL; /* to shut gcc up */
569         unsigned long load_addr = 0, load_bias = 0;
570         int load_addr_set = 0;
571         char * elf_interpreter = NULL;
572         unsigned long error;
573         struct elf_phdr *elf_ppnt, *elf_phdata;
574         unsigned long elf_bss, elf_brk;
575         int retval, i;
576         unsigned int size;
577         unsigned long elf_entry;
578         unsigned long interp_load_addr = 0;
579         unsigned long start_code, end_code, start_data, end_data;
580         unsigned long reloc_func_desc __maybe_unused = 0;
581         int executable_stack = EXSTACK_DEFAULT;
582         unsigned long def_flags = 0;
583         struct pt_regs *regs = current_pt_regs();
584         struct {
585                 struct elfhdr elf_ex;
586                 struct elfhdr interp_elf_ex;
587         } *loc;
588
589         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
590         if (!loc) {
591                 retval = -ENOMEM;
592                 goto out_ret;
593         }
594         
595         /* Get the exec-header */
596         loc->elf_ex = *((struct elfhdr *)bprm->buf);
597
598         retval = -ENOEXEC;
599         /* First of all, some simple consistency checks */
600         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
601                 goto out;
602
603         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
604                 goto out;
605         if (!elf_check_arch(&loc->elf_ex))
606                 goto out;
607         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
608                 goto out;
609
610         /* Now read in all of the header information */
611         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
612                 goto out;
613         if (loc->elf_ex.e_phnum < 1 ||
614                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
615                 goto out;
616         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
617         retval = -ENOMEM;
618         elf_phdata = kmalloc(size, GFP_KERNEL);
619         if (!elf_phdata)
620                 goto out;
621
622         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
623                              (char *)elf_phdata, size);
624         if (retval != size) {
625                 if (retval >= 0)
626                         retval = -EIO;
627                 goto out_free_ph;
628         }
629
630         elf_ppnt = elf_phdata;
631         elf_bss = 0;
632         elf_brk = 0;
633
634         start_code = ~0UL;
635         end_code = 0;
636         start_data = 0;
637         end_data = 0;
638
639         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
640                 if (elf_ppnt->p_type == PT_INTERP) {
641                         /* This is the program interpreter used for
642                          * shared libraries - for now assume that this
643                          * is an a.out format binary
644                          */
645                         retval = -ENOEXEC;
646                         if (elf_ppnt->p_filesz > PATH_MAX || 
647                             elf_ppnt->p_filesz < 2)
648                                 goto out_free_ph;
649
650                         retval = -ENOMEM;
651                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
652                                                   GFP_KERNEL);
653                         if (!elf_interpreter)
654                                 goto out_free_ph;
655
656                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
657                                              elf_interpreter,
658                                              elf_ppnt->p_filesz);
659                         if (retval != elf_ppnt->p_filesz) {
660                                 if (retval >= 0)
661                                         retval = -EIO;
662                                 goto out_free_interp;
663                         }
664                         /* make sure path is NULL terminated */
665                         retval = -ENOEXEC;
666                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
667                                 goto out_free_interp;
668
669                         interpreter = open_exec(elf_interpreter);
670                         retval = PTR_ERR(interpreter);
671                         if (IS_ERR(interpreter))
672                                 goto out_free_interp;
673
674                         /*
675                          * If the binary is not readable then enforce
676                          * mm->dumpable = 0 regardless of the interpreter's
677                          * permissions.
678                          */
679                         would_dump(bprm, interpreter);
680
681                         retval = kernel_read(interpreter, 0, bprm->buf,
682                                              BINPRM_BUF_SIZE);
683                         if (retval != BINPRM_BUF_SIZE) {
684                                 if (retval >= 0)
685                                         retval = -EIO;
686                                 goto out_free_dentry;
687                         }
688
689                         /* Get the exec headers */
690                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
691                         break;
692                 }
693                 elf_ppnt++;
694         }
695
696         elf_ppnt = elf_phdata;
697         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
698                 if (elf_ppnt->p_type == PT_GNU_STACK) {
699                         if (elf_ppnt->p_flags & PF_X)
700                                 executable_stack = EXSTACK_ENABLE_X;
701                         else
702                                 executable_stack = EXSTACK_DISABLE_X;
703                         break;
704                 }
705
706         /* Some simple consistency checks for the interpreter */
707         if (elf_interpreter) {
708                 retval = -ELIBBAD;
709                 /* Not an ELF interpreter */
710                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
711                         goto out_free_dentry;
712                 /* Verify the interpreter has a valid arch */
713                 if (!elf_check_arch(&loc->interp_elf_ex))
714                         goto out_free_dentry;
715         }
716
717         /* Flush all traces of the currently running executable */
718         retval = flush_old_exec(bprm);
719         if (retval)
720                 goto out_free_dentry;
721
722         /* OK, This is the point of no return */
723         current->mm->def_flags = def_flags;
724
725         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
726            may depend on the personality.  */
727         SET_PERSONALITY(loc->elf_ex);
728         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
729                 current->personality |= READ_IMPLIES_EXEC;
730
731         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
732                 current->flags |= PF_RANDOMIZE;
733
734         setup_new_exec(bprm);
735
736         /* Do this so that we can load the interpreter, if need be.  We will
737            change some of these later */
738         current->mm->free_area_cache = current->mm->mmap_base;
739         current->mm->cached_hole_size = 0;
740         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
741                                  executable_stack);
742         if (retval < 0) {
743                 send_sig(SIGKILL, current, 0);
744                 goto out_free_dentry;
745         }
746         
747         current->mm->start_stack = bprm->p;
748
749         /* Now we do a little grungy work by mmapping the ELF image into
750            the correct location in memory. */
751         for(i = 0, elf_ppnt = elf_phdata;
752             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
753                 int elf_prot = 0, elf_flags;
754                 unsigned long k, vaddr;
755
756                 if (elf_ppnt->p_type != PT_LOAD)
757                         continue;
758
759                 if (unlikely (elf_brk > elf_bss)) {
760                         unsigned long nbyte;
761                     
762                         /* There was a PT_LOAD segment with p_memsz > p_filesz
763                            before this one. Map anonymous pages, if needed,
764                            and clear the area.  */
765                         retval = set_brk(elf_bss + load_bias,
766                                          elf_brk + load_bias);
767                         if (retval) {
768                                 send_sig(SIGKILL, current, 0);
769                                 goto out_free_dentry;
770                         }
771                         nbyte = ELF_PAGEOFFSET(elf_bss);
772                         if (nbyte) {
773                                 nbyte = ELF_MIN_ALIGN - nbyte;
774                                 if (nbyte > elf_brk - elf_bss)
775                                         nbyte = elf_brk - elf_bss;
776                                 if (clear_user((void __user *)elf_bss +
777                                                         load_bias, nbyte)) {
778                                         /*
779                                          * This bss-zeroing can fail if the ELF
780                                          * file specifies odd protections. So
781                                          * we don't check the return value
782                                          */
783                                 }
784                         }
785                 }
786
787                 if (elf_ppnt->p_flags & PF_R)
788                         elf_prot |= PROT_READ;
789                 if (elf_ppnt->p_flags & PF_W)
790                         elf_prot |= PROT_WRITE;
791                 if (elf_ppnt->p_flags & PF_X)
792                         elf_prot |= PROT_EXEC;
793
794                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
795
796                 vaddr = elf_ppnt->p_vaddr;
797                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
798                         elf_flags |= MAP_FIXED;
799                 } else if (loc->elf_ex.e_type == ET_DYN) {
800                         /* Try and get dynamic programs out of the way of the
801                          * default mmap base, as well as whatever program they
802                          * might try to exec.  This is because the brk will
803                          * follow the loader, and is not movable.  */
804 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
805                         /* Memory randomization might have been switched off
806                          * in runtime via sysctl.
807                          * If that is the case, retain the original non-zero
808                          * load_bias value in order to establish proper
809                          * non-randomized mappings.
810                          */
811                         if (current->flags & PF_RANDOMIZE)
812                                 load_bias = 0;
813                         else
814                                 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
815 #else
816                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
817 #endif
818                 }
819
820                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
821                                 elf_prot, elf_flags, 0);
822                 if (BAD_ADDR(error)) {
823                         send_sig(SIGKILL, current, 0);
824                         retval = IS_ERR((void *)error) ?
825                                 PTR_ERR((void*)error) : -EINVAL;
826                         goto out_free_dentry;
827                 }
828
829                 if (!load_addr_set) {
830                         load_addr_set = 1;
831                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
832                         if (loc->elf_ex.e_type == ET_DYN) {
833                                 load_bias += error -
834                                              ELF_PAGESTART(load_bias + vaddr);
835                                 load_addr += load_bias;
836                                 reloc_func_desc = load_bias;
837                         }
838                 }
839                 k = elf_ppnt->p_vaddr;
840                 if (k < start_code)
841                         start_code = k;
842                 if (start_data < k)
843                         start_data = k;
844
845                 /*
846                  * Check to see if the section's size will overflow the
847                  * allowed task size. Note that p_filesz must always be
848                  * <= p_memsz so it is only necessary to check p_memsz.
849                  */
850                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
851                     elf_ppnt->p_memsz > TASK_SIZE ||
852                     TASK_SIZE - elf_ppnt->p_memsz < k) {
853                         /* set_brk can never work. Avoid overflows. */
854                         send_sig(SIGKILL, current, 0);
855                         retval = -EINVAL;
856                         goto out_free_dentry;
857                 }
858
859                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
860
861                 if (k > elf_bss)
862                         elf_bss = k;
863                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
864                         end_code = k;
865                 if (end_data < k)
866                         end_data = k;
867                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
868                 if (k > elf_brk)
869                         elf_brk = k;
870         }
871
872         loc->elf_ex.e_entry += load_bias;
873         elf_bss += load_bias;
874         elf_brk += load_bias;
875         start_code += load_bias;
876         end_code += load_bias;
877         start_data += load_bias;
878         end_data += load_bias;
879
880         /* Calling set_brk effectively mmaps the pages that we need
881          * for the bss and break sections.  We must do this before
882          * mapping in the interpreter, to make sure it doesn't wind
883          * up getting placed where the bss needs to go.
884          */
885         retval = set_brk(elf_bss, elf_brk);
886         if (retval) {
887                 send_sig(SIGKILL, current, 0);
888                 goto out_free_dentry;
889         }
890         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
891                 send_sig(SIGSEGV, current, 0);
892                 retval = -EFAULT; /* Nobody gets to see this, but.. */
893                 goto out_free_dentry;
894         }
895
896         if (elf_interpreter) {
897                 unsigned long interp_map_addr = 0;
898
899                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
900                                             interpreter,
901                                             &interp_map_addr,
902                                             load_bias);
903                 if (!IS_ERR((void *)elf_entry)) {
904                         /*
905                          * load_elf_interp() returns relocation
906                          * adjustment
907                          */
908                         interp_load_addr = elf_entry;
909                         elf_entry += loc->interp_elf_ex.e_entry;
910                 }
911                 if (BAD_ADDR(elf_entry)) {
912                         force_sig(SIGSEGV, current);
913                         retval = IS_ERR((void *)elf_entry) ?
914                                         (int)elf_entry : -EINVAL;
915                         goto out_free_dentry;
916                 }
917                 reloc_func_desc = interp_load_addr;
918
919                 allow_write_access(interpreter);
920                 fput(interpreter);
921                 kfree(elf_interpreter);
922         } else {
923                 elf_entry = loc->elf_ex.e_entry;
924                 if (BAD_ADDR(elf_entry)) {
925                         force_sig(SIGSEGV, current);
926                         retval = -EINVAL;
927                         goto out_free_dentry;
928                 }
929         }
930
931         kfree(elf_phdata);
932
933         set_binfmt(&elf_format);
934
935 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
936         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
937         if (retval < 0) {
938                 send_sig(SIGKILL, current, 0);
939                 goto out;
940         }
941 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
942
943         install_exec_creds(bprm);
944         retval = create_elf_tables(bprm, &loc->elf_ex,
945                           load_addr, interp_load_addr);
946         if (retval < 0) {
947                 send_sig(SIGKILL, current, 0);
948                 goto out;
949         }
950         /* N.B. passed_fileno might not be initialized? */
951         current->mm->end_code = end_code;
952         current->mm->start_code = start_code;
953         current->mm->start_data = start_data;
954         current->mm->end_data = end_data;
955         current->mm->start_stack = bprm->p;
956
957 #ifdef arch_randomize_brk
958         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
959                 current->mm->brk = current->mm->start_brk =
960                         arch_randomize_brk(current->mm);
961 #ifdef CONFIG_COMPAT_BRK
962                 current->brk_randomized = 1;
963 #endif
964         }
965 #endif
966
967         if (current->personality & MMAP_PAGE_ZERO) {
968                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
969                    and some applications "depend" upon this behavior.
970                    Since we do not have the power to recompile these, we
971                    emulate the SVr4 behavior. Sigh. */
972                 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
973                                 MAP_FIXED | MAP_PRIVATE, 0);
974         }
975
976 #ifdef ELF_PLAT_INIT
977         /*
978          * The ABI may specify that certain registers be set up in special
979          * ways (on i386 %edx is the address of a DT_FINI function, for
980          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
981          * that the e_entry field is the address of the function descriptor
982          * for the startup routine, rather than the address of the startup
983          * routine itself.  This macro performs whatever initialization to
984          * the regs structure is required as well as any relocations to the
985          * function descriptor entries when executing dynamically links apps.
986          */
987         ELF_PLAT_INIT(regs, reloc_func_desc);
988 #endif
989
990         start_thread(regs, elf_entry, bprm->p);
991         retval = 0;
992 out:
993         kfree(loc);
994 out_ret:
995         return retval;
996
997         /* error cleanup */
998 out_free_dentry:
999         allow_write_access(interpreter);
1000         if (interpreter)
1001                 fput(interpreter);
1002 out_free_interp:
1003         kfree(elf_interpreter);
1004 out_free_ph:
1005         kfree(elf_phdata);
1006         goto out;
1007 }
1008
1009 /* This is really simpleminded and specialized - we are loading an
1010    a.out library that is given an ELF header. */
1011 static int load_elf_library(struct file *file)
1012 {
1013         struct elf_phdr *elf_phdata;
1014         struct elf_phdr *eppnt;
1015         unsigned long elf_bss, bss, len;
1016         int retval, error, i, j;
1017         struct elfhdr elf_ex;
1018
1019         error = -ENOEXEC;
1020         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1021         if (retval != sizeof(elf_ex))
1022                 goto out;
1023
1024         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1025                 goto out;
1026
1027         /* First of all, some simple consistency checks */
1028         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1029             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1030                 goto out;
1031
1032         /* Now read in all of the header information */
1033
1034         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1035         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1036
1037         error = -ENOMEM;
1038         elf_phdata = kmalloc(j, GFP_KERNEL);
1039         if (!elf_phdata)
1040                 goto out;
1041
1042         eppnt = elf_phdata;
1043         error = -ENOEXEC;
1044         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1045         if (retval != j)
1046                 goto out_free_ph;
1047
1048         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1049                 if ((eppnt + i)->p_type == PT_LOAD)
1050                         j++;
1051         if (j != 1)
1052                 goto out_free_ph;
1053
1054         while (eppnt->p_type != PT_LOAD)
1055                 eppnt++;
1056
1057         /* Now use mmap to map the library into memory. */
1058         error = vm_mmap(file,
1059                         ELF_PAGESTART(eppnt->p_vaddr),
1060                         (eppnt->p_filesz +
1061                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1062                         PROT_READ | PROT_WRITE | PROT_EXEC,
1063                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1064                         (eppnt->p_offset -
1065                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1066         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1067                 goto out_free_ph;
1068
1069         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1070         if (padzero(elf_bss)) {
1071                 error = -EFAULT;
1072                 goto out_free_ph;
1073         }
1074
1075         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1076                             ELF_MIN_ALIGN - 1);
1077         bss = eppnt->p_memsz + eppnt->p_vaddr;
1078         if (bss > len)
1079                 vm_brk(len, bss - len);
1080         error = 0;
1081
1082 out_free_ph:
1083         kfree(elf_phdata);
1084 out:
1085         return error;
1086 }
1087
1088 #ifdef CONFIG_ELF_CORE
1089 /*
1090  * ELF core dumper
1091  *
1092  * Modelled on fs/exec.c:aout_core_dump()
1093  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1094  */
1095
1096 /*
1097  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1098  * that are useful for post-mortem analysis are included in every core dump.
1099  * In that way we ensure that the core dump is fully interpretable later
1100  * without matching up the same kernel and hardware config to see what PC values
1101  * meant. These special mappings include - vDSO, vsyscall, and other
1102  * architecture specific mappings
1103  */
1104 static bool always_dump_vma(struct vm_area_struct *vma)
1105 {
1106         /* Any vsyscall mappings? */
1107         if (vma == get_gate_vma(vma->vm_mm))
1108                 return true;
1109         /*
1110          * arch_vma_name() returns non-NULL for special architecture mappings,
1111          * such as vDSO sections.
1112          */
1113         if (arch_vma_name(vma))
1114                 return true;
1115
1116         return false;
1117 }
1118
1119 /*
1120  * Decide what to dump of a segment, part, all or none.
1121  */
1122 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1123                                    unsigned long mm_flags)
1124 {
1125 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1126
1127         /* always dump the vdso and vsyscall sections */
1128         if (always_dump_vma(vma))
1129                 goto whole;
1130
1131         if (vma->vm_flags & VM_DONTDUMP)
1132                 return 0;
1133
1134         /* Hugetlb memory check */
1135         if (vma->vm_flags & VM_HUGETLB) {
1136                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1137                         goto whole;
1138                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1139                         goto whole;
1140                 return 0;
1141         }
1142
1143         /* Do not dump I/O mapped devices or special mappings */
1144         if (vma->vm_flags & VM_IO)
1145                 return 0;
1146
1147         /* By default, dump shared memory if mapped from an anonymous file. */
1148         if (vma->vm_flags & VM_SHARED) {
1149                 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1150                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1151                         goto whole;
1152                 return 0;
1153         }
1154
1155         /* Dump segments that have been written to.  */
1156         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1157                 goto whole;
1158         if (vma->vm_file == NULL)
1159                 return 0;
1160
1161         if (FILTER(MAPPED_PRIVATE))
1162                 goto whole;
1163
1164         /*
1165          * If this looks like the beginning of a DSO or executable mapping,
1166          * check for an ELF header.  If we find one, dump the first page to
1167          * aid in determining what was mapped here.
1168          */
1169         if (FILTER(ELF_HEADERS) &&
1170             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1171                 u32 __user *header = (u32 __user *) vma->vm_start;
1172                 u32 word;
1173                 mm_segment_t fs = get_fs();
1174                 /*
1175                  * Doing it this way gets the constant folded by GCC.
1176                  */
1177                 union {
1178                         u32 cmp;
1179                         char elfmag[SELFMAG];
1180                 } magic;
1181                 BUILD_BUG_ON(SELFMAG != sizeof word);
1182                 magic.elfmag[EI_MAG0] = ELFMAG0;
1183                 magic.elfmag[EI_MAG1] = ELFMAG1;
1184                 magic.elfmag[EI_MAG2] = ELFMAG2;
1185                 magic.elfmag[EI_MAG3] = ELFMAG3;
1186                 /*
1187                  * Switch to the user "segment" for get_user(),
1188                  * then put back what elf_core_dump() had in place.
1189                  */
1190                 set_fs(USER_DS);
1191                 if (unlikely(get_user(word, header)))
1192                         word = 0;
1193                 set_fs(fs);
1194                 if (word == magic.cmp)
1195                         return PAGE_SIZE;
1196         }
1197
1198 #undef  FILTER
1199
1200         return 0;
1201
1202 whole:
1203         return vma->vm_end - vma->vm_start;
1204 }
1205
/*
 * An ELF note in memory: the pieces needed to emit one note record
 * into the core file.
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, stored as n_type when written */
	unsigned int datasz;	/* number of bytes at @data */
	void *data;		/* note payload */
};
1214
1215 static int notesize(struct memelfnote *en)
1216 {
1217         int sz;
1218
1219         sz = sizeof(struct elf_note);
1220         sz += roundup(strlen(en->name) + 1, 4);
1221         sz += roundup(en->datasz, 4);
1222
1223         return sz;
1224 }
1225
/*
 * Write @nr bytes from @addr with dump_write() and advance *@foffset by
 * the same amount.  Expects a variable named `file` in the expanding
 * scope, and makes the enclosing function return 0 when the write fails.
 * Note that @nr is evaluated twice.
 */
#define DUMP_WRITE(addr, nr, foffset)			\
	do {						\
		if (!dump_write(file, (addr), (nr)))	\
			return 0;			\
		*(foffset) += (nr);			\
	} while (0)
1228
1229 static int alignfile(struct file *file, loff_t *foffset)
1230 {
1231         static const char buf[4] = { 0, };
1232         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1233         return 1;
1234 }
1235
1236 static int writenote(struct memelfnote *men, struct file *file,
1237                         loff_t *foffset)
1238 {
1239         struct elf_note en;
1240         en.n_namesz = strlen(men->name) + 1;
1241         en.n_descsz = men->datasz;
1242         en.n_type = men->type;
1243
1244         DUMP_WRITE(&en, sizeof(en), foffset);
1245         DUMP_WRITE(men->name, en.n_namesz, foffset);
1246         if (!alignfile(file, foffset))
1247                 return 0;
1248         DUMP_WRITE(men->data, men->datasz, foffset);
1249         if (!alignfile(file, foffset))
1250                 return 0;
1251
1252         return 1;
1253 }
1254 #undef DUMP_WRITE
1255
1256 static void fill_elf_header(struct elfhdr *elf, int segs,
1257                             u16 machine, u32 flags)
1258 {
1259         memset(elf, 0, sizeof(*elf));
1260
1261         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1262         elf->e_ident[EI_CLASS] = ELF_CLASS;
1263         elf->e_ident[EI_DATA] = ELF_DATA;
1264         elf->e_ident[EI_VERSION] = EV_CURRENT;
1265         elf->e_ident[EI_OSABI] = ELF_OSABI;
1266
1267         elf->e_type = ET_CORE;
1268         elf->e_machine = machine;
1269         elf->e_version = EV_CURRENT;
1270         elf->e_phoff = sizeof(struct elfhdr);
1271         elf->e_flags = flags;
1272         elf->e_ehsize = sizeof(struct elfhdr);
1273         elf->e_phentsize = sizeof(struct elf_phdr);
1274         elf->e_phnum = segs;
1275
1276         return;
1277 }
1278
1279 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1280 {
1281         phdr->p_type = PT_NOTE;
1282         phdr->p_offset = offset;
1283         phdr->p_vaddr = 0;
1284         phdr->p_paddr = 0;
1285         phdr->p_filesz = sz;
1286         phdr->p_memsz = 0;
1287         phdr->p_flags = 0;
1288         phdr->p_align = 0;
1289         return;
1290 }
1291
1292 static void fill_note(struct memelfnote *note, const char *name, int type, 
1293                 unsigned int sz, void *data)
1294 {
1295         note->name = name;
1296         note->type = type;
1297         note->datasz = sz;
1298         note->data = data;
1299         return;
1300 }
1301
1302 /*
1303  * fill up all the fields in prstatus from the given task struct, except
1304  * registers which need to be filled up separately.
1305  */
1306 static void fill_prstatus(struct elf_prstatus *prstatus,
1307                 struct task_struct *p, long signr)
1308 {
1309         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1310         prstatus->pr_sigpend = p->pending.signal.sig[0];
1311         prstatus->pr_sighold = p->blocked.sig[0];
1312         rcu_read_lock();
1313         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1314         rcu_read_unlock();
1315         prstatus->pr_pid = task_pid_vnr(p);
1316         prstatus->pr_pgrp = task_pgrp_vnr(p);
1317         prstatus->pr_sid = task_session_vnr(p);
1318         if (thread_group_leader(p)) {
1319                 struct task_cputime cputime;
1320
1321                 /*
1322                  * This is the record for the group leader.  It shows the
1323                  * group-wide total, not its individual thread total.
1324                  */
1325                 thread_group_cputime(p, &cputime);
1326                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1327                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1328         } else {
1329                 cputime_t utime, stime;
1330
1331                 task_cputime(p, &utime, &stime);
1332                 cputime_to_timeval(utime, &prstatus->pr_utime);
1333                 cputime_to_timeval(stime, &prstatus->pr_stime);
1334         }
1335         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1336         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1337 }
1338
1339 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1340                        struct mm_struct *mm)
1341 {
1342         const struct cred *cred;
1343         unsigned int i, len;
1344         
1345         /* first copy the parameters from user space */
1346         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1347
1348         len = mm->arg_end - mm->arg_start;
1349         if (len >= ELF_PRARGSZ)
1350                 len = ELF_PRARGSZ-1;
1351         if (copy_from_user(&psinfo->pr_psargs,
1352                            (const char __user *)mm->arg_start, len))
1353                 return -EFAULT;
1354         for(i = 0; i < len; i++)
1355                 if (psinfo->pr_psargs[i] == 0)
1356                         psinfo->pr_psargs[i] = ' ';
1357         psinfo->pr_psargs[len] = 0;
1358
1359         rcu_read_lock();
1360         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1361         rcu_read_unlock();
1362         psinfo->pr_pid = task_pid_vnr(p);
1363         psinfo->pr_pgrp = task_pgrp_vnr(p);
1364         psinfo->pr_sid = task_session_vnr(p);
1365
1366         i = p->state ? ffz(~p->state) + 1 : 0;
1367         psinfo->pr_state = i;
1368         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1369         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1370         psinfo->pr_nice = task_nice(p);
1371         psinfo->pr_flag = p->flags;
1372         rcu_read_lock();
1373         cred = __task_cred(p);
1374         SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1375         SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1376         rcu_read_unlock();
1377         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1378         
1379         return 0;
1380 }
1381
1382 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1383 {
1384         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1385         int i = 0;
1386         do
1387                 i += 2;
1388         while (auxv[i - 2] != AT_NULL);
1389         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1390 }
1391
1392 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1393                 siginfo_t *siginfo)
1394 {
1395         mm_segment_t old_fs = get_fs();
1396         set_fs(KERNEL_DS);
1397         copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1398         set_fs(old_fs);
1399         fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1400 }
1401
1402 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1403 /*
1404  * Format of NT_FILE note:
1405  *
1406  * long count     -- how many files are mapped
1407  * long page_size -- units for file_ofs
1408  * array of [COUNT] elements of
1409  *   long start
1410  *   long end
1411  *   long file_ofs
1412  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1413  */
1414 static void fill_files_note(struct memelfnote *note)
1415 {
1416         struct vm_area_struct *vma;
1417         unsigned count, size, names_ofs, remaining, n;
1418         user_long_t *data;
1419         user_long_t *start_end_ofs;
1420         char *name_base, *name_curpos;
1421
1422         /* *Estimated* file count and total data size needed */
1423         count = current->mm->map_count;
1424         size = count * 64;
1425
1426         names_ofs = (2 + 3 * count) * sizeof(data[0]);
1427  alloc:
1428         if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1429                 goto err;
1430         size = round_up(size, PAGE_SIZE);
1431         data = vmalloc(size);
1432         if (!data)
1433                 goto err;
1434
1435         start_end_ofs = data + 2;
1436         name_base = name_curpos = ((char *)data) + names_ofs;
1437         remaining = size - names_ofs;
1438         count = 0;
1439         for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1440                 struct file *file;
1441                 const char *filename;
1442
1443                 file = vma->vm_file;
1444                 if (!file)
1445                         continue;
1446                 filename = d_path(&file->f_path, name_curpos, remaining);
1447                 if (IS_ERR(filename)) {
1448                         if (PTR_ERR(filename) == -ENAMETOOLONG) {
1449                                 vfree(data);
1450                                 size = size * 5 / 4;
1451                                 goto alloc;
1452                         }
1453                         continue;
1454                 }
1455
1456                 /* d_path() fills at the end, move name down */
1457                 /* n = strlen(filename) + 1: */
1458                 n = (name_curpos + remaining) - filename;
1459                 remaining = filename - name_curpos;
1460                 memmove(name_curpos, filename, n);
1461                 name_curpos += n;
1462
1463                 *start_end_ofs++ = vma->vm_start;
1464                 *start_end_ofs++ = vma->vm_end;
1465                 *start_end_ofs++ = vma->vm_pgoff;
1466                 count++;
1467         }
1468
1469         /* Now we know exact count of files, can store it */
1470         data[0] = count;
1471         data[1] = PAGE_SIZE;
1472         /*
1473          * Count usually is less than current->mm->map_count,
1474          * we need to move filenames down.
1475          */
1476         n = current->mm->map_count - count;
1477         if (n != 0) {
1478                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1479                 memmove(name_base - shift_bytes, name_base,
1480                         name_curpos - name_base);
1481                 name_curpos -= shift_bytes;
1482         }
1483
1484         size = name_curpos - (char *)data;
1485         fill_note(note, "CORE", NT_FILE, size, data);
1486  err: ;
1487 }
1488
1489 #ifdef CORE_DUMP_USE_REGSET
1490 #include <linux/regset.h>
1491
1492 struct elf_thread_core_info {
1493         struct elf_thread_core_info *next;
1494         struct task_struct *task;
1495         struct elf_prstatus prstatus;
1496         struct memelfnote notes[0];
1497 };
1498
1499 struct elf_note_info {
1500         struct elf_thread_core_info *thread;
1501         struct memelfnote psinfo;
1502         struct memelfnote signote;
1503         struct memelfnote auxv;
1504         struct memelfnote files;
1505         user_siginfo_t csigdata;
1506         size_t size;
1507         int thread_notes;
1508 };
1509
1510 /*
1511  * When a regset has a writeback hook, we call it on each thread before
1512  * dumping user memory.  On register window machines, this makes sure the
1513  * user memory backing the register data is up to date before we read it.
1514  */
1515 static void do_thread_regset_writeback(struct task_struct *task,
1516                                        const struct user_regset *regset)
1517 {
1518         if (regset->writeback)
1519                 regset->writeback(task, regset, 1);
1520 }
1521
/*
 * Accessors for the prstatus note.  Each one is only a fallback: a
 * definition made before this point takes precedence.
 */

/* Number of bytes of register data to fetch into pr_reg. */
#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) (sizeof(S))
#endif

/* Number of bytes of prstatus that go into the NT_PRSTATUS note. */
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) (sizeof(S))
#endif

/* Address of the register area inside a prstatus. */
#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

/* Set the pr_fpvalid field of a prstatus. */
#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1537
1538 static int fill_thread_core_info(struct elf_thread_core_info *t,
1539                                  const struct user_regset_view *view,
1540                                  long signr, size_t *total)
1541 {
1542         unsigned int i;
1543
1544         /*
1545          * NT_PRSTATUS is the one special case, because the regset data
1546          * goes into the pr_reg field inside the note contents, rather
1547          * than being the whole note contents.  We fill the reset in here.
1548          * We assume that regset 0 is NT_PRSTATUS.
1549          */
1550         fill_prstatus(&t->prstatus, t->task, signr);
1551         (void) view->regsets[0].get(t->task, &view->regsets[0],
1552                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1553                                     PR_REG_PTR(&t->prstatus), NULL);
1554
1555         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1556                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1557         *total += notesize(&t->notes[0]);
1558
1559         do_thread_regset_writeback(t->task, &view->regsets[0]);
1560
1561         /*
1562          * Each other regset might generate a note too.  For each regset
1563          * that has no core_note_type or is inactive, we leave t->notes[i]
1564          * all zero and we'll know to skip writing it later.
1565          */
1566         for (i = 1; i < view->n; ++i) {
1567                 const struct user_regset *regset = &view->regsets[i];
1568                 do_thread_regset_writeback(t->task, regset);
1569                 if (regset->core_note_type && regset->get &&
1570                     (!regset->active || regset->active(t->task, regset))) {
1571                         int ret;
1572                         size_t size = regset->n * regset->size;
1573                         void *data = kmalloc(size, GFP_KERNEL);
1574                         if (unlikely(!data))
1575                                 return 0;
1576                         ret = regset->get(t->task, regset,
1577                                           0, size, data, NULL);
1578                         if (unlikely(ret))
1579                                 kfree(data);
1580                         else {
1581                                 if (regset->core_note_type != NT_PRFPREG)
1582                                         fill_note(&t->notes[i], "LINUX",
1583                                                   regset->core_note_type,
1584                                                   size, data);
1585                                 else {
1586                                         SET_PR_FPVALID(&t->prstatus, 1);
1587                                         fill_note(&t->notes[i], "CORE",
1588                                                   NT_PRFPREG, size, data);
1589                                 }
1590                                 *total += notesize(&t->notes[i]);
1591                         }
1592                 }
1593         }
1594
1595         return 1;
1596 }
1597
1598 static int fill_note_info(struct elfhdr *elf, int phdrs,
1599                           struct elf_note_info *info,
1600                           siginfo_t *siginfo, struct pt_regs *regs)
1601 {
1602         struct task_struct *dump_task = current;
1603         const struct user_regset_view *view = task_user_regset_view(dump_task);
1604         struct elf_thread_core_info *t;
1605         struct elf_prpsinfo *psinfo;
1606         struct core_thread *ct;
1607         unsigned int i;
1608
1609         info->size = 0;
1610         info->thread = NULL;
1611
1612         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1613         if (psinfo == NULL) {
1614                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1615                 return 0;
1616         }
1617
1618         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1619
1620         /*
1621          * Figure out how many notes we're going to need for each thread.
1622          */
1623         info->thread_notes = 0;
1624         for (i = 0; i < view->n; ++i)
1625                 if (view->regsets[i].core_note_type != 0)
1626                         ++info->thread_notes;
1627
1628         /*
1629          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1630          * since it is our one special case.
1631          */
1632         if (unlikely(info->thread_notes == 0) ||
1633             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1634                 WARN_ON(1);
1635                 return 0;
1636         }
1637
1638         /*
1639          * Initialize the ELF file header.
1640          */
1641         fill_elf_header(elf, phdrs,
1642                         view->e_machine, view->e_flags);
1643
1644         /*
1645          * Allocate a structure for each thread.
1646          */
1647         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1648                 t = kzalloc(offsetof(struct elf_thread_core_info,
1649                                      notes[info->thread_notes]),
1650                             GFP_KERNEL);
1651                 if (unlikely(!t))
1652                         return 0;
1653
1654                 t->task = ct->task;
1655                 if (ct->task == dump_task || !info->thread) {
1656                         t->next = info->thread;
1657                         info->thread = t;
1658                 } else {
1659                         /*
1660                          * Make sure to keep the original task at
1661                          * the head of the list.
1662                          */
1663                         t->next = info->thread->next;
1664                         info->thread->next = t;
1665                 }
1666         }
1667
1668         /*
1669          * Now fill in each thread's information.
1670          */
1671         for (t = info->thread; t != NULL; t = t->next)
1672                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1673                         return 0;
1674
1675         /*
1676          * Fill in the two process-wide notes.
1677          */
1678         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1679         info->size += notesize(&info->psinfo);
1680
1681         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1682         info->size += notesize(&info->signote);
1683
1684         fill_auxv_note(&info->auxv, current->mm);
1685         info->size += notesize(&info->auxv);
1686
1687         fill_files_note(&info->files);
1688         info->size += notesize(&info->files);
1689
1690         return 1;
1691 }
1692
1693 static size_t get_note_info_size(struct elf_note_info *info)
1694 {
1695         return info->size;
1696 }
1697
1698 /*
1699  * Write all the notes for each thread.  When writing the first thread, the
1700  * process-wide notes are interleaved after the first thread-specific note.
1701  */
1702 static int write_note_info(struct elf_note_info *info,
1703                            struct file *file, loff_t *foffset)
1704 {
1705         bool first = 1;
1706         struct elf_thread_core_info *t = info->thread;
1707
1708         do {
1709                 int i;
1710
1711                 if (!writenote(&t->notes[0], file, foffset))
1712                         return 0;
1713
1714                 if (first && !writenote(&info->psinfo, file, foffset))
1715                         return 0;
1716                 if (first && !writenote(&info->signote, file, foffset))
1717                         return 0;
1718                 if (first && !writenote(&info->auxv, file, foffset))
1719                         return 0;
1720                 if (first && !writenote(&info->files, file, foffset))
1721                         return 0;
1722
1723                 for (i = 1; i < info->thread_notes; ++i)
1724                         if (t->notes[i].data &&
1725                             !writenote(&t->notes[i], file, foffset))
1726                                 return 0;
1727
1728                 first = 0;
1729                 t = t->next;
1730         } while (t);
1731
1732         return 1;
1733 }
1734
1735 static void free_note_info(struct elf_note_info *info)
1736 {
1737         struct elf_thread_core_info *threads = info->thread;
1738         while (threads) {
1739                 unsigned int i;
1740                 struct elf_thread_core_info *t = threads;
1741                 threads = t->next;
1742                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1743                 for (i = 1; i < info->thread_notes; ++i)
1744                         kfree(t->notes[i].data);
1745                 kfree(t);
1746         }
1747         kfree(info->psinfo.data);
1748         vfree(info->files.data);
1749 }
1750
1751 #else
1752
/*
 * Here is the structure in which status of each thread is captured.
 * The note payloads (prstatus, fpu, xfpu) are embedded in the structure
 * itself; notes[] merely describes them.
 */
struct elf_thread_status
{
        struct list_head list;          /* entry in elf_note_info.thread_list */
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        elf_fpregset_t fpu;             /* NT_PRFPREG */
        struct task_struct *thread;     /* the task this record describes */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
#endif
        struct memelfnote notes[3];     /* descriptors for the payloads above */
        int num_notes;                  /* how many notes were filled in */
};
1766
1767 /*
1768  * In order to add the specific thread information for the elf file format,
1769  * we need to keep a linked list of every threads pr_status and then create
1770  * a single section for them in the final core file.
1771  */
1772 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1773 {
1774         int sz = 0;
1775         struct task_struct *p = t->thread;
1776         t->num_notes = 0;
1777
1778         fill_prstatus(&t->prstatus, p, signr);
1779         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1780         
1781         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1782                   &(t->prstatus));
1783         t->num_notes++;
1784         sz += notesize(&t->notes[0]);
1785
1786         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1787                                                                 &t->fpu))) {
1788                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1789                           &(t->fpu));
1790                 t->num_notes++;
1791                 sz += notesize(&t->notes[1]);
1792         }
1793
1794 #ifdef ELF_CORE_COPY_XFPREGS
1795         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1796                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1797                           sizeof(t->xfpu), &t->xfpu);
1798                 t->num_notes++;
1799                 sz += notesize(&t->notes[2]);
1800         }
1801 #endif  
1802         return sz;
1803 }
1804
/*
 * Bookkeeping for the notes of one core dump (variant used when the regset
 * based implementation above is not compiled in).  The buffers are allocated
 * by elf_note_info_init() and released by free_note_info().
 */
struct elf_note_info {
        struct memelfnote *notes;       /* notes of the dumping task; 8 slots are allocated */
        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
        struct list_head thread_list;   /* elf_thread_status records of the other threads */
        elf_fpregset_t *fpu;            /* NT_PRFPREG payload of the dumping task */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t *xfpu;          /* ELF_CORE_XFPREG_TYPE payload of the dumping task */
#endif
        user_siginfo_t csigdata;        /* storage handed to fill_siginfo_note() */
        int thread_status_size;         /* sum of elf_dump_thread_status() results */
        int numnote;                    /* number of notes[] entries in use */
};
1818
1819 static int elf_note_info_init(struct elf_note_info *info)
1820 {
1821         memset(info, 0, sizeof(*info));
1822         INIT_LIST_HEAD(&info->thread_list);
1823
1824         /* Allocate space for ELF notes */
1825         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1826         if (!info->notes)
1827                 return 0;
1828         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1829         if (!info->psinfo)
1830                 return 0;
1831         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1832         if (!info->prstatus)
1833                 return 0;
1834         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1835         if (!info->fpu)
1836                 return 0;
1837 #ifdef ELF_CORE_COPY_XFPREGS
1838         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1839         if (!info->xfpu)
1840                 return 0;
1841 #endif
1842         return 1;
1843 }
1844
1845 static int fill_note_info(struct elfhdr *elf, int phdrs,
1846                           struct elf_note_info *info,
1847                           siginfo_t *siginfo, struct pt_regs *regs)
1848 {
1849         struct list_head *t;
1850
1851         if (!elf_note_info_init(info))
1852                 return 0;
1853
1854         if (siginfo->si_signo) {
1855                 struct core_thread *ct;
1856                 struct elf_thread_status *ets;
1857
1858                 for (ct = current->mm->core_state->dumper.next;
1859                                                 ct; ct = ct->next) {
1860                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1861                         if (!ets)
1862                                 return 0;
1863
1864                         ets->thread = ct->task;
1865                         list_add(&ets->list, &info->thread_list);
1866                 }
1867
1868                 list_for_each(t, &info->thread_list) {
1869                         int sz;
1870
1871                         ets = list_entry(t, struct elf_thread_status, list);
1872                         sz = elf_dump_thread_status(siginfo->si_signo, ets);
1873                         info->thread_status_size += sz;
1874                 }
1875         }
1876         /* now collect the dump for the current */
1877         memset(info->prstatus, 0, sizeof(*info->prstatus));
1878         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1879         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1880
1881         /* Set up header */
1882         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1883
1884         /*
1885          * Set up the notes in similar form to SVR4 core dumps made
1886          * with info from their /proc.
1887          */
1888
1889         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1890                   sizeof(*info->prstatus), info->prstatus);
1891         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1892         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1893                   sizeof(*info->psinfo), info->psinfo);
1894
1895         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1896         fill_auxv_note(info->notes + 3, current->mm);
1897         fill_files_note(info->notes + 4);
1898
1899         info->numnote = 5;
1900
1901         /* Try to dump the FPU. */
1902         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1903                                                                info->fpu);
1904         if (info->prstatus->pr_fpvalid)
1905                 fill_note(info->notes + info->numnote++,
1906                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1907 #ifdef ELF_CORE_COPY_XFPREGS
1908         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1909                 fill_note(info->notes + info->numnote++,
1910                           "LINUX", ELF_CORE_XFPREG_TYPE,
1911                           sizeof(*info->xfpu), info->xfpu);
1912 #endif
1913
1914         return 1;
1915 }
1916
1917 static size_t get_note_info_size(struct elf_note_info *info)
1918 {
1919         int sz = 0;
1920         int i;
1921
1922         for (i = 0; i < info->numnote; i++)
1923                 sz += notesize(info->notes + i);
1924
1925         sz += info->thread_status_size;
1926
1927         return sz;
1928 }
1929
1930 static int write_note_info(struct elf_note_info *info,
1931                            struct file *file, loff_t *foffset)
1932 {
1933         int i;
1934         struct list_head *t;
1935
1936         for (i = 0; i < info->numnote; i++)
1937                 if (!writenote(info->notes + i, file, foffset))
1938                         return 0;
1939
1940         /* write out the thread status notes section */
1941         list_for_each(t, &info->thread_list) {
1942                 struct elf_thread_status *tmp =
1943                                 list_entry(t, struct elf_thread_status, list);
1944
1945                 for (i = 0; i < tmp->num_notes; i++)
1946                         if (!writenote(&tmp->notes[i], file, foffset))
1947                                 return 0;
1948         }
1949
1950         return 1;
1951 }
1952
1953 static void free_note_info(struct elf_note_info *info)
1954 {
1955         while (!list_empty(&info->thread_list)) {
1956                 struct list_head *tmp = info->thread_list.next;
1957                 list_del(tmp);
1958                 kfree(list_entry(tmp, struct elf_thread_status, list));
1959         }
1960
1961         /* Free data allocated by fill_files_note(): */
1962         vfree(info->notes[4].data);
1963
1964         kfree(info->prstatus);
1965         kfree(info->psinfo);
1966         kfree(info->notes);
1967         kfree(info->fpu);
1968 #ifdef ELF_CORE_COPY_XFPREGS
1969         kfree(info->xfpu);
1970 #endif
1971 }
1972
1973 #endif
1974
1975 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1976                                         struct vm_area_struct *gate_vma)
1977 {
1978         struct vm_area_struct *ret = tsk->mm->mmap;
1979
1980         if (ret)
1981                 return ret;
1982         return gate_vma;
1983 }
1984 /*
1985  * Helper function for iterating across a vma list.  It ensures that the caller
1986  * will visit `gate_vma' prior to terminating the search.
1987  */
1988 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1989                                         struct vm_area_struct *gate_vma)
1990 {
1991         struct vm_area_struct *ret;
1992
1993         ret = this_vma->vm_next;
1994         if (ret)
1995                 return ret;
1996         if (this_vma == gate_vma)
1997                 return NULL;
1998         return gate_vma;
1999 }
2000
2001 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2002                              elf_addr_t e_shoff, int segs)
2003 {
2004         elf->e_shoff = e_shoff;
2005         elf->e_shentsize = sizeof(*shdr4extnum);
2006         elf->e_shnum = 1;
2007         elf->e_shstrndx = SHN_UNDEF;
2008
2009         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2010
2011         shdr4extnum->sh_type = SHT_NULL;
2012         shdr4extnum->sh_size = elf->e_shnum;
2013         shdr4extnum->sh_link = elf->e_shstrndx;
2014         shdr4extnum->sh_info = segs;
2015 }
2016
2017 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2018                                      unsigned long mm_flags)
2019 {
2020         struct vm_area_struct *vma;
2021         size_t size = 0;
2022
2023         for (vma = first_vma(current, gate_vma); vma != NULL;
2024              vma = next_vma(vma, gate_vma))
2025                 size += vma_dump_size(vma, mm_flags);
2026         return size;
2027 }
2028
2029 /*
2030  * Actual dumper
2031  *
2032  * This is a two-pass process; first we find the offsets of the bits,
2033  * and then they are actually written out.  If we run out of core limit
2034  * we just truncate.
2035  */
2036 static int elf_core_dump(struct coredump_params *cprm)
2037 {
2038         int has_dumped = 0;
2039         mm_segment_t fs;
2040         int segs;
2041         size_t size = 0;
2042         struct vm_area_struct *vma, *gate_vma;
2043         struct elfhdr *elf = NULL;
2044         loff_t offset = 0, dataoff, foffset;
2045         struct elf_note_info info;
2046         struct elf_phdr *phdr4note = NULL;
2047         struct elf_shdr *shdr4extnum = NULL;
2048         Elf_Half e_phnum;
2049         elf_addr_t e_shoff;
2050
2051         /*
2052          * We no longer stop all VM operations.
2053          * 
2054          * This is because those proceses that could possibly change map_count
2055          * or the mmap / vma pages are now blocked in do_exit on current
2056          * finishing this core dump.
2057          *
2058          * Only ptrace can touch these memory addresses, but it doesn't change
2059          * the map_count or the pages allocated. So no possibility of crashing
2060          * exists while dumping the mm->vm_next areas to the core file.
2061          */
2062   
2063         /* alloc memory for large data structures: too large to be on stack */
2064         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2065         if (!elf)
2066                 goto out;
2067         /*
2068          * The number of segs are recored into ELF header as 16bit value.
2069          * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2070          */
2071         segs = current->mm->map_count;
2072         segs += elf_core_extra_phdrs();
2073
2074         gate_vma = get_gate_vma(current->mm);
2075         if (gate_vma != NULL)
2076                 segs++;
2077
2078         /* for notes section */
2079         segs++;
2080
2081         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2082          * this, kernel supports extended numbering. Have a look at
2083          * include/linux/elf.h for further information. */
2084         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2085
2086         /*
2087          * Collect all the non-memory information about the process for the
2088          * notes.  This also sets up the file header.
2089          */
2090         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2091                 goto cleanup;
2092
2093         has_dumped = 1;
2094         current->flags |= PF_DUMPCORE;
2095   
2096         fs = get_fs();
2097         set_fs(KERNEL_DS);
2098
2099         offset += sizeof(*elf);                         /* Elf header */
2100         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2101         foffset = offset;
2102
2103         /* Write notes phdr entry */
2104         {
2105                 size_t sz = get_note_info_size(&info);
2106
2107                 sz += elf_coredump_extra_notes_size();
2108
2109                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2110                 if (!phdr4note)
2111                         goto end_coredump;
2112
2113                 fill_elf_note_phdr(phdr4note, sz, offset);
2114                 offset += sz;
2115         }
2116
2117         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2118
2119         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2120         offset += elf_core_extra_data_size();
2121         e_shoff = offset;
2122
2123         if (e_phnum == PN_XNUM) {
2124                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2125                 if (!shdr4extnum)
2126                         goto end_coredump;
2127                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2128         }
2129
2130         offset = dataoff;
2131
2132         size += sizeof(*elf);
2133         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2134                 goto end_coredump;
2135
2136         size += sizeof(*phdr4note);
2137         if (size > cprm->limit
2138             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2139                 goto end_coredump;
2140
2141         /* Write program headers for segments dump */
2142         for (vma = first_vma(current, gate_vma); vma != NULL;
2143                         vma = next_vma(vma, gate_vma)) {
2144                 struct elf_phdr phdr;
2145
2146                 phdr.p_type = PT_LOAD;
2147                 phdr.p_offset = offset;
2148                 phdr.p_vaddr = vma->vm_start;
2149                 phdr.p_paddr = 0;
2150                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2151                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2152                 offset += phdr.p_filesz;
2153                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2154                 if (vma->vm_flags & VM_WRITE)
2155                         phdr.p_flags |= PF_W;
2156                 if (vma->vm_flags & VM_EXEC)
2157                         phdr.p_flags |= PF_X;
2158                 phdr.p_align = ELF_EXEC_PAGESIZE;
2159
2160                 size += sizeof(phdr);
2161                 if (size > cprm->limit
2162                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2163                         goto end_coredump;
2164         }
2165
2166         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2167                 goto end_coredump;
2168
2169         /* write out the notes section */
2170         if (!write_note_info(&info, cprm->file, &foffset))
2171                 goto end_coredump;
2172
2173         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2174                 goto end_coredump;
2175
2176         /* Align to page */
2177         if (!dump_seek(cprm->file, dataoff - foffset))
2178                 goto end_coredump;
2179
2180         for (vma = first_vma(current, gate_vma); vma != NULL;
2181                         vma = next_vma(vma, gate_vma)) {
2182                 unsigned long addr;
2183                 unsigned long end;
2184
2185                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2186
2187                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2188                         struct page *page;
2189                         int stop;
2190
2191                         page = get_dump_page(addr);
2192                         if (page) {
2193                                 void *kaddr = kmap(page);
2194                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2195                                         !dump_write(cprm->file, kaddr,
2196                                                     PAGE_SIZE);
2197                                 kunmap(page);
2198                                 page_cache_release(page);
2199                         } else
2200                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2201                         if (stop)
2202                                 goto end_coredump;
2203                 }
2204         }
2205
2206         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2207                 goto end_coredump;
2208
2209         if (e_phnum == PN_XNUM) {
2210                 size += sizeof(*shdr4extnum);
2211                 if (size > cprm->limit
2212                     || !dump_write(cprm->file, shdr4extnum,
2213                                    sizeof(*shdr4extnum)))
2214                         goto end_coredump;
2215         }
2216
2217 end_coredump:
2218         set_fs(fs);
2219
2220 cleanup:
2221         free_note_info(&info);
2222         kfree(shdr4extnum);
2223         kfree(phdr4note);
2224         kfree(elf);
2225 out:
2226         return has_dumped;
2227 }
2228
2229 #endif          /* CONFIG_ELF_CORE */
2230
/*
 * Init hook (wired up through core_initcall() below): registers elf_format
 * as a binary format handler.  Always returns 0.
 */
static int __init init_elf_binfmt(void)
{
        register_binfmt(&elf_format);
        return 0;
}
2236
/* Exit hook: undoes the registration done by init_elf_binfmt(). */
static void __exit exit_elf_binfmt(void)
{
        /* Remove the ELF loader. */
        unregister_binfmt(&elf_format);
}
2242
2243 core_initcall(init_elf_binfmt);
2244 module_exit(exit_elf_binfmt);
2245 MODULE_LICENSE("GPL");