]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - arch/x86/mm/dump_pagetables.c
Merge branches 'acpi-spcr', 'acpi-osi', 'acpi-bus', 'acpi-scan' and 'acpi-misc'
[karo-tx-linux.git] / arch / x86 / mm / dump_pagetables.c
1 /*
2  * Debug helper to dump the current kernel pagetables of the system
3  * so that we can see what the various memory ranges are set to.
4  *
5  * (C) Copyright 2008 Intel Corporation
6  *
7  * Author: Arjan van de Ven <arjan@linux.intel.com>
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License
11  * as published by the Free Software Foundation; version 2
12  * of the License.
13  */
14
15 #include <linux/debugfs.h>
16 #include <linux/mm.h>
17 #include <linux/init.h>
18 #include <linux/sched.h>
19 #include <linux/seq_file.h>
20
21 #include <asm/kasan.h>
22 #include <asm/pgtable.h>
23
24 /*
25  * The dumper groups pagetable entries of the same type into one, and for
26  * that it needs to keep some state when walking, and flush this state
27  * when a "break" in the continuity is found.
28  */
29 struct pg_state {
30         int level;
31         pgprot_t current_prot;
32         unsigned long start_address;
33         unsigned long current_address;
34         const struct addr_marker *marker;
35         unsigned long lines;
36         bool to_dmesg;
37         bool check_wx;
38         unsigned long wx_pages;
39 };
40
/* Describes one named region of the virtual address space. */
struct addr_marker {
	/* First virtual address belonging to the region. */
	unsigned long start_address;
	/* Text shown in the "---[ ... ]---" separator line. */
	const char *name;
	/* Cap on lines printed for the region; 0 means no cap. */
	unsigned long max_lines;
};
46
/*
 * Indices for address_markers; keep sync'd w/ address_markers below.
 *
 * Every entry of the table needs a matching enumerator here, in the same
 * order and under the same #ifdef conditions, otherwise all later indices
 * point at the wrong table slot.
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
#ifdef CONFIG_X86_64
	KERNEL_SPACE_NR,
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
# ifdef CONFIG_KASAN
	KASAN_SHADOW_START_NR,
	KASAN_SHADOW_END_NR,
# endif
# ifdef CONFIG_X86_ESPFIX64
	ESPFIX_START_NR,
# endif
# ifdef CONFIG_EFI
	/* Matches the "EFI Runtime Services" entry of address_markers. */
	EFI_END_NR,
# endif
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
#else
	KERNEL_SPACE_NR,
	VMALLOC_START_NR,
	VMALLOC_END_NR,
# ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
# endif
	FIXADDR_START_NR,
#endif
};
75
76 /* Address space markers hints */
77 static struct addr_marker address_markers[] = {
78         { 0, "User Space" },
79 #ifdef CONFIG_X86_64
80         { 0x8000000000000000UL, "Kernel Space" },
81         { 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
82         { 0/* VMALLOC_START */, "vmalloc() Area" },
83         { 0/* VMEMMAP_START */, "Vmemmap" },
84 #ifdef CONFIG_KASAN
85         { KASAN_SHADOW_START,   "KASAN shadow" },
86         { KASAN_SHADOW_END,     "KASAN shadow end" },
87 #endif
88 # ifdef CONFIG_X86_ESPFIX64
89         { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
90 # endif
91 # ifdef CONFIG_EFI
92         { EFI_VA_END,           "EFI Runtime Services" },
93 # endif
94         { __START_KERNEL_map,   "High Kernel Mapping" },
95         { MODULES_VADDR,        "Modules" },
96         { MODULES_END,          "End Modules" },
97 #else
98         { PAGE_OFFSET,          "Kernel Mapping" },
99         { 0/* VMALLOC_START */, "vmalloc() Area" },
100         { 0/*VMALLOC_END*/,     "vmalloc() End" },
101 # ifdef CONFIG_HIGHMEM
102         { 0/*PKMAP_BASE*/,      "Persistent kmap() Area" },
103 # endif
104         { 0/*FIXADDR_START*/,   "Fixmap Area" },
105 #endif
106         { -1, NULL }            /* End of list */
107 };
108
/*
 * Multipliers for offsets within the PTEs: the amount of virtual address
 * space one entry covers at each level.  Each level is the level below it
 * times the number of entries in a table of that lower level.
 */
#define PTE_LEVEL_MULT	(PAGE_SIZE)			/* one pte entry */
#define PMD_LEVEL_MULT	(PTRS_PER_PTE * PTE_LEVEL_MULT)	/* one pmd entry */
#define PUD_LEVEL_MULT	(PTRS_PER_PMD * PMD_LEVEL_MULT)	/* one pud entry */
#define P4D_LEVEL_MULT	(PTRS_PER_PUD * PUD_LEVEL_MULT)	/* one p4d entry */
#define PGD_LEVEL_MULT	(PTRS_PER_P4D * P4D_LEVEL_MULT)	/* one pgd entry */
115
/*
 * Start a new line of output.
 *
 * With to_dmesg set the text goes to the kernel log at KERN_INFO level;
 * otherwise it goes to the seq_file m, or nowhere when m is NULL.
 * fmt must be a string literal because the log level is pasted onto it.
 */
#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)		\
do {								\
	if (to_dmesg) {						\
		printk(KERN_INFO fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
} while (0)
124
/*
 * Continue the line of output that is currently being built.
 *
 * With to_dmesg set the text is sent to the kernel log as a KERN_CONT
 * continuation; otherwise it goes to the seq_file m, or nowhere when m is
 * NULL.  fmt must be a string literal because the log level is pasted
 * onto it.
 */
#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)		\
do {								\
	if (to_dmesg) {						\
		printk(KERN_CONT fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
} while (0)
133
134 /*
135  * Print a readable form of a pgprot_t to the seq_file
136  */
137 static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
138 {
139         pgprotval_t pr = pgprot_val(prot);
140         static const char * const level_name[] =
141                 { "cr3", "pgd", "pud", "pmd", "pte" };
142
143         if (!pgprot_val(prot)) {
144                 /* Not present */
145                 pt_dump_cont_printf(m, dmsg, "                              ");
146         } else {
147                 if (pr & _PAGE_USER)
148                         pt_dump_cont_printf(m, dmsg, "USR ");
149                 else
150                         pt_dump_cont_printf(m, dmsg, "    ");
151                 if (pr & _PAGE_RW)
152                         pt_dump_cont_printf(m, dmsg, "RW ");
153                 else
154                         pt_dump_cont_printf(m, dmsg, "ro ");
155                 if (pr & _PAGE_PWT)
156                         pt_dump_cont_printf(m, dmsg, "PWT ");
157                 else
158                         pt_dump_cont_printf(m, dmsg, "    ");
159                 if (pr & _PAGE_PCD)
160                         pt_dump_cont_printf(m, dmsg, "PCD ");
161                 else
162                         pt_dump_cont_printf(m, dmsg, "    ");
163
164                 /* Bit 7 has a different meaning on level 3 vs 4 */
165                 if (level <= 3 && pr & _PAGE_PSE)
166                         pt_dump_cont_printf(m, dmsg, "PSE ");
167                 else
168                         pt_dump_cont_printf(m, dmsg, "    ");
169                 if ((level == 4 && pr & _PAGE_PAT) ||
170                     ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
171                         pt_dump_cont_printf(m, dmsg, "PAT ");
172                 else
173                         pt_dump_cont_printf(m, dmsg, "    ");
174                 if (pr & _PAGE_GLOBAL)
175                         pt_dump_cont_printf(m, dmsg, "GLB ");
176                 else
177                         pt_dump_cont_printf(m, dmsg, "    ");
178                 if (pr & _PAGE_NX)
179                         pt_dump_cont_printf(m, dmsg, "NX ");
180                 else
181                         pt_dump_cont_printf(m, dmsg, "x  ");
182         }
183         pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
184 }
185
/*
 * On 64 bits, sign-extend a virtual address from the implemented address
 * width to the full 64 bits, i.e. turn it into its canonical form.
 *
 * The walkers build addresses as index * multiplier, which leaves the
 * upper bits clear; kernel-half addresses need those bits set.
 *
 * The width is taken from __VIRTUAL_MASK_SHIFT instead of assuming 48-bit
 * addresses, so the result is also right when a p4d level is in use.
 * With 4-level paging (__VIRTUAL_MASK_SHIFT == 47) the shift is 16, as
 * before.
 *
 * On 32 bits the address is returned unchanged.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
	/* Number of bits above the topmost implemented address bit. */
	const int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);

	/* Move that bit into the sign position, then shift back arithmetically. */
	return (signed long)(u << shift) >> shift;
#else
	return u;
#endif
}
197
198 /*
199  * This function gets called on a break in a continuous series
200  * of PTE entries; the next one is different so we need to
201  * print what we collected so far.
202  */
203 static void note_page(struct seq_file *m, struct pg_state *st,
204                       pgprot_t new_prot, int level)
205 {
206         pgprotval_t prot, cur;
207         static const char units[] = "BKMGTPE";
208
209         /*
210          * If we have a "break" in the series, we need to flush the state that
211          * we have now. "break" is either changing perms, levels or
212          * address space marker.
213          */
214         prot = pgprot_val(new_prot);
215         cur = pgprot_val(st->current_prot);
216
217         if (!st->level) {
218                 /* First entry */
219                 st->current_prot = new_prot;
220                 st->level = level;
221                 st->marker = address_markers;
222                 st->lines = 0;
223                 pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
224                                    st->marker->name);
225         } else if (prot != cur || level != st->level ||
226                    st->current_address >= st->marker[1].start_address) {
227                 const char *unit = units;
228                 unsigned long delta;
229                 int width = sizeof(unsigned long) * 2;
230                 pgprotval_t pr = pgprot_val(st->current_prot);
231
232                 if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) {
233                         WARN_ONCE(1,
234                                   "x86/mm: Found insecure W+X mapping at address %p/%pS\n",
235                                   (void *)st->start_address,
236                                   (void *)st->start_address);
237                         st->wx_pages += (st->current_address -
238                                          st->start_address) / PAGE_SIZE;
239                 }
240
241                 /*
242                  * Now print the actual finished series
243                  */
244                 if (!st->marker->max_lines ||
245                     st->lines < st->marker->max_lines) {
246                         pt_dump_seq_printf(m, st->to_dmesg,
247                                            "0x%0*lx-0x%0*lx   ",
248                                            width, st->start_address,
249                                            width, st->current_address);
250
251                         delta = st->current_address - st->start_address;
252                         while (!(delta & 1023) && unit[1]) {
253                                 delta >>= 10;
254                                 unit++;
255                         }
256                         pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ",
257                                             delta, *unit);
258                         printk_prot(m, st->current_prot, st->level,
259                                     st->to_dmesg);
260                 }
261                 st->lines++;
262
263                 /*
264                  * We print markers for special areas of address space,
265                  * such as the start of vmalloc space etc.
266                  * This helps in the interpretation.
267                  */
268                 if (st->current_address >= st->marker[1].start_address) {
269                         if (st->marker->max_lines &&
270                             st->lines > st->marker->max_lines) {
271                                 unsigned long nskip =
272                                         st->lines - st->marker->max_lines;
273                                 pt_dump_seq_printf(m, st->to_dmesg,
274                                                    "... %lu entr%s skipped ... \n",
275                                                    nskip,
276                                                    nskip == 1 ? "y" : "ies");
277                         }
278                         st->marker++;
279                         st->lines = 0;
280                         pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
281                                            st->marker->name);
282                 }
283
284                 st->start_address = st->current_address;
285                 st->current_prot = new_prot;
286                 st->level = level;
287         }
288 }
289
290 static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, unsigned long P)
291 {
292         int i;
293         pte_t *start;
294         pgprotval_t prot;
295
296         start = (pte_t *)pmd_page_vaddr(addr);
297         for (i = 0; i < PTRS_PER_PTE; i++) {
298                 prot = pte_flags(*start);
299                 st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
300                 note_page(m, st, __pgprot(prot), 4);
301                 start++;
302         }
303 }
304
#if PTRS_PER_PMD > 1

/*
 * Visit every entry of the pmd table referenced by the pud entry @addr.
 *
 * @m:    seq_file to print to (may be NULL)
 * @st:   walker state
 * @addr: pud entry pointing at the pmd table
 * @P:    non-normalized virtual address mapped by the first pmd
 *
 * Empty entries, large mappings and non-present entries are reported to
 * note_page() as level 3; everything else is descended into.
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
{
	pmd_t *pmd = (pmd_t *)pud_page_vaddr(addr);
	pgprotval_t flags;
	int idx;

	for (idx = 0; idx < PTRS_PER_PMD; idx++, pmd++) {
		st->current_address = normalize_addr(P + idx * PMD_LEVEL_MULT);

		if (pmd_none(*pmd)) {
			note_page(m, st, __pgprot(0), 3);
		} else if (pmd_large(*pmd) || !pmd_present(*pmd)) {
			flags = pmd_flags(*pmd);
			note_page(m, st, __pgprot(flags), 3);
		} else {
			walk_pte_level(m, st, *pmd,
				       P + idx * PMD_LEVEL_MULT);
		}
	}
}

#else
/* Only one pmd entry per table: treat the pud entry as a pmd entry. */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
#endif
335
#if PTRS_PER_PUD > 1

/*
 * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y
 * KASAN fills page tables with the same values. Since there is no
 * point in checking page table more than once we just skip repeated
 * entries. This saves us dozens of seconds during boot.
 *
 * Returns true only when W+X checking is enabled, a previous entry
 * exists, and its raw value equals that of the current entry.
 */
static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
{
	if (!checkwx || !prev_pud)
		return false;

	return pud_val(*prev_pud) == pud_val(*pud);
}

/*
 * Visit every entry of the pud table referenced by the p4d entry @addr.
 *
 * @m:    seq_file to print to (may be NULL)
 * @st:   walker state
 * @addr: p4d entry pointing at the pud table
 * @P:    non-normalized virtual address mapped by the first pud
 *
 * Empty entries, and entries skipped by pud_already_checked(), are
 * reported to note_page() as level 2 with no protection bits.  Large
 * mappings and non-present entries are reported as level 2 with their
 * flags; everything else is descended into.
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
{
	pud_t *pud = (pud_t *)p4d_page_vaddr(addr);
	pud_t *previous = NULL;
	pgprotval_t flags;
	int idx;

	for (idx = 0; idx < PTRS_PER_PUD; idx++) {
		st->current_address = normalize_addr(P + idx * PUD_LEVEL_MULT);

		if (pud_none(*pud) ||
		    pud_already_checked(previous, pud, st->check_wx)) {
			note_page(m, st, __pgprot(0), 2);
		} else if (pud_large(*pud) || !pud_present(*pud)) {
			flags = pud_flags(*pud);
			note_page(m, st, __pgprot(flags), 2);
		} else {
			walk_pmd_level(m, st, *pud,
				       P + idx * PUD_LEVEL_MULT);
		}

		previous = pud;
		pud++;
	}
}

#else
/* Only one pud entry per table: treat the p4d entry as a pud entry. */
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(p4d_val(a)),p)
#define p4d_large(a) pud_large(__pud(p4d_val(a)))
#define p4d_none(a)  pud_none(__pud(p4d_val(a)))
#endif
382
#if PTRS_PER_P4D > 1

/*
 * Visit every entry of the p4d table referenced by the pgd entry @addr.
 *
 * @m:    seq_file to print to (may be NULL)
 * @st:   walker state
 * @addr: pgd entry pointing at the p4d table
 * @P:    non-normalized virtual address mapped by the first p4d
 *
 * Empty entries, large mappings and non-present entries are reported to
 * note_page(); everything else is descended into.
 *
 * NOTE(review): p4d entries are reported with level 2, the same number
 * the pud walker uses, so they are printed with the "pud" label.  Giving
 * p4d its own level needs a coordinated change of the level numbering in
 * all walkers and in printk_prot().
 */
static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
{
	p4d_t *p4d = (p4d_t *)pgd_page_vaddr(addr);
	pgprotval_t flags;
	int idx;

	for (idx = 0; idx < PTRS_PER_P4D; idx++, p4d++) {
		st->current_address = normalize_addr(P + idx * P4D_LEVEL_MULT);

		if (p4d_none(*p4d)) {
			note_page(m, st, __pgprot(0), 2);
		} else if (p4d_large(*p4d) || !p4d_present(*p4d)) {
			flags = p4d_flags(*p4d);
			note_page(m, st, __pgprot(flags), 2);
		} else {
			walk_pud_level(m, st, *p4d,
				       P + idx * P4D_LEVEL_MULT);
		}
	}
}

#else
/* Only one p4d entry per table: treat the pgd entry as a p4d entry. */
#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p)
#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
#define pgd_none(a)  p4d_none(__p4d(pgd_val(a)))
#endif
415
/*
 * Tell whether top-level index @idx lies in the hypervisor hole.
 *
 * On 64 bits, ffff800000000000 - ffff87ffffffffff is reserved for the
 * hypervisor; that is the 16 top-level entries immediately below the
 * one that maps __PAGE_OFFSET.  On 32 bits there is no such range.
 */
static inline bool is_hypervisor_range(int idx)
{
#ifdef CONFIG_X86_64
	const unsigned long first_kernel = pgd_index(__PAGE_OFFSET);

	return (idx >= first_kernel - 16) && (idx < first_kernel);
#else
	return false;
#endif
}
429
430 static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
431                                        bool checkwx)
432 {
433 #ifdef CONFIG_X86_64
434         pgd_t *start = (pgd_t *) &init_top_pgt;
435 #else
436         pgd_t *start = swapper_pg_dir;
437 #endif
438         pgprotval_t prot;
439         int i;
440         struct pg_state st = {};
441
442         if (pgd) {
443                 start = pgd;
444                 st.to_dmesg = true;
445         }
446
447         st.check_wx = checkwx;
448         if (checkwx)
449                 st.wx_pages = 0;
450
451         for (i = 0; i < PTRS_PER_PGD; i++) {
452                 st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
453                 if (!pgd_none(*start) && !is_hypervisor_range(i)) {
454                         if (pgd_large(*start) || !pgd_present(*start)) {
455                                 prot = pgd_flags(*start);
456                                 note_page(m, &st, __pgprot(prot), 1);
457                         } else {
458                                 walk_p4d_level(m, &st, *start,
459                                                i * PGD_LEVEL_MULT);
460                         }
461                 } else
462                         note_page(m, &st, __pgprot(0), 1);
463
464                 cond_resched();
465                 start++;
466         }
467
468         /* Flush out the last page */
469         st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
470         note_page(m, &st, __pgprot(0), 0);
471         if (!checkwx)
472                 return;
473         if (st.wx_pages)
474                 pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
475                         st.wx_pages);
476         else
477                 pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
478 }
479
480 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
481 {
482         ptdump_walk_pgd_level_core(m, pgd, false);
483 }
484 EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
485
486 void ptdump_walk_pgd_level_checkwx(void)
487 {
488         ptdump_walk_pgd_level_core(NULL, NULL, true);
489 }
490
491 static int __init pt_dump_init(void)
492 {
493         /*
494          * Various markers are not compile-time constants, so assign them
495          * here.
496          */
497 #ifdef CONFIG_X86_64
498         address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
499         address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
500         address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
501 #endif
502 #ifdef CONFIG_X86_32
503         address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
504         address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
505 # ifdef CONFIG_HIGHMEM
506         address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
507 # endif
508         address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
509 #endif
510
511         return 0;
512 }
513 __initcall(pt_dump_init);