2 * IBM System z Huge TLB Page Support for Kernel.
4 * Copyright IBM Corp. 2007,2016
5 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
8 #define KMSG_COMPONENT "hugetlb"
9 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
12 #include <linux/hugetlb.h>
/*
 * __pte_to_rste - convert pte bits into region/segment-table entry bits.
 *
 * Huge pages on s390 are mapped by segment (1M) or region-third (2G)
 * table entries, which use a different bit layout than ptes.  The table
 * below documents, per pte state, how each software/hardware bit is
 * repositioned by the shift/mask statements in the present branch.
 * A non-present pte maps to _SEGMENT_ENTRY_INVALID.
 * NOTE(review): some interior lines of this function are not visible in
 * this chunk; comments cover only the statements shown.
 */
14 static inline unsigned long __pte_to_rste(pte_t pte)
19	 * Convert encoding		pte bits	pmd / pud bits
20	 *				lIR.uswrdy.p	dy..R...I...wr
21	 * empty			010.000000.0 -> 00..0...1...00
22	 * prot-none, clean, old	111.000000.1 -> 00..1...1...00
23	 * prot-none, clean, young	111.000001.1 -> 01..1...1...00
24	 * prot-none, dirty, old	111.000010.1 -> 10..1...1...00
25	 * prot-none, dirty, young	111.000011.1 -> 11..1...1...00
26	 * read-only, clean, old	111.000100.1 -> 00..1...1...01
27	 * read-only, clean, young	101.000101.1 -> 01..1...0...01
28	 * read-only, dirty, old	111.000110.1 -> 10..1...1...01
29	 * read-only, dirty, young	101.000111.1 -> 11..1...0...01
30	 * read-write, clean, old	111.001100.1 -> 00..1...1...11
31	 * read-write, clean, young	101.001101.1 -> 01..1...0...11
32	 * read-write, dirty, old	110.001110.1 -> 10..0...1...11
33	 * read-write, dirty, young	100.001111.1 -> 11..0...0...11
34	 * HW-bits: R read-only, I invalid
35	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
38	if (pte_present(pte)) {
		/* Page frame address is position-invariant between formats. */
39		rste = pte_val(pte) & PAGE_MASK;
		/* Relocate each permission/state bit per the table above. */
40		rste |= (pte_val(pte) & _PAGE_READ) >> 4;
41		rste |= (pte_val(pte) & _PAGE_WRITE) >> 4;
42		rste |= (pte_val(pte) & _PAGE_INVALID) >> 5;
		/* PROTECT occupies the same bit position in both formats. */
43		rste |= (pte_val(pte) & _PAGE_PROTECT);
44		rste |= (pte_val(pte) & _PAGE_DIRTY) << 10;
45		rste |= (pte_val(pte) & _PAGE_YOUNG) << 10;
46		rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13;
		/* Non-present pte: encode as an invalid segment entry. */
48		rste = _SEGMENT_ENTRY_INVALID;
/*
 * __rste_to_pte - inverse of __pte_to_rste: convert a region/segment
 * table entry back into pte bit layout.
 *
 * The entry type field distinguishes a region-third (2G, pud) entry
 * from a segment (1M, pmd) entry so the matching *_present() helper can
 * be used.  The bit table below mirrors the one in __pte_to_rste with
 * the shift directions reversed.
 * NOTE(review): some interior lines of this function are not visible in
 * this chunk; comments cover only the statements shown.
 */
52 static inline pte_t __rste_to_pte(unsigned long rste)
	/* 2G entries live in a region-third table, 1M entries in a segment table. */
57	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
58		present = pud_present(__pud(rste));
60		present = pmd_present(__pmd(rste));
63	 * Convert encoding pmd / pud bits	pte bits
64	 *			dy..R...I...wr	lIR.uswrdy.p
65	 * empty		00..0...1...00 -> 010.000000.0
66	 * prot-none, clean, old 00..1...1...00 -> 111.000000.1
67	 * prot-none, clean, young 01..1...1...00 -> 111.000001.1
68	 * prot-none, dirty, old 10..1...1...00 -> 111.000010.1
69	 * prot-none, dirty, young 11..1...1...00 -> 111.000011.1
70	 * read-only, clean, old 00..1...1...01 -> 111.000100.1
71	 * read-only, clean, young 01..1...0...01 -> 101.000101.1
72	 * read-only, dirty, old 10..1...1...01 -> 111.000110.1
73	 * read-only, dirty, young 11..1...0...01 -> 101.000111.1
74	 * read-write, clean, old 00..1...1...11 -> 111.001100.1
75	 * read-write, clean, young 01..1...0...11 -> 101.001101.1
76	 * read-write, dirty, old 10..0...1...11 -> 110.001110.1
77	 * read-write, dirty, young 11..0...0...11 -> 100.001111.1
78	 * HW-bits: R read-only, I invalid
79	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
	/* Present entry: reassemble the pte, undoing each shift from __pte_to_rste. */
83		pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
84		pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
85		pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4;
86		pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4;
87		pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5;
88		pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT);
89		pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10;
90		pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10;
91		pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13;
		/* Non-present entry: return an invalid pte. */
93		pte_val(pte) = _PAGE_INVALID;
/*
 * set_huge_pte_at - install a huge page mapping at *ptep.
 *
 * Converts the pte to rste format, tags the entry with the table-type
 * and LARGE bits appropriate for its level (region-third for 2G pages,
 * segment for 1M pages, chosen by the type field already in *ptep),
 * then stores it.
 * NOTE(review): some interior lines are not visible in this chunk.
 */
97 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
98 pte_t *ptep, pte_t pte)
100	unsigned long rste = __pte_to_rste(pte);
102	/* Set correct table type for 2G hugepages */
103	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
104		rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
		/* 1M hugepage: mark the segment entry as large. */
106		rste |= _SEGMENT_ENTRY_LARGE;
107	pte_val(*ptep) = rste;
/*
 * huge_ptep_get - read a huge page table entry, converting the stored
 * region/segment-table entry back into pte format for generic code.
 */
110 pte_t huge_ptep_get(pte_t *ptep)
112	return __rste_to_pte(pte_val(*ptep));
/*
 * huge_ptep_get_and_clear - atomically fetch the huge pte and replace
 * the entry with an empty one of the matching level.
 *
 * The entry type field in *ptep selects whether the slot is a
 * region-third (2G) or segment (1M) entry; the corresponding
 * *_xchg_direct helper performs the exchange with TLB invalidation.
 * NOTE(review): some interior lines are not visible in this chunk.
 */
115 pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
116 unsigned long addr, pte_t *ptep)
	/* Snapshot the old value in pte format before clearing. */
118	pte_t pte = huge_ptep_get(ptep);
	/* Same slot, viewed as either table level. */
119	pmd_t *pmdp = (pmd_t *) ptep;
120	pud_t *pudp = (pud_t *) ptep;
122	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
123		pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
125		pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
/*
 * huge_pte_alloc - allocate page table levels down to the one that will
 * hold a huge page entry of size @sz.
 *
 * For 2G pages the pud slot itself is returned; for 1M (PMD_SIZE) pages
 * a pmd is additionally allocated and its slot returned.  The slot
 * pointer is cast to pte_t * as the generic hugetlb code expects.
 * NOTE(review): declarations, NULL checks and the final return are not
 * visible in this chunk.
 */
129 pte_t *huge_pte_alloc(struct mm_struct *mm,
130 unsigned long addr, unsigned long sz)
136	pgdp = pgd_offset(mm, addr);
137	pudp = pud_alloc(mm, pgdp, addr);
		/* 2G hugepage: the pud entry is the huge pte slot. */
140		return (pte_t *) pudp;
141	else if (sz == PMD_SIZE)
		/* 1M hugepage: descend one more level. */
142		pmdp = pmd_alloc(mm, pudp, addr);
144	return (pte_t *) pmdp;
/*
 * huge_pte_offset - walk the page tables for @addr and return the slot
 * holding the huge page entry, without allocating.
 *
 * Stops at the pud level when the pud is a large (2G) mapping,
 * otherwise descends to the pmd level for 1M mappings.
 * NOTE(review): declarations and intermediate returns are not visible
 * in this chunk.
 */
147 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
153	pgdp = pgd_offset(mm, addr);
154	if (pgd_present(*pgdp)) {
155		pudp = pud_offset(pgdp, addr);
156		if (pud_present(*pudp)) {
			/* 2G hugepage is mapped directly by the pud. */
157			if (pud_large(*pudp))
158				return (pte_t *) pudp;
159			pmdp = pmd_offset(pudp, addr);
162	return (pte_t *) pmdp;
/* pmd_huge - generic hugetlb hook: a pmd is huge iff it is a large entry. */
165 int pmd_huge(pmd_t pmd)
167	return pmd_large(pmd);
/* pud_huge - generic hugetlb hook: a pud is huge iff it is a large entry. */
170 int pud_huge(pud_t pud)
172	return pud_large(pud);
/*
 * follow_huge_pud - return the struct page for @address within a huge
 * pud mapping.
 *
 * FOLL_GET (taking a reference) is not supported here; the offset into
 * the 2G page is derived from the low bits of @address.
 * NOTE(review): the return type line and the FOLL_GET return value are
 * not visible in this chunk.
 */
176 follow_huge_pud(struct mm_struct *mm, unsigned long address,
177 pud_t *pud, int flags)
179	if (flags & FOLL_GET)
	/* Head page of the pud mapping plus the in-page offset. */
182	return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
/*
 * setup_hugepagesz - parse the "hugepagesz=" kernel boot parameter.
 *
 * Registers an hstate for 1M pages (requires EDAT1 hardware facility)
 * or 2G pages (requires EDAT2); any other size is rejected with an
 * error message.
 * NOTE(review): declarations and the return statements are not visible
 * in this chunk.
 */
185 static __init int setup_hugepagesz(char *opt)
	/* memparse understands size suffixes such as "1M" and "2G". */
190	size = memparse(opt, &opt);
191	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
192		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
193	} else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
194		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
196		pr_err("hugepagesz= specifies an unsupported page size %s\n",
202 __setup("hugepagesz=", setup_hugepagesz);