// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Page table handling routines for radix page table.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#define pr_fmt(fmt) "radix-mmu: " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/string_helpers.h>
#include <linux/memory.h>

#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/trace.h>
#include <asm/uaccess.h>
#include <asm/ultravisor.h>

#include <trace/events/thp.h>

unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;
unsigned long radix_mem_block_size __ro_after_init;
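/*
 * mmu_pid_bits is the number of PID bits the MMU supports (taken from the
 * "ibm,mmu-pid-bits" device-tree property or defaulted below), mmu_base_pid
 * is the first PID handed out to contexts, and radix_mem_block_size bounds
 * the mapping size used for the linear map so memory blocks can be removed
 * without splitting mappings.
 */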

static __ref void *early_alloc_pgtable(unsigned long size, int nid,
			unsigned long region_start, unsigned long region_end)
{
	phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
	phys_addr_t max_addr = MEMBLOCK_ALLOC_ANYWHERE;
	void *ptr;

	if (region_start)
		min_addr = region_start;
	if (region_end)
		max_addr = region_end;

	ptr = memblock_alloc_try_nid(size, size, min_addr, max_addr, nid);

	if (!ptr)
		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa max_addr=%pa\n",
		      __func__, size, size, nid, &min_addr, &max_addr);

	return ptr;
}
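
/*
 * Illustrative only (not a call made in this file): a page-table page for
 * node 0 with no placement constraint would be obtained as
 *
 *	void *p = early_alloc_pgtable(PAGE_SIZE, 0, 0, 0);
 *
 * Passing 0 for region_start/region_end falls back to the memblock
 * defaults above, and allocation failure panics rather than returning
 * NULL, so callers need no error handling.
 */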

/*
 * When allocating pud or pmd pointers, we allocate a complete page
 * of PAGE_SIZE rather than PUD_TABLE_SIZE or PMD_TABLE_SIZE. This
 * is to ensure that the page obtained from the memblock allocator
 * can be completely used as page table page and can be freed
 * correctly when the page table entries are removed.
 */
static int early_map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size,
			  int nid,
			  unsigned long region_start, unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	pgdp = pgd_offset_k(ea);
	p4dp = p4d_offset(pgdp, ea);
	if (p4d_none(*p4dp)) {
		pudp = early_alloc_pgtable(PAGE_SIZE, nid,
					   region_start, region_end);
		p4d_populate(&init_mm, p4dp, pudp);
	}
	pudp = pud_offset(p4dp, ea);
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	if (pud_none(*pudp)) {
		pmdp = early_alloc_pgtable(PAGE_SIZE, nid, region_start,
					   region_end);
		pud_populate(&init_mm, pudp, pmdp);
	}
	pmdp = pmd_offset(pudp, ea);
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	if (!pmd_present(*pmdp)) {
		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
					   region_start, region_end);
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}
	ptep = pte_offset_kernel(pmdp, ea);

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	asm volatile("ptesync": : :"memory");
	return 0;
}
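
/*
 * The walk above installs the leaf entry at the level matching
 * map_page_size: PUD_SIZE mappings are written directly into the pud,
 * PMD_SIZE mappings into the pmd, and anything else as a regular
 * PAGE_SIZE pte. The ptesync makes the new entry visible to the MMU's
 * table walk before the caller relies on the mapping.
 */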

/*
 * nid, region_start, and region_end are hints to try to place the page
 * table memory in the same node or region.
 */
static int __map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size,
			  int nid,
			  unsigned long region_start, unsigned long region_end)
{
	unsigned long pfn = pa >> PAGE_SHIFT;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;
	/*
	 * Make sure task size is correct as per the max addr
	 */
	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);

#ifdef CONFIG_PPC_64K_PAGES
	BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
#endif

	if (unlikely(!slab_is_available()))
		return early_map_kernel_page(ea, pa, flags, map_page_size,
					     nid, region_start, region_end);

	/*
	 * Should make page table allocation functions be able to take a
	 * node, so we can place kernel page tables on the right nodes after
	 * boot.
	 */
	pgdp = pgd_offset_k(ea);
	p4dp = p4d_offset(pgdp, ea);
	pudp = pud_alloc(&init_mm, p4dp, ea);
	if (!pudp)
		return -ENOMEM;
	if (map_page_size == PUD_SIZE) {
		ptep = (pte_t *)pudp;
		goto set_the_pte;
	}
	pmdp = pmd_alloc(&init_mm, pudp, ea);
	if (!pmdp)
		return -ENOMEM;
	if (map_page_size == PMD_SIZE) {
		ptep = pmdp_ptep(pmdp);
		goto set_the_pte;
	}
	ptep = pte_alloc_kernel(pmdp, ea);
	if (!ptep)
		return -ENOMEM;

set_the_pte:
	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
	asm volatile("ptesync": : :"memory");
	return 0;
}

int radix__map_kernel_page(unsigned long ea, unsigned long pa,
			  pgprot_t flags,
			  unsigned int map_page_size)
{
	return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
}

#ifdef CONFIG_STRICT_KERNEL_RWX
void radix__change_memory_range(unsigned long start, unsigned long end,
				unsigned long clear)
{
	unsigned long idx;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	start = ALIGN_DOWN(start, PAGE_SIZE);
	end = PAGE_ALIGN(end); // aligns up

	pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
		 start, end, clear);

	for (idx = start; idx < end; idx += PAGE_SIZE) {
		pgdp = pgd_offset_k(idx);
		p4dp = p4d_offset(pgdp, idx);
		pudp = pud_alloc(&init_mm, p4dp, idx);
		if (!pudp)
			continue;
		if (pud_is_leaf(*pudp)) {
			ptep = (pte_t *)pudp;
			goto update_the_pte;
		}
		pmdp = pmd_alloc(&init_mm, pudp, idx);
		if (!pmdp)
			continue;
		if (pmd_is_leaf(*pmdp)) {
			ptep = pmdp_ptep(pmdp);
			goto update_the_pte;
		}
		ptep = pte_alloc_kernel(pmdp, idx);
		if (!ptep)
			continue;
update_the_pte:
		radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
	}

	radix__flush_tlb_kernel_range(start, end);
}

void radix__mark_rodata_ro(void)
{
	unsigned long start, end;

	start = (unsigned long)_stext;
	end = (unsigned long)__init_begin;

	radix__change_memory_range(start, end, _PAGE_WRITE);
}

void radix__mark_initmem_nx(void)
{
	unsigned long start = (unsigned long)__init_begin;
	unsigned long end = (unsigned long)__init_end;

	radix__change_memory_range(start, end, _PAGE_EXEC);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */

static inline void __meminit
print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
{
	char buf[10];

	if (end <= start)
		return;

	string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));

	pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf,
		exec ? " (exec)" : "");
}

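/*
 * With STRICT_KERNEL_RWX, next_boundary() keeps a mapping from crossing
 * __init_begin, so the kernel text can later be made read-only without
 * having to split a huge linear-map entry.
 */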
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
	if (addr < __pa_symbol(__init_begin))
		return __pa_symbol(__init_begin);
#endif
	return end;
}

static int __meminit create_physical_mapping(unsigned long start,
					     unsigned long end,
					     unsigned long max_mapping_size,
					     int nid, pgprot_t _prot)
{
	unsigned long vaddr, addr, mapping_size = 0;
	bool prev_exec, exec = false;
	pgprot_t prot;
	int psize;

	start = ALIGN(start, PAGE_SIZE);
	end = ALIGN_DOWN(end, PAGE_SIZE);
	for (addr = start; addr < end; addr += mapping_size) {
		unsigned long gap, previous_size;
		int rc;

		gap = next_boundary(addr, end) - addr;
		if (gap > max_mapping_size)
			gap = max_mapping_size;
		previous_size = mapping_size;
		prev_exec = exec;

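		/*
		 * Use the largest page size that both the address alignment
		 * and the remaining gap allow, and that the MMU actually
		 * supports: 1G, then 2M, then the base page size.
		 */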
		if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
		    mmu_psize_defs[MMU_PAGE_1G].shift) {
			mapping_size = PUD_SIZE;
			psize = MMU_PAGE_1G;
		} else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
			   mmu_psize_defs[MMU_PAGE_2M].shift) {
			mapping_size = PMD_SIZE;
			psize = MMU_PAGE_2M;
		} else {
			mapping_size = PAGE_SIZE;
			psize = mmu_virtual_psize;
		}

		vaddr = (unsigned long)__va(addr);

		if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
		    overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) {
			prot = PAGE_KERNEL_X;
			exec = true;
		} else {
			prot = _prot;
			exec = false;
		}

		if (mapping_size != previous_size || exec != prev_exec) {
			print_mapping(start, addr, previous_size, prev_exec);
			start = addr;
		}

		rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
		if (rc)
			return rc;

		update_page_count(psize, 1);
	}

	print_mapping(start, addr, mapping_size, exec);
	return 0;
}

static void __init radix_init_pgtable(void)
{
	unsigned long rts_field;
	phys_addr_t start, end;
	u64 i;

	/* We don't support slb for radix */
	mmu_slb_size = 0;

	/*
	 * Create the linear mapping
	 */
	for_each_mem_range(i, &start, &end) {
		/*
		 * The memblock allocator is up at this point, so the
		 * page tables will be allocated within the range. No
		 * need for a node (which we don't have yet).
		 */

		if (end >= RADIX_VMALLOC_START) {
			pr_warn("Outside the supported range\n");
			continue;
		}

		WARN_ON(create_physical_mapping(start, end,
						radix_mem_block_size,
						-1, PAGE_KERNEL));
	}

	/* Find out how many PID bits are supported */
	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
		if (!mmu_pid_bits)
			mmu_pid_bits = 20;
		mmu_base_pid = 1;
	} else if (cpu_has_feature(CPU_FTR_HVMODE)) {
		if (!mmu_pid_bits)
			mmu_pid_bits = 20;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
		/*
		 * When KVM is possible, we only use the top half of the
		 * PID space to avoid collisions between host and guest PIDs
		 * which can cause problems due to prefetch when exiting the
		 * guest with AIL=3
		 */
		mmu_base_pid = 1 << (mmu_pid_bits - 1);
#else
		mmu_base_pid = 1;
#endif
	} else {
		/* The guest uses the bottom half of the PID space */
		if (!mmu_pid_bits)
			mmu_pid_bits = 19;
		mmu_base_pid = 1;
	}

	/*
	 * Allocate Partition table and process table for the
	 * host.
	 */
	BUG_ON(PRTB_SIZE_SHIFT > 36);
	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
	/*
	 * Fill in the process table.
	 */
	rts_field = radix__get_tree_size();
	process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);

	/*
	 * The init_mm context is given the first available (non-zero) PID,
	 * which is the "guard PID" and contains no page table. PIDR should
	 * never be set to zero because that duplicates the kernel address
	 * space at the 0x0... offset (quadrant 0)!
	 *
	 * An arbitrary PID that may later be allocated by the PID allocator
	 * for userspace processes must not be used either, because that
	 * would cause stale user mappings for that PID on CPUs outside of
	 * the TLB invalidation scheme (because it won't be in mm_cpumask).
	 *
	 * So permanently carve out one PID for the purpose of a guard PID.
	 */
	init_mm.context.id = mmu_base_pid;
	mmu_base_pid++;
}

static void __init radix_init_partition_table(void)
{
	unsigned long rts_field, dw0, dw1;

	mmu_partition_table_init();
	rts_field = radix__get_tree_size();
	dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
	dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR;
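	/*
	 * dw0 points partition 0 at the kernel PGD as a host radix tree
	 * (PATB_HR); dw1 points at the process table (PATB_GR) with its
	 * size encoded as PRTB_SIZE_SHIFT - 12.
	 */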
	mmu_partition_table_set_entry(0, dw0, dw1, false);

	pr_info("Initializing Radix MMU\n");
}

static int __init get_idx_from_shift(unsigned int shift)
{
	int idx = -1;

	switch (shift) {
	case 0xc:
		idx = MMU_PAGE_4K;
		break;
	case 0x10:
		idx = MMU_PAGE_64K;
		break;
	case 0x15:
		idx = MMU_PAGE_2M;
		break;
	case 0x1e:
		idx = MMU_PAGE_1G;
		break;
	}
	return idx;
}

static int __init radix_dt_scan_page_sizes(unsigned long node,
					   const char *uname, int depth,
					   void *data)
{
	int size = 0;
	int shift, idx;
	unsigned int ap;
	const __be32 *prop;
	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);

	/* We are scanning "cpu" nodes only */
	if (type == NULL || strcmp(type, "cpu") != 0)
		return 0;

	/* Find MMU PID size */
	prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
	if (prop && size == 4)
		mmu_pid_bits = be32_to_cpup(prop);

	/* Grab page size encodings */
	prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
	if (!prop)
		return 0;

	pr_info("Page sizes from device-tree:\n");
	for (; size >= 4; size -= 4, ++prop) {

		struct mmu_psize_def *def;

		/* top 3 bits are the AP encoding */
		shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
		ap = be32_to_cpu(prop[0]) >> 29;
		pr_info("Page size shift = %d AP=0x%x\n", shift, ap);

		idx = get_idx_from_shift(shift);
		if (idx < 0)
			continue;

		def = &mmu_psize_defs[idx];
		def->shift = shift;
		def->ap = ap;
	}

	/* needed ? */
	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
	return 1;
}

#ifdef CONFIG_MEMORY_HOTPLUG
static int __init probe_memory_block_size(unsigned long node, const char *uname, int
					  depth, void *data)
{
	unsigned long *mem_block_size = (unsigned long *)data;
	const __be32 *prop;
	int len;

	if (depth != 1)
		return 0;

	if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
		return 0;

	prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);

	if (!prop || len < dt_root_size_cells * sizeof(__be32))
		/*
		 * Nothing in the device tree
		 */
		*mem_block_size = MIN_MEMORY_BLOCK_SIZE;
	else
		*mem_block_size = of_read_number(prop, dt_root_size_cells);
	return 1;
}

static unsigned long radix_memory_block_size(void)
{
	unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;

	/*
	 * The OPAL firmware feature is set by now, so it is safe
	 * to test for it here.
	 */
	if (firmware_has_feature(FW_FEATURE_OPAL))
		mem_block_size = 1UL * 1024 * 1024 * 1024;
	else
		of_scan_flat_dt(probe_memory_block_size, &mem_block_size);

	return mem_block_size;
}

#else /* CONFIG_MEMORY_HOTPLUG */

static unsigned long radix_memory_block_size(void)
{
	return 1UL * 1024 * 1024 * 1024;
}

#endif /* CONFIG_MEMORY_HOTPLUG */


void __init radix__early_init_devtree(void)
{
	int rc;

	/*
	 * Try to find the available page sizes in the device-tree
	 */
	rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
	if (!rc) {
		/*
		 * No page size details found in device tree.
		 * Let's assume we have page 4k and 64k support
		 */
		mmu_psize_defs[MMU_PAGE_4K].shift = 12;
		mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;

		mmu_psize_defs[MMU_PAGE_64K].shift = 16;
		mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
	}

	/*
	 * Max mapping size used when mapping pages. We don't use
	 * ppc_md.memory_block_size() here because this gets called
	 * early, before the machine probe has run. Also the pseries
	 * implementation only checks for ibm,lmb-size, and all
	 * hypervisors supporting radix do expose that device tree
	 * node.
	 */
	radix_mem_block_size = radix_memory_block_size();
	return;
}

static void radix_init_amor(void)
{
	/*
	 * In HV mode, we init AMOR (Authority Mask Override Register) so that
	 * the hypervisor and guest can setup IAMR (Instruction Authority Mask
	 * Register), enable key 0 and set it to 1.
	 *
	 * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
	 */
	mtspr(SPRN_AMOR, (3ul << 62));
}

#ifdef CONFIG_PPC_KUEP
void setup_kuep(bool disabled)
{
	if (disabled || !early_radix_enabled())
		return;

	if (smp_processor_id() == boot_cpuid) {
		pr_info("Activating Kernel Userspace Execution Prevention\n");
		cur_cpu_spec->mmu_features |= MMU_FTR_KUEP;
	}

	/*
	 * Radix always uses key0 of the IAMR to determine if an access is
	 * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
	 * fetch.
	 */
	mtspr(SPRN_IAMR, (1ul << 62));
}
#endif

#ifdef CONFIG_PPC_KUAP
void setup_kuap(bool disabled)
{
	if (disabled || !early_radix_enabled())
		return;

	if (smp_processor_id() == boot_cpuid) {
		pr_info("Activating Kernel Userspace Access Prevention\n");
		cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP;
	}

	/* Make sure userspace can't change the AMR */
	mtspr(SPRN_UAMOR, 0);

	/*
	 * Set the default kernel AMR values on all cpus.
	 */
	mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
	isync();
}
#endif

void __init radix__early_init_mmu(void)
{
	unsigned long lpcr;

#ifdef CONFIG_PPC_64K_PAGES
	/* PAGE_SIZE mappings */
	mmu_virtual_psize = MMU_PAGE_64K;
#else
	mmu_virtual_psize = MMU_PAGE_4K;
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
	/* vmemmap mapping */
	if (mmu_psize_defs[MMU_PAGE_2M].shift) {
		/*
		 * map vmemmap using 2M if available
		 */
		mmu_vmemmap_psize = MMU_PAGE_2M;
	} else
		mmu_vmemmap_psize = mmu_virtual_psize;
#endif
	/*
	 * initialize page table size
	 */
	__pte_index_size = RADIX_PTE_INDEX_SIZE;
	__pmd_index_size = RADIX_PMD_INDEX_SIZE;
	__pud_index_size = RADIX_PUD_INDEX_SIZE;
	__pgd_index_size = RADIX_PGD_INDEX_SIZE;
	__pud_cache_index = RADIX_PUD_INDEX_SIZE;
	__pte_table_size = RADIX_PTE_TABLE_SIZE;
	__pmd_table_size = RADIX_PMD_TABLE_SIZE;
	__pud_table_size = RADIX_PUD_TABLE_SIZE;
	__pgd_table_size = RADIX_PGD_TABLE_SIZE;

	__pmd_val_bits = RADIX_PMD_VAL_BITS;
	__pud_val_bits = RADIX_PUD_VAL_BITS;
	__pgd_val_bits = RADIX_PGD_VAL_BITS;

	__kernel_virt_start = RADIX_KERN_VIRT_START;
	__vmalloc_start = RADIX_VMALLOC_START;
	__vmalloc_end = RADIX_VMALLOC_END;
	__kernel_io_start = RADIX_KERN_IO_START;
	__kernel_io_end = RADIX_KERN_IO_END;
	vmemmap = (struct page *)RADIX_VMEMMAP_START;
	ioremap_bot = IOREMAP_BASE;

#ifdef CONFIG_PCI
	pci_io_base = ISA_IO_BASE;
#endif
	__pte_frag_nr = RADIX_PTE_FRAG_NR;
	__pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
	__pmd_frag_nr = RADIX_PMD_FRAG_NR;
	__pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;
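	/*
	 * The assignments above switch the generic book3s/64 page table
	 * geometry (index sizes, table sizes, layout bounds and PTE-fragment
	 * sizing) over to the radix values; the shared pgtable code reads
	 * these variables instead of compile-time constants so hash and
	 * radix can use the same accessors.
	 */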

	radix_init_pgtable();

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
		radix_init_partition_table();
		radix_init_amor();
	} else {
		radix_init_pseries();
	}

	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);

	/* Switch to the guard PID before turning on MMU */
	radix__switch_mmu_context(NULL, &init_mm);
	tlbiel_all();
}

void radix__early_init_mmu_secondary(void)
{
	unsigned long lpcr;
	/*
	 * update partition table control register and UPRT
	 */
	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);

		set_ptcr_when_no_uv(__pa(partition_tb) |
				    (PATB_SIZE_SHIFT - 12));

		radix_init_amor();
	}

	radix__switch_mmu_context(NULL, &init_mm);
	tlbiel_all();
}

void radix__mmu_cleanup_all(void)
{
	unsigned long lpcr;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
		set_ptcr_when_no_uv(0);
		powernv_set_nmmu_ptcr(0);
		radix__flush_tlb_all();
	}
}

#ifdef CONFIG_MEMORY_HOTPLUG
static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte = pte_start + i;
		if (!pte_none(*pte))
			return;
	}

	pte_free_kernel(&init_mm, pte_start);
	pmd_clear(pmd);
}

static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd = pmd_start + i;
		if (!pmd_none(*pmd))
			return;
	}

	pmd_free(&init_mm, pmd_start);
	pud_clear(pud);
}

static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
	pud_t *pud;
	int i;

	for (i = 0; i < PTRS_PER_PUD; i++) {
		pud = pud_start + i;
		if (!pud_none(*pud))
			return;
	}

	pud_free(&init_mm, pud_start);
	p4d_clear(p4d);
}
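
/*
 * The three helpers above free a lower-level table (and clear the entry
 * that pointed to it) only when every slot in that table is empty; this
 * is how remove_pagetable() below reclaims page table pages bottom-up.
 */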

static void remove_pte_table(pte_t *pte_start, unsigned long addr,
			     unsigned long end)
{
	unsigned long next;
	pte_t *pte;

	pte = pte_start + pte_index(addr);
	for (; addr < end; addr = next, pte++) {
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		if (!pte_present(*pte))
			continue;

		if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
			/*
			 * The vmemmap_free() and remove_section_mapping()
			 * codepaths call us with aligned addresses.
			 */
			WARN_ONCE(1, "%s: unaligned range\n", __func__);
			continue;
		}

		pte_clear(&init_mm, addr, pte);
	}
}

static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
				       unsigned long end)
{
	unsigned long next;
	pte_t *pte_base;
	pmd_t *pmd;

	pmd = pmd_start + pmd_index(addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);

		if (!pmd_present(*pmd))
			continue;

		if (pmd_is_leaf(*pmd)) {
			if (!IS_ALIGNED(addr, PMD_SIZE) ||
			    !IS_ALIGNED(next, PMD_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}
			pte_clear(&init_mm, addr, (pte_t *)pmd);
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
		remove_pte_table(pte_base, addr, next);
		free_pte_table(pte_base, pmd);
	}
}

static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
				       unsigned long end)
{
	unsigned long next;
	pmd_t *pmd_base;
	pud_t *pud;

	pud = pud_start + pud_index(addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);

		if (!pud_present(*pud))
			continue;

		if (pud_is_leaf(*pud)) {
			if (!IS_ALIGNED(addr, PUD_SIZE) ||
			    !IS_ALIGNED(next, PUD_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}
			pte_clear(&init_mm, addr, (pte_t *)pud);
			continue;
		}

		pmd_base = (pmd_t *)pud_page_vaddr(*pud);
		remove_pmd_table(pmd_base, addr, next);
		free_pmd_table(pmd_base, pud);
	}
}

static void __meminit remove_pagetable(unsigned long start, unsigned long end)
{
	unsigned long addr, next;
	pud_t *pud_base;
	pgd_t *pgd;
	p4d_t *p4d;

	spin_lock(&init_mm.page_table_lock);

	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);

		pgd = pgd_offset_k(addr);
		p4d = p4d_offset(pgd, addr);
		if (!p4d_present(*p4d))
			continue;

		if (p4d_is_leaf(*p4d)) {
			if (!IS_ALIGNED(addr, P4D_SIZE) ||
			    !IS_ALIGNED(next, P4D_SIZE)) {
				WARN_ONCE(1, "%s: unaligned range\n", __func__);
				continue;
			}

			pte_clear(&init_mm, addr, (pte_t *)pgd);
			continue;
		}

		pud_base = (pud_t *)p4d_page_vaddr(*p4d);
		remove_pud_table(pud_base, addr, next);
		free_pud_table(pud_base, p4d);
	}

	spin_unlock(&init_mm.page_table_lock);
	radix__flush_tlb_kernel_range(start, end);
}

int __meminit radix__create_section_mapping(unsigned long start,
					    unsigned long end, int nid,
					    pgprot_t prot)
{
	if (end >= RADIX_VMALLOC_START) {
		pr_warn("Outside the supported range\n");
		return -1;
	}

	return create_physical_mapping(__pa(start), __pa(end),
				       radix_mem_block_size, nid, prot);
}

int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
{
	remove_pagetable(start, end);
	return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
				 pgprot_t flags, unsigned int map_page_size,
				 int nid)
{
	return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
}

int __meminit radix__vmemmap_create_mapping(unsigned long start,
					    unsigned long page_size,
					    unsigned long phys)
{
	/* Create a PTE encoding */
	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
	int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
	int ret;

	if ((start + page_size) >= RADIX_VMEMMAP_END) {
		pr_warn("Outside the supported range\n");
		return -1;
	}

	ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
	BUG_ON(ret);

	return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
	remove_pagetable(start, start + page_size);
}
#endif
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
					 pmd_t *pmdp, unsigned long clr,
					 unsigned long set)
{
	unsigned long old;

#ifdef CONFIG_DEBUG_VM
	WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
	assert_spin_locked(pmd_lockptr(mm, pmdp));
#endif

	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
	trace_hugepage_update(addr, old, clr, set);

	return old;
}

pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
				 pmd_t *pmdp)

{
	pmd_t pmd;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
	VM_BUG_ON(pmd_devmap(*pmdp));
	/*
	 * khugepaged calls this for normal pmd
	 */
	pmd = *pmdp;
	pmd_clear(pmdp);

	radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);

	return pmd;
}

/*
 * For us pgtable_t is pte_t *. In order to save the deposited
 * page table, we consider the allocated page table as a list
 * head. On withdraw we need to make sure we zero out the used
 * list_head memory area.
 */
void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				       pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pte_t *ptep;
	pgtable_t pgtable;
	struct list_head *lh;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
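	/*
	 * The list_head overlaid the first two PTE slots of the deposited
	 * table; clear both before handing the table back.
	 */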
	ptep = (pte_t *) pgtable;
	*ptep = __pte(0);
	ptep++;
	*ptep = __pte(0);
	return pgtable;
}

pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
				     unsigned long addr, pmd_t *pmdp)
{
	pmd_t old_pmd;
	unsigned long old;

	old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
	old_pmd = __pmd(old);
	return old_pmd;
}

#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
				  pte_t entry, unsigned long address, int psize)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
					      _PAGE_RW | _PAGE_EXEC);

	unsigned long change = pte_val(entry) ^ pte_val(*ptep);
	/*
	 * To avoid NMMU hang while relaxing access, we need to mark
	 * the pte invalid in between.
	 */
	if ((change & _PAGE_RW) && atomic_read(&mm->context.copros) > 0) {
		unsigned long old_pte, new_pte;

		old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
		/*
		 * new value of pte
		 */
		new_pte = old_pte | set;
		radix__flush_tlb_page_psize(mm, address, psize);
		__radix_pte_update(ptep, _PAGE_INVALID, new_pte);
	} else {
		__radix_pte_update(ptep, 0, set);
		/*
		 * Book3S does not require a TLB flush when relaxing access
		 * restrictions when the address space is not attached to a
		 * NMMU, because the core MMU will reload the pte after taking
		 * an access fault, which is defined by the architecture.
		 */
	}
	/* See ptesync comment in radix__set_pte_at */
}

void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep,
				    pte_t old_pte, pte_t pte)
{
	struct mm_struct *mm = vma->vm_mm;

	/*
	 * To avoid NMMU hang while relaxing access we need to flush the tlb before
	 * we set the new value. We need to do this only for radix, because hash
	 * translation does flush when updating the linux pte.
	 */
	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
	    (atomic_read(&mm->context.copros) > 0))
		radix__flush_tlb_page(vma, addr);

	set_pte_at(mm, addr, ptep, pte);
}

int __init arch_ioremap_pud_supported(void)
{
	/* HPT does not cope with large pages in the vmalloc area */
	return radix_enabled();
}

int __init arch_ioremap_pmd_supported(void)
{
	return radix_enabled();
}

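/*
 * p4d_free_pud_page() has nothing to do: arch_ioremap_p4d_supported()
 * below returns 0, so huge mappings are never created at the p4d level
 * and there is never a pud table hanging off a p4d entry to free.
 */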
int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
{
	return 0;
}

int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	pte_t *ptep = (pte_t *)pud;
	pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);

	if (!radix_enabled())
		return 0;

	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud);

	return 1;
}

int pud_clear_huge(pud_t *pud)
{
	if (pud_is_leaf(*pud)) {
		pud_clear(pud);
		return 1;
	}

	return 0;
}

int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
	pmd_t *pmd;
	int i;

	pmd = (pmd_t *)pud_page_vaddr(*pud);
	pud_clear(pud);

	flush_tlb_kernel_range(addr, addr + PUD_SIZE);

	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (!pmd_none(pmd[i])) {
			pte_t *pte;
			pte = (pte_t *)pmd_page_vaddr(pmd[i]);

			pte_free_kernel(&init_mm, pte);
		}
	}

	pmd_free(&init_mm, pmd);

	return 1;
}

int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	pte_t *ptep = (pte_t *)pmd;
	pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);

	if (!radix_enabled())
		return 0;

	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd);

	return 1;
}

int pmd_clear_huge(pmd_t *pmd)
{
	if (pmd_is_leaf(*pmd)) {
		pmd_clear(pmd);
		return 1;
	}

	return 0;
}

int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	pte_t *pte;

	pte = (pte_t *)pmd_page_vaddr(*pmd);
	pmd_clear(pmd);

	flush_tlb_kernel_range(addr, addr + PMD_SIZE);

	pte_free_kernel(&init_mm, pte);

	return 1;
}

int __init arch_ioremap_p4d_supported(void)
{
	return 0;
}