xref: /OK3568_Linux_fs/kernel/arch/powerpc/mm/book3s64/radix_pgtable.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Page table handling routines for radix page table.
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
6*4882a593Smuzhiyun  */
7*4882a593Smuzhiyun 
8*4882a593Smuzhiyun #define pr_fmt(fmt) "radix-mmu: " fmt
9*4882a593Smuzhiyun 
10*4882a593Smuzhiyun #include <linux/io.h>
11*4882a593Smuzhiyun #include <linux/kernel.h>
12*4882a593Smuzhiyun #include <linux/sched/mm.h>
13*4882a593Smuzhiyun #include <linux/memblock.h>
14*4882a593Smuzhiyun #include <linux/of_fdt.h>
15*4882a593Smuzhiyun #include <linux/mm.h>
16*4882a593Smuzhiyun #include <linux/hugetlb.h>
17*4882a593Smuzhiyun #include <linux/string_helpers.h>
18*4882a593Smuzhiyun #include <linux/memory.h>
19*4882a593Smuzhiyun 
20*4882a593Smuzhiyun #include <asm/pgalloc.h>
21*4882a593Smuzhiyun #include <asm/mmu_context.h>
22*4882a593Smuzhiyun #include <asm/dma.h>
23*4882a593Smuzhiyun #include <asm/machdep.h>
24*4882a593Smuzhiyun #include <asm/mmu.h>
25*4882a593Smuzhiyun #include <asm/firmware.h>
26*4882a593Smuzhiyun #include <asm/powernv.h>
27*4882a593Smuzhiyun #include <asm/sections.h>
28*4882a593Smuzhiyun #include <asm/smp.h>
29*4882a593Smuzhiyun #include <asm/trace.h>
30*4882a593Smuzhiyun #include <asm/uaccess.h>
31*4882a593Smuzhiyun #include <asm/ultravisor.h>
32*4882a593Smuzhiyun 
33*4882a593Smuzhiyun #include <trace/events/thp.h>
34*4882a593Smuzhiyun 
35*4882a593Smuzhiyun unsigned int mmu_pid_bits;
36*4882a593Smuzhiyun unsigned int mmu_base_pid;
37*4882a593Smuzhiyun unsigned long radix_mem_block_size __ro_after_init;
38*4882a593Smuzhiyun 
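/*
 * Boot-time page-table allocator. Memory comes straight from memblock,
 * optionally constrained to the [region_start, region_end) physical range
 * and to @nid so that the table lands close to the memory it will map.
 * There is no recovery path this early in boot, so failure is fatal.
 */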
39*4882a593Smuzhiyun static __ref void *early_alloc_pgtable(unsigned long size, int nid,
40*4882a593Smuzhiyun 			unsigned long region_start, unsigned long region_end)
41*4882a593Smuzhiyun {
42*4882a593Smuzhiyun 	phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
43*4882a593Smuzhiyun 	phys_addr_t max_addr = MEMBLOCK_ALLOC_ANYWHERE;
44*4882a593Smuzhiyun 	void *ptr;
45*4882a593Smuzhiyun 
46*4882a593Smuzhiyun 	if (region_start)
47*4882a593Smuzhiyun 		min_addr = region_start;
48*4882a593Smuzhiyun 	if (region_end)
49*4882a593Smuzhiyun 		max_addr = region_end;
50*4882a593Smuzhiyun 
51*4882a593Smuzhiyun 	ptr = memblock_alloc_try_nid(size, size, min_addr, max_addr, nid);
52*4882a593Smuzhiyun 
53*4882a593Smuzhiyun 	if (!ptr)
54*4882a593Smuzhiyun 		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa max_addr=%pa\n",
55*4882a593Smuzhiyun 		      __func__, size, size, nid, &min_addr, &max_addr);
56*4882a593Smuzhiyun 
57*4882a593Smuzhiyun 	return ptr;
58*4882a593Smuzhiyun }
59*4882a593Smuzhiyun 
60*4882a593Smuzhiyun /*
61*4882a593Smuzhiyun  * When allocating pud or pmd pointers, we allocate a complete page
62*4882a593Smuzhiyun  * of PAGE_SIZE rather than PUD_TABLE_SIZE or PMD_TABLE_SIZE. This
63*4882a593Smuzhiyun  * is to ensure that the page obtained from the memblock allocator
64*4882a593Smuzhiyun  * can be completely used as page table page and can be freed
65*4882a593Smuzhiyun  * correctly when the page table entries are removed.
66*4882a593Smuzhiyun  */
67*4882a593Smuzhiyun static int early_map_kernel_page(unsigned long ea, unsigned long pa,
68*4882a593Smuzhiyun 			  pgprot_t flags,
69*4882a593Smuzhiyun 			  unsigned int map_page_size,
70*4882a593Smuzhiyun 			  int nid,
71*4882a593Smuzhiyun 			  unsigned long region_start, unsigned long region_end)
72*4882a593Smuzhiyun {
73*4882a593Smuzhiyun 	unsigned long pfn = pa >> PAGE_SHIFT;
74*4882a593Smuzhiyun 	pgd_t *pgdp;
75*4882a593Smuzhiyun 	p4d_t *p4dp;
76*4882a593Smuzhiyun 	pud_t *pudp;
77*4882a593Smuzhiyun 	pmd_t *pmdp;
78*4882a593Smuzhiyun 	pte_t *ptep;
79*4882a593Smuzhiyun 
80*4882a593Smuzhiyun 	pgdp = pgd_offset_k(ea);
81*4882a593Smuzhiyun 	p4dp = p4d_offset(pgdp, ea);
82*4882a593Smuzhiyun 	if (p4d_none(*p4dp)) {
83*4882a593Smuzhiyun 		pudp = early_alloc_pgtable(PAGE_SIZE, nid,
84*4882a593Smuzhiyun 					   region_start, region_end);
85*4882a593Smuzhiyun 		p4d_populate(&init_mm, p4dp, pudp);
86*4882a593Smuzhiyun 	}
87*4882a593Smuzhiyun 	pudp = pud_offset(p4dp, ea);
88*4882a593Smuzhiyun 	if (map_page_size == PUD_SIZE) {
89*4882a593Smuzhiyun 		ptep = (pte_t *)pudp;
90*4882a593Smuzhiyun 		goto set_the_pte;
91*4882a593Smuzhiyun 	}
92*4882a593Smuzhiyun 	if (pud_none(*pudp)) {
93*4882a593Smuzhiyun 		pmdp = early_alloc_pgtable(PAGE_SIZE, nid, region_start,
94*4882a593Smuzhiyun 					   region_end);
95*4882a593Smuzhiyun 		pud_populate(&init_mm, pudp, pmdp);
96*4882a593Smuzhiyun 	}
97*4882a593Smuzhiyun 	pmdp = pmd_offset(pudp, ea);
98*4882a593Smuzhiyun 	if (map_page_size == PMD_SIZE) {
99*4882a593Smuzhiyun 		ptep = pmdp_ptep(pmdp);
100*4882a593Smuzhiyun 		goto set_the_pte;
101*4882a593Smuzhiyun 	}
102*4882a593Smuzhiyun 	if (!pmd_present(*pmdp)) {
103*4882a593Smuzhiyun 		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
104*4882a593Smuzhiyun 						region_start, region_end);
105*4882a593Smuzhiyun 		pmd_populate_kernel(&init_mm, pmdp, ptep);
106*4882a593Smuzhiyun 	}
107*4882a593Smuzhiyun 	ptep = pte_offset_kernel(pmdp, ea);
108*4882a593Smuzhiyun 
109*4882a593Smuzhiyun set_the_pte:
110*4882a593Smuzhiyun 	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
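	/*
	 * Order the PTE store above before any following access that could
	 * depend on the new translation being visible to the table walker.
	 */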
111*4882a593Smuzhiyun 	asm volatile("ptesync": : :"memory");
112*4882a593Smuzhiyun 	return 0;
113*4882a593Smuzhiyun }
114*4882a593Smuzhiyun 
115*4882a593Smuzhiyun /*
116*4882a593Smuzhiyun  * nid, region_start, and region_end are hints to try to place the page
117*4882a593Smuzhiyun  * table memory in the same node or region.
118*4882a593Smuzhiyun  */
119*4882a593Smuzhiyun static int __map_kernel_page(unsigned long ea, unsigned long pa,
120*4882a593Smuzhiyun 			  pgprot_t flags,
121*4882a593Smuzhiyun 			  unsigned int map_page_size,
122*4882a593Smuzhiyun 			  int nid,
123*4882a593Smuzhiyun 			  unsigned long region_start, unsigned long region_end)
124*4882a593Smuzhiyun {
125*4882a593Smuzhiyun 	unsigned long pfn = pa >> PAGE_SHIFT;
126*4882a593Smuzhiyun 	pgd_t *pgdp;
127*4882a593Smuzhiyun 	p4d_t *p4dp;
128*4882a593Smuzhiyun 	pud_t *pudp;
129*4882a593Smuzhiyun 	pmd_t *pmdp;
130*4882a593Smuzhiyun 	pte_t *ptep;
131*4882a593Smuzhiyun 	/*
132*4882a593Smuzhiyun 	 * Make sure task size is correct as per the max address
133*4882a593Smuzhiyun 	 */
134*4882a593Smuzhiyun 	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
135*4882a593Smuzhiyun 
136*4882a593Smuzhiyun #ifdef CONFIG_PPC_64K_PAGES
137*4882a593Smuzhiyun 	BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
138*4882a593Smuzhiyun #endif
139*4882a593Smuzhiyun 
140*4882a593Smuzhiyun 	if (unlikely(!slab_is_available()))
141*4882a593Smuzhiyun 		return early_map_kernel_page(ea, pa, flags, map_page_size,
142*4882a593Smuzhiyun 						nid, region_start, region_end);
143*4882a593Smuzhiyun 
144*4882a593Smuzhiyun 	/*
145*4882a593Smuzhiyun 	 * Should make page table allocation functions be able to take a
146*4882a593Smuzhiyun 	 * node, so we can place kernel page tables on the right nodes after
147*4882a593Smuzhiyun 	 * boot.
148*4882a593Smuzhiyun 	 */
149*4882a593Smuzhiyun 	pgdp = pgd_offset_k(ea);
150*4882a593Smuzhiyun 	p4dp = p4d_offset(pgdp, ea);
151*4882a593Smuzhiyun 	pudp = pud_alloc(&init_mm, p4dp, ea);
152*4882a593Smuzhiyun 	if (!pudp)
153*4882a593Smuzhiyun 		return -ENOMEM;
154*4882a593Smuzhiyun 	if (map_page_size == PUD_SIZE) {
155*4882a593Smuzhiyun 		ptep = (pte_t *)pudp;
156*4882a593Smuzhiyun 		goto set_the_pte;
157*4882a593Smuzhiyun 	}
158*4882a593Smuzhiyun 	pmdp = pmd_alloc(&init_mm, pudp, ea);
159*4882a593Smuzhiyun 	if (!pmdp)
160*4882a593Smuzhiyun 		return -ENOMEM;
161*4882a593Smuzhiyun 	if (map_page_size == PMD_SIZE) {
162*4882a593Smuzhiyun 		ptep = pmdp_ptep(pmdp);
163*4882a593Smuzhiyun 		goto set_the_pte;
164*4882a593Smuzhiyun 	}
165*4882a593Smuzhiyun 	ptep = pte_alloc_kernel(pmdp, ea);
166*4882a593Smuzhiyun 	if (!ptep)
167*4882a593Smuzhiyun 		return -ENOMEM;
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun set_the_pte:
170*4882a593Smuzhiyun 	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
171*4882a593Smuzhiyun 	asm volatile("ptesync": : :"memory");
172*4882a593Smuzhiyun 	return 0;
173*4882a593Smuzhiyun }
174*4882a593Smuzhiyun 
175*4882a593Smuzhiyun int radix__map_kernel_page(unsigned long ea, unsigned long pa,
176*4882a593Smuzhiyun 			  pgprot_t flags,
177*4882a593Smuzhiyun 			  unsigned int map_page_size)
178*4882a593Smuzhiyun {
179*4882a593Smuzhiyun 	return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
180*4882a593Smuzhiyun }
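/*
 * Illustrative use only (hypothetical caller, not a call site in this file):
 *
 *	if (radix__map_kernel_page(vaddr, paddr, PAGE_KERNEL, PAGE_SIZE))
 *		pr_err("radix: failed to map kernel page\n");
 *
 * vaddr/paddr are placeholders the caller would supply.
 */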
181*4882a593Smuzhiyun 
182*4882a593Smuzhiyun #ifdef CONFIG_STRICT_KERNEL_RWX
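/*
 * Walk a kernel virtual range page by page and clear the given permission
 * bits from each PTE, then flush the kernel TLB for the range. Used below
 * to strip _PAGE_WRITE from rodata and _PAGE_EXEC from initmem.
 */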
183*4882a593Smuzhiyun void radix__change_memory_range(unsigned long start, unsigned long end,
184*4882a593Smuzhiyun 				unsigned long clear)
185*4882a593Smuzhiyun {
186*4882a593Smuzhiyun 	unsigned long idx;
187*4882a593Smuzhiyun 	pgd_t *pgdp;
188*4882a593Smuzhiyun 	p4d_t *p4dp;
189*4882a593Smuzhiyun 	pud_t *pudp;
190*4882a593Smuzhiyun 	pmd_t *pmdp;
191*4882a593Smuzhiyun 	pte_t *ptep;
192*4882a593Smuzhiyun 
193*4882a593Smuzhiyun 	start = ALIGN_DOWN(start, PAGE_SIZE);
194*4882a593Smuzhiyun 	end = PAGE_ALIGN(end); // aligns up
195*4882a593Smuzhiyun 
196*4882a593Smuzhiyun 	pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
197*4882a593Smuzhiyun 		 start, end, clear);
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun 	for (idx = start; idx < end; idx += PAGE_SIZE) {
200*4882a593Smuzhiyun 		pgdp = pgd_offset_k(idx);
201*4882a593Smuzhiyun 		p4dp = p4d_offset(pgdp, idx);
202*4882a593Smuzhiyun 		pudp = pud_alloc(&init_mm, p4dp, idx);
203*4882a593Smuzhiyun 		if (!pudp)
204*4882a593Smuzhiyun 			continue;
205*4882a593Smuzhiyun 		if (pud_is_leaf(*pudp)) {
206*4882a593Smuzhiyun 			ptep = (pte_t *)pudp;
207*4882a593Smuzhiyun 			goto update_the_pte;
208*4882a593Smuzhiyun 		}
209*4882a593Smuzhiyun 		pmdp = pmd_alloc(&init_mm, pudp, idx);
210*4882a593Smuzhiyun 		if (!pmdp)
211*4882a593Smuzhiyun 			continue;
212*4882a593Smuzhiyun 		if (pmd_is_leaf(*pmdp)) {
213*4882a593Smuzhiyun 			ptep = pmdp_ptep(pmdp);
214*4882a593Smuzhiyun 			goto update_the_pte;
215*4882a593Smuzhiyun 		}
216*4882a593Smuzhiyun 		ptep = pte_alloc_kernel(pmdp, idx);
217*4882a593Smuzhiyun 		if (!ptep)
218*4882a593Smuzhiyun 			continue;
219*4882a593Smuzhiyun update_the_pte:
220*4882a593Smuzhiyun 		radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
221*4882a593Smuzhiyun 	}
222*4882a593Smuzhiyun 
223*4882a593Smuzhiyun 	radix__flush_tlb_kernel_range(start, end);
224*4882a593Smuzhiyun }
225*4882a593Smuzhiyun 
226*4882a593Smuzhiyun void radix__mark_rodata_ro(void)
227*4882a593Smuzhiyun {
228*4882a593Smuzhiyun 	unsigned long start, end;
229*4882a593Smuzhiyun 
230*4882a593Smuzhiyun 	start = (unsigned long)_stext;
231*4882a593Smuzhiyun 	end = (unsigned long)__init_begin;
232*4882a593Smuzhiyun 
233*4882a593Smuzhiyun 	radix__change_memory_range(start, end, _PAGE_WRITE);
234*4882a593Smuzhiyun }
235*4882a593Smuzhiyun 
236*4882a593Smuzhiyun void radix__mark_initmem_nx(void)
237*4882a593Smuzhiyun {
238*4882a593Smuzhiyun 	unsigned long start = (unsigned long)__init_begin;
239*4882a593Smuzhiyun 	unsigned long end = (unsigned long)__init_end;
240*4882a593Smuzhiyun 
241*4882a593Smuzhiyun 	radix__change_memory_range(start, end, _PAGE_EXEC);
242*4882a593Smuzhiyun }
243*4882a593Smuzhiyun #endif /* CONFIG_STRICT_KERNEL_RWX */
244*4882a593Smuzhiyun 
245*4882a593Smuzhiyun static inline void __meminit
246*4882a593Smuzhiyun print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
247*4882a593Smuzhiyun {
248*4882a593Smuzhiyun 	char buf[10];
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun 	if (end <= start)
251*4882a593Smuzhiyun 		return;
252*4882a593Smuzhiyun 
253*4882a593Smuzhiyun 	string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));
254*4882a593Smuzhiyun 
255*4882a593Smuzhiyun 	pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf,
256*4882a593Smuzhiyun 		exec ? " (exec)" : "");
257*4882a593Smuzhiyun }
258*4882a593Smuzhiyun 
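/*
 * With STRICT_KERNEL_RWX, cap a linear-map chunk at __init_begin so that
 * mappings never straddle the text/rodata boundary and their permissions
 * can later be changed without splitting a larger page.
 */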
259*4882a593Smuzhiyun static unsigned long next_boundary(unsigned long addr, unsigned long end)
260*4882a593Smuzhiyun {
261*4882a593Smuzhiyun #ifdef CONFIG_STRICT_KERNEL_RWX
262*4882a593Smuzhiyun 	if (addr < __pa_symbol(__init_begin))
263*4882a593Smuzhiyun 		return __pa_symbol(__init_begin);
264*4882a593Smuzhiyun #endif
265*4882a593Smuzhiyun 	return end;
266*4882a593Smuzhiyun }
267*4882a593Smuzhiyun 
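/*
 * Map the physical range [start, end) into the linear mapping, preferring
 * 1G and then 2M pages whenever the alignment and the distance to the next
 * boundary allow it, falling back to base pages otherwise. Chunks that
 * overlap kernel or interrupt-vector text are mapped executable.
 */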
268*4882a593Smuzhiyun static int __meminit create_physical_mapping(unsigned long start,
269*4882a593Smuzhiyun 					     unsigned long end,
270*4882a593Smuzhiyun 					     unsigned long max_mapping_size,
271*4882a593Smuzhiyun 					     int nid, pgprot_t _prot)
272*4882a593Smuzhiyun {
273*4882a593Smuzhiyun 	unsigned long vaddr, addr, mapping_size = 0;
274*4882a593Smuzhiyun 	bool prev_exec, exec = false;
275*4882a593Smuzhiyun 	pgprot_t prot;
276*4882a593Smuzhiyun 	int psize;
277*4882a593Smuzhiyun 
278*4882a593Smuzhiyun 	start = ALIGN(start, PAGE_SIZE);
279*4882a593Smuzhiyun 	end   = ALIGN_DOWN(end, PAGE_SIZE);
280*4882a593Smuzhiyun 	for (addr = start; addr < end; addr += mapping_size) {
281*4882a593Smuzhiyun 		unsigned long gap, previous_size;
282*4882a593Smuzhiyun 		int rc;
283*4882a593Smuzhiyun 
284*4882a593Smuzhiyun 		gap = next_boundary(addr, end) - addr;
285*4882a593Smuzhiyun 		if (gap > max_mapping_size)
286*4882a593Smuzhiyun 			gap = max_mapping_size;
287*4882a593Smuzhiyun 		previous_size = mapping_size;
288*4882a593Smuzhiyun 		prev_exec = exec;
289*4882a593Smuzhiyun 
290*4882a593Smuzhiyun 		if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
291*4882a593Smuzhiyun 		    mmu_psize_defs[MMU_PAGE_1G].shift) {
292*4882a593Smuzhiyun 			mapping_size = PUD_SIZE;
293*4882a593Smuzhiyun 			psize = MMU_PAGE_1G;
294*4882a593Smuzhiyun 		} else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
295*4882a593Smuzhiyun 			   mmu_psize_defs[MMU_PAGE_2M].shift) {
296*4882a593Smuzhiyun 			mapping_size = PMD_SIZE;
297*4882a593Smuzhiyun 			psize = MMU_PAGE_2M;
298*4882a593Smuzhiyun 		} else {
299*4882a593Smuzhiyun 			mapping_size = PAGE_SIZE;
300*4882a593Smuzhiyun 			psize = mmu_virtual_psize;
301*4882a593Smuzhiyun 		}
302*4882a593Smuzhiyun 
303*4882a593Smuzhiyun 		vaddr = (unsigned long)__va(addr);
304*4882a593Smuzhiyun 
305*4882a593Smuzhiyun 		if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
306*4882a593Smuzhiyun 		    overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) {
307*4882a593Smuzhiyun 			prot = PAGE_KERNEL_X;
308*4882a593Smuzhiyun 			exec = true;
309*4882a593Smuzhiyun 		} else {
310*4882a593Smuzhiyun 			prot = _prot;
311*4882a593Smuzhiyun 			exec = false;
312*4882a593Smuzhiyun 		}
313*4882a593Smuzhiyun 
314*4882a593Smuzhiyun 		if (mapping_size != previous_size || exec != prev_exec) {
315*4882a593Smuzhiyun 			print_mapping(start, addr, previous_size, prev_exec);
316*4882a593Smuzhiyun 			start = addr;
317*4882a593Smuzhiyun 		}
318*4882a593Smuzhiyun 
319*4882a593Smuzhiyun 		rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
320*4882a593Smuzhiyun 		if (rc)
321*4882a593Smuzhiyun 			return rc;
322*4882a593Smuzhiyun 
323*4882a593Smuzhiyun 		update_page_count(psize, 1);
324*4882a593Smuzhiyun 	}
325*4882a593Smuzhiyun 
326*4882a593Smuzhiyun 	print_mapping(start, addr, mapping_size, exec);
327*4882a593Smuzhiyun 	return 0;
328*4882a593Smuzhiyun }
329*4882a593Smuzhiyun 
330*4882a593Smuzhiyun static void __init radix_init_pgtable(void)
331*4882a593Smuzhiyun {
332*4882a593Smuzhiyun 	unsigned long rts_field;
333*4882a593Smuzhiyun 	phys_addr_t start, end;
334*4882a593Smuzhiyun 	u64 i;
335*4882a593Smuzhiyun 
336*4882a593Smuzhiyun 	/* We don't support slb for radix */
337*4882a593Smuzhiyun 	mmu_slb_size = 0;
338*4882a593Smuzhiyun 
339*4882a593Smuzhiyun 	/*
340*4882a593Smuzhiyun 	 * Create the linear mapping
341*4882a593Smuzhiyun 	 */
342*4882a593Smuzhiyun 	for_each_mem_range(i, &start, &end) {
343*4882a593Smuzhiyun 		/*
344*4882a593Smuzhiyun 		 * The memblock allocator is up at this point, so the
345*4882a593Smuzhiyun 		 * page tables will be allocated within the range. No
346*4882a593Smuzhiyun 		 * need for a node (which we don't have yet).
347*4882a593Smuzhiyun 		 */
348*4882a593Smuzhiyun 
349*4882a593Smuzhiyun 		if (end >= RADIX_VMALLOC_START) {
350*4882a593Smuzhiyun 			pr_warn("Outside the supported range\n");
351*4882a593Smuzhiyun 			continue;
352*4882a593Smuzhiyun 		}
353*4882a593Smuzhiyun 
354*4882a593Smuzhiyun 		WARN_ON(create_physical_mapping(start, end,
355*4882a593Smuzhiyun 						radix_mem_block_size,
356*4882a593Smuzhiyun 						-1, PAGE_KERNEL));
357*4882a593Smuzhiyun 	}
358*4882a593Smuzhiyun 
359*4882a593Smuzhiyun 	/* Find out how many PID bits are supported */
360*4882a593Smuzhiyun 	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
361*4882a593Smuzhiyun 		if (!mmu_pid_bits)
362*4882a593Smuzhiyun 			mmu_pid_bits = 20;
363*4882a593Smuzhiyun 		mmu_base_pid = 1;
364*4882a593Smuzhiyun 	} else if (cpu_has_feature(CPU_FTR_HVMODE)) {
365*4882a593Smuzhiyun 		if (!mmu_pid_bits)
366*4882a593Smuzhiyun 			mmu_pid_bits = 20;
367*4882a593Smuzhiyun #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
368*4882a593Smuzhiyun 		/*
369*4882a593Smuzhiyun 		 * When KVM is possible, we only use the top half of the
370*4882a593Smuzhiyun 		 * PID space to avoid collisions between host and guest PIDs
371*4882a593Smuzhiyun 		 * which can cause problems due to prefetch when exiting the
372*4882a593Smuzhiyun 		 * guest with AIL=3
373*4882a593Smuzhiyun 		 */
374*4882a593Smuzhiyun 		mmu_base_pid = 1 << (mmu_pid_bits - 1);
375*4882a593Smuzhiyun #else
376*4882a593Smuzhiyun 		mmu_base_pid = 1;
377*4882a593Smuzhiyun #endif
378*4882a593Smuzhiyun 	} else {
379*4882a593Smuzhiyun 		/* The guest uses the bottom half of the PID space */
380*4882a593Smuzhiyun 		if (!mmu_pid_bits)
381*4882a593Smuzhiyun 			mmu_pid_bits = 19;
382*4882a593Smuzhiyun 		mmu_base_pid = 1;
383*4882a593Smuzhiyun 	}
384*4882a593Smuzhiyun 
385*4882a593Smuzhiyun 	/*
386*4882a593Smuzhiyun 	 * Allocate Partition table and process table for the
387*4882a593Smuzhiyun 	 * host.
388*4882a593Smuzhiyun 	 */
389*4882a593Smuzhiyun 	BUG_ON(PRTB_SIZE_SHIFT > 36);
390*4882a593Smuzhiyun 	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
391*4882a593Smuzhiyun 	/*
392*4882a593Smuzhiyun 	 * Fill in the process table.
393*4882a593Smuzhiyun 	 */
394*4882a593Smuzhiyun 	rts_field = radix__get_tree_size();
395*4882a593Smuzhiyun 	process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
396*4882a593Smuzhiyun 
397*4882a593Smuzhiyun 	/*
398*4882a593Smuzhiyun 	 * The init_mm context is given the first available (non-zero) PID,
399*4882a593Smuzhiyun 	 * which is the "guard PID" and contains no page table. PIDR should
400*4882a593Smuzhiyun 	 * never be set to zero because that duplicates the kernel address
401*4882a593Smuzhiyun 	 * space at the 0x0... offset (quadrant 0)!
402*4882a593Smuzhiyun 	 *
403*4882a593Smuzhiyun 	 * An arbitrary PID that may later be allocated by the PID allocator
404*4882a593Smuzhiyun 	 * for userspace processes must not be used either, because that
405*4882a593Smuzhiyun 	 * would cause stale user mappings for that PID on CPUs outside of
406*4882a593Smuzhiyun 	 * the TLB invalidation scheme (because it won't be in mm_cpumask).
407*4882a593Smuzhiyun 	 *
408*4882a593Smuzhiyun 	 * So permanently carve out one PID for the purpose of a guard PID.
409*4882a593Smuzhiyun 	 */
410*4882a593Smuzhiyun 	init_mm.context.id = mmu_base_pid;
411*4882a593Smuzhiyun 	mmu_base_pid++;
412*4882a593Smuzhiyun }
413*4882a593Smuzhiyun 
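/*
 * Partition table entry 0 describes the host: dw0 holds the radix tree
 * size, the kernel PGD and PATB_HR (host radix), dw1 the process table
 * base, its size encoding and PATB_GR.
 */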
414*4882a593Smuzhiyun static void __init radix_init_partition_table(void)
415*4882a593Smuzhiyun {
416*4882a593Smuzhiyun 	unsigned long rts_field, dw0, dw1;
417*4882a593Smuzhiyun 
418*4882a593Smuzhiyun 	mmu_partition_table_init();
419*4882a593Smuzhiyun 	rts_field = radix__get_tree_size();
420*4882a593Smuzhiyun 	dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
421*4882a593Smuzhiyun 	dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR;
422*4882a593Smuzhiyun 	mmu_partition_table_set_entry(0, dw0, dw1, false);
423*4882a593Smuzhiyun 
424*4882a593Smuzhiyun 	pr_info("Initializing Radix MMU\n");
425*4882a593Smuzhiyun }
426*4882a593Smuzhiyun 
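/* Translate a page-size shift (12, 16, 21 or 30) to its MMU_PAGE_* index. */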
427*4882a593Smuzhiyun static int __init get_idx_from_shift(unsigned int shift)
428*4882a593Smuzhiyun {
429*4882a593Smuzhiyun 	int idx = -1;
430*4882a593Smuzhiyun 
431*4882a593Smuzhiyun 	switch (shift) {
432*4882a593Smuzhiyun 	case 0xc:
433*4882a593Smuzhiyun 		idx = MMU_PAGE_4K;
434*4882a593Smuzhiyun 		break;
435*4882a593Smuzhiyun 	case 0x10:
436*4882a593Smuzhiyun 		idx = MMU_PAGE_64K;
437*4882a593Smuzhiyun 		break;
438*4882a593Smuzhiyun 	case 0x15:
439*4882a593Smuzhiyun 		idx = MMU_PAGE_2M;
440*4882a593Smuzhiyun 		break;
441*4882a593Smuzhiyun 	case 0x1e:
442*4882a593Smuzhiyun 		idx = MMU_PAGE_1G;
443*4882a593Smuzhiyun 		break;
444*4882a593Smuzhiyun 	}
445*4882a593Smuzhiyun 	return idx;
446*4882a593Smuzhiyun }
447*4882a593Smuzhiyun 
448*4882a593Smuzhiyun static int __init radix_dt_scan_page_sizes(unsigned long node,
449*4882a593Smuzhiyun 					   const char *uname, int depth,
450*4882a593Smuzhiyun 					   void *data)
451*4882a593Smuzhiyun {
452*4882a593Smuzhiyun 	int size = 0;
453*4882a593Smuzhiyun 	int shift, idx;
454*4882a593Smuzhiyun 	unsigned int ap;
455*4882a593Smuzhiyun 	const __be32 *prop;
456*4882a593Smuzhiyun 	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
457*4882a593Smuzhiyun 
458*4882a593Smuzhiyun 	/* We are scanning "cpu" nodes only */
459*4882a593Smuzhiyun 	if (type == NULL || strcmp(type, "cpu") != 0)
460*4882a593Smuzhiyun 		return 0;
461*4882a593Smuzhiyun 
462*4882a593Smuzhiyun 	/* Find MMU PID size */
463*4882a593Smuzhiyun 	prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
464*4882a593Smuzhiyun 	if (prop && size == 4)
465*4882a593Smuzhiyun 		mmu_pid_bits = be32_to_cpup(prop);
466*4882a593Smuzhiyun 
467*4882a593Smuzhiyun 	/* Grab page size encodings */
468*4882a593Smuzhiyun 	prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
469*4882a593Smuzhiyun 	if (!prop)
470*4882a593Smuzhiyun 		return 0;
471*4882a593Smuzhiyun 
472*4882a593Smuzhiyun 	pr_info("Page sizes from device-tree:\n");
473*4882a593Smuzhiyun 	for (; size >= 4; size -= 4, ++prop) {
474*4882a593Smuzhiyun 
475*4882a593Smuzhiyun 		struct mmu_psize_def *def;
476*4882a593Smuzhiyun 
477*4882a593Smuzhiyun 		/* top 3 bits are the AP encoding */
478*4882a593Smuzhiyun 		shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
479*4882a593Smuzhiyun 		ap = be32_to_cpu(prop[0]) >> 29;
480*4882a593Smuzhiyun 		pr_info("Page size shift = %d AP=0x%x\n", shift, ap);
481*4882a593Smuzhiyun 
482*4882a593Smuzhiyun 		idx = get_idx_from_shift(shift);
483*4882a593Smuzhiyun 		if (idx < 0)
484*4882a593Smuzhiyun 			continue;
485*4882a593Smuzhiyun 
486*4882a593Smuzhiyun 		def = &mmu_psize_defs[idx];
487*4882a593Smuzhiyun 		def->shift = shift;
488*4882a593Smuzhiyun 		def->ap  = ap;
489*4882a593Smuzhiyun 	}
490*4882a593Smuzhiyun 
491*4882a593Smuzhiyun 	/* needed ? */
492*4882a593Smuzhiyun 	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
493*4882a593Smuzhiyun 	return 1;
494*4882a593Smuzhiyun }
495*4882a593Smuzhiyun 
496*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_HOTPLUG
497*4882a593Smuzhiyun static int __init probe_memory_block_size(unsigned long node, const char *uname, int
498*4882a593Smuzhiyun 					  depth, void *data)
499*4882a593Smuzhiyun {
500*4882a593Smuzhiyun 	unsigned long *mem_block_size = (unsigned long *)data;
501*4882a593Smuzhiyun 	const __be32 *prop;
502*4882a593Smuzhiyun 	int len;
503*4882a593Smuzhiyun 
504*4882a593Smuzhiyun 	if (depth != 1)
505*4882a593Smuzhiyun 		return 0;
506*4882a593Smuzhiyun 
507*4882a593Smuzhiyun 	if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
508*4882a593Smuzhiyun 		return 0;
509*4882a593Smuzhiyun 
510*4882a593Smuzhiyun 	prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
511*4882a593Smuzhiyun 
512*4882a593Smuzhiyun 	if (!prop || len < dt_root_size_cells * sizeof(__be32))
513*4882a593Smuzhiyun 		/*
514*4882a593Smuzhiyun 		 * Nothing in the device tree
515*4882a593Smuzhiyun 		 */
516*4882a593Smuzhiyun 		*mem_block_size = MIN_MEMORY_BLOCK_SIZE;
517*4882a593Smuzhiyun 	else
518*4882a593Smuzhiyun 		*mem_block_size = of_read_number(prop, dt_root_size_cells);
519*4882a593Smuzhiyun 	return 1;
520*4882a593Smuzhiyun }
521*4882a593Smuzhiyun 
522*4882a593Smuzhiyun static unsigned long radix_memory_block_size(void)
523*4882a593Smuzhiyun {
524*4882a593Smuzhiyun 	unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
525*4882a593Smuzhiyun 
526*4882a593Smuzhiyun 	/*
527*4882a593Smuzhiyun 	 * The OPAL firmware feature is set by now, so it is safe
528*4882a593Smuzhiyun 	 * to test for the OPAL feature.
529*4882a593Smuzhiyun 	 */
530*4882a593Smuzhiyun 	if (firmware_has_feature(FW_FEATURE_OPAL))
531*4882a593Smuzhiyun 		mem_block_size = 1UL * 1024 * 1024 * 1024;
532*4882a593Smuzhiyun 	else
533*4882a593Smuzhiyun 		of_scan_flat_dt(probe_memory_block_size, &mem_block_size);
534*4882a593Smuzhiyun 
535*4882a593Smuzhiyun 	return mem_block_size;
536*4882a593Smuzhiyun }
537*4882a593Smuzhiyun 
538*4882a593Smuzhiyun #else   /* CONFIG_MEMORY_HOTPLUG */
539*4882a593Smuzhiyun 
540*4882a593Smuzhiyun static unsigned long radix_memory_block_size(void)
541*4882a593Smuzhiyun {
542*4882a593Smuzhiyun 	return 1UL * 1024 * 1024 * 1024;
543*4882a593Smuzhiyun }
544*4882a593Smuzhiyun 
545*4882a593Smuzhiyun #endif /* CONFIG_MEMORY_HOTPLUG */
546*4882a593Smuzhiyun 
547*4882a593Smuzhiyun 
548*4882a593Smuzhiyun void __init radix__early_init_devtree(void)
549*4882a593Smuzhiyun {
550*4882a593Smuzhiyun 	int rc;
551*4882a593Smuzhiyun 
552*4882a593Smuzhiyun 	/*
553*4882a593Smuzhiyun 	 * Try to find the available page sizes in the device-tree
554*4882a593Smuzhiyun 	 */
555*4882a593Smuzhiyun 	rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
556*4882a593Smuzhiyun 	if (!rc) {
557*4882a593Smuzhiyun 		/*
558*4882a593Smuzhiyun 		 * No page size details found in device tree.
559*4882a593Smuzhiyun 		 * Let's assume we have 4k and 64k page support
560*4882a593Smuzhiyun 		 */
561*4882a593Smuzhiyun 		mmu_psize_defs[MMU_PAGE_4K].shift = 12;
562*4882a593Smuzhiyun 		mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
563*4882a593Smuzhiyun 
564*4882a593Smuzhiyun 		mmu_psize_defs[MMU_PAGE_64K].shift = 16;
565*4882a593Smuzhiyun 		mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
566*4882a593Smuzhiyun 	}
567*4882a593Smuzhiyun 
568*4882a593Smuzhiyun 	/*
569*4882a593Smuzhiyun 	 * Max mapping size used when mapping pages. We don't use
570*4882a593Smuzhiyun 	 * ppc_md.memory_block_size() here because this gets called
571*4882a593Smuzhiyun 	 * early, before the machine has been probed. Also, the
572*4882a593Smuzhiyun 	 * pseries implementation only checks for ibm,lmb-size.
573*4882a593Smuzhiyun 	 * All hypervisors supporting radix do expose that device
574*4882a593Smuzhiyun 	 * tree node.
575*4882a593Smuzhiyun 	 */
576*4882a593Smuzhiyun 	radix_mem_block_size = radix_memory_block_size();
577*4882a593Smuzhiyun 	return;
578*4882a593Smuzhiyun }
579*4882a593Smuzhiyun 
580*4882a593Smuzhiyun static void radix_init_amor(void)
581*4882a593Smuzhiyun {
582*4882a593Smuzhiyun 	/*
583*4882a593Smuzhiyun 	* In HV mode, we init AMOR (Authority Mask Override Register) so that
584*4882a593Smuzhiyun 	* the hypervisor and guest can setup IAMR (Instruction Authority Mask
585*4882a593Smuzhiyun 	* Register), enable key 0 and set it to 1.
586*4882a593Smuzhiyun 	*
587*4882a593Smuzhiyun 	* AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
588*4882a593Smuzhiyun 	*/
589*4882a593Smuzhiyun 	mtspr(SPRN_AMOR, (3ul << 62));
590*4882a593Smuzhiyun }
591*4882a593Smuzhiyun 
592*4882a593Smuzhiyun #ifdef CONFIG_PPC_KUEP
593*4882a593Smuzhiyun void setup_kuep(bool disabled)
594*4882a593Smuzhiyun {
595*4882a593Smuzhiyun 	if (disabled || !early_radix_enabled())
596*4882a593Smuzhiyun 		return;
597*4882a593Smuzhiyun 
598*4882a593Smuzhiyun 	if (smp_processor_id() == boot_cpuid) {
599*4882a593Smuzhiyun 		pr_info("Activating Kernel Userspace Execution Prevention\n");
600*4882a593Smuzhiyun 		cur_cpu_spec->mmu_features |= MMU_FTR_KUEP;
601*4882a593Smuzhiyun 	}
602*4882a593Smuzhiyun 
603*4882a593Smuzhiyun 	/*
604*4882a593Smuzhiyun 	 * Radix always uses key0 of the IAMR to determine if an access is
605*4882a593Smuzhiyun 	 * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
606*4882a593Smuzhiyun 	 * fetch.
607*4882a593Smuzhiyun 	 */
608*4882a593Smuzhiyun 	mtspr(SPRN_IAMR, (1ul << 62));
609*4882a593Smuzhiyun }
610*4882a593Smuzhiyun #endif
611*4882a593Smuzhiyun 
612*4882a593Smuzhiyun #ifdef CONFIG_PPC_KUAP
613*4882a593Smuzhiyun void setup_kuap(bool disabled)
614*4882a593Smuzhiyun {
615*4882a593Smuzhiyun 	if (disabled || !early_radix_enabled())
616*4882a593Smuzhiyun 		return;
617*4882a593Smuzhiyun 
618*4882a593Smuzhiyun 	if (smp_processor_id() == boot_cpuid) {
619*4882a593Smuzhiyun 		pr_info("Activating Kernel Userspace Access Prevention\n");
620*4882a593Smuzhiyun 		cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP;
621*4882a593Smuzhiyun 	}
622*4882a593Smuzhiyun 
623*4882a593Smuzhiyun 	/* Make sure userspace can't change the AMR */
624*4882a593Smuzhiyun 	mtspr(SPRN_UAMOR, 0);
625*4882a593Smuzhiyun 
626*4882a593Smuzhiyun 	/*
627*4882a593Smuzhiyun 	 * Set the default kernel AMR values on all cpus.
628*4882a593Smuzhiyun 	 */
629*4882a593Smuzhiyun 	mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
630*4882a593Smuzhiyun 	isync();
631*4882a593Smuzhiyun }
632*4882a593Smuzhiyun #endif
633*4882a593Smuzhiyun 
634*4882a593Smuzhiyun void __init radix__early_init_mmu(void)
635*4882a593Smuzhiyun {
636*4882a593Smuzhiyun 	unsigned long lpcr;
637*4882a593Smuzhiyun 
638*4882a593Smuzhiyun #ifdef CONFIG_PPC_64K_PAGES
639*4882a593Smuzhiyun 	/* PAGE_SIZE mappings */
640*4882a593Smuzhiyun 	mmu_virtual_psize = MMU_PAGE_64K;
641*4882a593Smuzhiyun #else
642*4882a593Smuzhiyun 	mmu_virtual_psize = MMU_PAGE_4K;
643*4882a593Smuzhiyun #endif
644*4882a593Smuzhiyun 
645*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM_VMEMMAP
646*4882a593Smuzhiyun 	/* vmemmap mapping */
647*4882a593Smuzhiyun 	if (mmu_psize_defs[MMU_PAGE_2M].shift) {
648*4882a593Smuzhiyun 		/*
649*4882a593Smuzhiyun 		 * map vmemmap using 2M if available
650*4882a593Smuzhiyun 		 */
651*4882a593Smuzhiyun 		mmu_vmemmap_psize = MMU_PAGE_2M;
652*4882a593Smuzhiyun 	} else
653*4882a593Smuzhiyun 		mmu_vmemmap_psize = mmu_virtual_psize;
654*4882a593Smuzhiyun #endif
655*4882a593Smuzhiyun 	/*
656*4882a593Smuzhiyun 	 * initialize page table size
657*4882a593Smuzhiyun 	 */
658*4882a593Smuzhiyun 	__pte_index_size = RADIX_PTE_INDEX_SIZE;
659*4882a593Smuzhiyun 	__pmd_index_size = RADIX_PMD_INDEX_SIZE;
660*4882a593Smuzhiyun 	__pud_index_size = RADIX_PUD_INDEX_SIZE;
661*4882a593Smuzhiyun 	__pgd_index_size = RADIX_PGD_INDEX_SIZE;
662*4882a593Smuzhiyun 	__pud_cache_index = RADIX_PUD_INDEX_SIZE;
663*4882a593Smuzhiyun 	__pte_table_size = RADIX_PTE_TABLE_SIZE;
664*4882a593Smuzhiyun 	__pmd_table_size = RADIX_PMD_TABLE_SIZE;
665*4882a593Smuzhiyun 	__pud_table_size = RADIX_PUD_TABLE_SIZE;
666*4882a593Smuzhiyun 	__pgd_table_size = RADIX_PGD_TABLE_SIZE;
667*4882a593Smuzhiyun 
668*4882a593Smuzhiyun 	__pmd_val_bits = RADIX_PMD_VAL_BITS;
669*4882a593Smuzhiyun 	__pud_val_bits = RADIX_PUD_VAL_BITS;
670*4882a593Smuzhiyun 	__pgd_val_bits = RADIX_PGD_VAL_BITS;
671*4882a593Smuzhiyun 
672*4882a593Smuzhiyun 	__kernel_virt_start = RADIX_KERN_VIRT_START;
673*4882a593Smuzhiyun 	__vmalloc_start = RADIX_VMALLOC_START;
674*4882a593Smuzhiyun 	__vmalloc_end = RADIX_VMALLOC_END;
675*4882a593Smuzhiyun 	__kernel_io_start = RADIX_KERN_IO_START;
676*4882a593Smuzhiyun 	__kernel_io_end = RADIX_KERN_IO_END;
677*4882a593Smuzhiyun 	vmemmap = (struct page *)RADIX_VMEMMAP_START;
678*4882a593Smuzhiyun 	ioremap_bot = IOREMAP_BASE;
679*4882a593Smuzhiyun 
680*4882a593Smuzhiyun #ifdef CONFIG_PCI
681*4882a593Smuzhiyun 	pci_io_base = ISA_IO_BASE;
682*4882a593Smuzhiyun #endif
683*4882a593Smuzhiyun 	__pte_frag_nr = RADIX_PTE_FRAG_NR;
684*4882a593Smuzhiyun 	__pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
685*4882a593Smuzhiyun 	__pmd_frag_nr = RADIX_PMD_FRAG_NR;
686*4882a593Smuzhiyun 	__pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;
687*4882a593Smuzhiyun 
688*4882a593Smuzhiyun 	radix_init_pgtable();
689*4882a593Smuzhiyun 
690*4882a593Smuzhiyun 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
691*4882a593Smuzhiyun 		lpcr = mfspr(SPRN_LPCR);
692*4882a593Smuzhiyun 		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
693*4882a593Smuzhiyun 		radix_init_partition_table();
694*4882a593Smuzhiyun 		radix_init_amor();
695*4882a593Smuzhiyun 	} else {
696*4882a593Smuzhiyun 		radix_init_pseries();
697*4882a593Smuzhiyun 	}
698*4882a593Smuzhiyun 
699*4882a593Smuzhiyun 	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
700*4882a593Smuzhiyun 
701*4882a593Smuzhiyun 	/* Switch to the guard PID before turning on MMU */
702*4882a593Smuzhiyun 	radix__switch_mmu_context(NULL, &init_mm);
703*4882a593Smuzhiyun 	tlbiel_all();
704*4882a593Smuzhiyun }
705*4882a593Smuzhiyun 
706*4882a593Smuzhiyun void radix__early_init_mmu_secondary(void)
707*4882a593Smuzhiyun {
708*4882a593Smuzhiyun 	unsigned long lpcr;
709*4882a593Smuzhiyun 	/*
710*4882a593Smuzhiyun 	 * update partition table control register and UPRT
711*4882a593Smuzhiyun 	 */
712*4882a593Smuzhiyun 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
713*4882a593Smuzhiyun 		lpcr = mfspr(SPRN_LPCR);
714*4882a593Smuzhiyun 		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
715*4882a593Smuzhiyun 
716*4882a593Smuzhiyun 		set_ptcr_when_no_uv(__pa(partition_tb) |
717*4882a593Smuzhiyun 				    (PATB_SIZE_SHIFT - 12));
718*4882a593Smuzhiyun 
719*4882a593Smuzhiyun 		radix_init_amor();
720*4882a593Smuzhiyun 	}
721*4882a593Smuzhiyun 
722*4882a593Smuzhiyun 	radix__switch_mmu_context(NULL, &init_mm);
723*4882a593Smuzhiyun 	tlbiel_all();
724*4882a593Smuzhiyun }
725*4882a593Smuzhiyun 
726*4882a593Smuzhiyun void radix__mmu_cleanup_all(void)
727*4882a593Smuzhiyun {
728*4882a593Smuzhiyun 	unsigned long lpcr;
729*4882a593Smuzhiyun 
730*4882a593Smuzhiyun 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
731*4882a593Smuzhiyun 		lpcr = mfspr(SPRN_LPCR);
732*4882a593Smuzhiyun 		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
733*4882a593Smuzhiyun 		set_ptcr_when_no_uv(0);
734*4882a593Smuzhiyun 		powernv_set_nmmu_ptcr(0);
735*4882a593Smuzhiyun 		radix__flush_tlb_all();
736*4882a593Smuzhiyun 	}
737*4882a593Smuzhiyun }
738*4882a593Smuzhiyun 
739*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_HOTPLUG
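/*
 * Teardown helpers for memory hot-remove: once every entry in a page-table
 * page is none, free that page and clear the pointer to it one level up.
 */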
740*4882a593Smuzhiyun static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
741*4882a593Smuzhiyun {
742*4882a593Smuzhiyun 	pte_t *pte;
743*4882a593Smuzhiyun 	int i;
744*4882a593Smuzhiyun 
745*4882a593Smuzhiyun 	for (i = 0; i < PTRS_PER_PTE; i++) {
746*4882a593Smuzhiyun 		pte = pte_start + i;
747*4882a593Smuzhiyun 		if (!pte_none(*pte))
748*4882a593Smuzhiyun 			return;
749*4882a593Smuzhiyun 	}
750*4882a593Smuzhiyun 
751*4882a593Smuzhiyun 	pte_free_kernel(&init_mm, pte_start);
752*4882a593Smuzhiyun 	pmd_clear(pmd);
753*4882a593Smuzhiyun }
754*4882a593Smuzhiyun 
755*4882a593Smuzhiyun static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
756*4882a593Smuzhiyun {
757*4882a593Smuzhiyun 	pmd_t *pmd;
758*4882a593Smuzhiyun 	int i;
759*4882a593Smuzhiyun 
760*4882a593Smuzhiyun 	for (i = 0; i < PTRS_PER_PMD; i++) {
761*4882a593Smuzhiyun 		pmd = pmd_start + i;
762*4882a593Smuzhiyun 		if (!pmd_none(*pmd))
763*4882a593Smuzhiyun 			return;
764*4882a593Smuzhiyun 	}
765*4882a593Smuzhiyun 
766*4882a593Smuzhiyun 	pmd_free(&init_mm, pmd_start);
767*4882a593Smuzhiyun 	pud_clear(pud);
768*4882a593Smuzhiyun }
769*4882a593Smuzhiyun 
770*4882a593Smuzhiyun static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
771*4882a593Smuzhiyun {
772*4882a593Smuzhiyun 	pud_t *pud;
773*4882a593Smuzhiyun 	int i;
774*4882a593Smuzhiyun 
775*4882a593Smuzhiyun 	for (i = 0; i < PTRS_PER_PUD; i++) {
776*4882a593Smuzhiyun 		pud = pud_start + i;
777*4882a593Smuzhiyun 		if (!pud_none(*pud))
778*4882a593Smuzhiyun 			return;
779*4882a593Smuzhiyun 	}
780*4882a593Smuzhiyun 
781*4882a593Smuzhiyun 	pud_free(&init_mm, pud_start);
782*4882a593Smuzhiyun 	p4d_clear(p4d);
783*4882a593Smuzhiyun }
784*4882a593Smuzhiyun 
785*4882a593Smuzhiyun static void remove_pte_table(pte_t *pte_start, unsigned long addr,
786*4882a593Smuzhiyun 			     unsigned long end)
787*4882a593Smuzhiyun {
788*4882a593Smuzhiyun 	unsigned long next;
789*4882a593Smuzhiyun 	pte_t *pte;
790*4882a593Smuzhiyun 
791*4882a593Smuzhiyun 	pte = pte_start + pte_index(addr);
792*4882a593Smuzhiyun 	for (; addr < end; addr = next, pte++) {
793*4882a593Smuzhiyun 		next = (addr + PAGE_SIZE) & PAGE_MASK;
794*4882a593Smuzhiyun 		if (next > end)
795*4882a593Smuzhiyun 			next = end;
796*4882a593Smuzhiyun 
797*4882a593Smuzhiyun 		if (!pte_present(*pte))
798*4882a593Smuzhiyun 			continue;
799*4882a593Smuzhiyun 
800*4882a593Smuzhiyun 		if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
801*4882a593Smuzhiyun 			/*
802*4882a593Smuzhiyun 			 * The vmemmap_free() and remove_section_mapping()
803*4882a593Smuzhiyun 			 * codepaths call us with aligned addresses.
804*4882a593Smuzhiyun 			 */
805*4882a593Smuzhiyun 			WARN_ONCE(1, "%s: unaligned range\n", __func__);
806*4882a593Smuzhiyun 			continue;
807*4882a593Smuzhiyun 		}
808*4882a593Smuzhiyun 
809*4882a593Smuzhiyun 		pte_clear(&init_mm, addr, pte);
810*4882a593Smuzhiyun 	}
811*4882a593Smuzhiyun }
812*4882a593Smuzhiyun 
813*4882a593Smuzhiyun static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
814*4882a593Smuzhiyun 			     unsigned long end)
815*4882a593Smuzhiyun {
816*4882a593Smuzhiyun 	unsigned long next;
817*4882a593Smuzhiyun 	pte_t *pte_base;
818*4882a593Smuzhiyun 	pmd_t *pmd;
819*4882a593Smuzhiyun 
820*4882a593Smuzhiyun 	pmd = pmd_start + pmd_index(addr);
821*4882a593Smuzhiyun 	for (; addr < end; addr = next, pmd++) {
822*4882a593Smuzhiyun 		next = pmd_addr_end(addr, end);
823*4882a593Smuzhiyun 
824*4882a593Smuzhiyun 		if (!pmd_present(*pmd))
825*4882a593Smuzhiyun 			continue;
826*4882a593Smuzhiyun 
827*4882a593Smuzhiyun 		if (pmd_is_leaf(*pmd)) {
828*4882a593Smuzhiyun 			if (!IS_ALIGNED(addr, PMD_SIZE) ||
829*4882a593Smuzhiyun 			    !IS_ALIGNED(next, PMD_SIZE)) {
830*4882a593Smuzhiyun 				WARN_ONCE(1, "%s: unaligned range\n", __func__);
831*4882a593Smuzhiyun 				continue;
832*4882a593Smuzhiyun 			}
833*4882a593Smuzhiyun 			pte_clear(&init_mm, addr, (pte_t *)pmd);
834*4882a593Smuzhiyun 			continue;
835*4882a593Smuzhiyun 		}
836*4882a593Smuzhiyun 
837*4882a593Smuzhiyun 		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
838*4882a593Smuzhiyun 		remove_pte_table(pte_base, addr, next);
839*4882a593Smuzhiyun 		free_pte_table(pte_base, pmd);
840*4882a593Smuzhiyun 	}
841*4882a593Smuzhiyun }
842*4882a593Smuzhiyun 
843*4882a593Smuzhiyun static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
844*4882a593Smuzhiyun 			     unsigned long end)
845*4882a593Smuzhiyun {
846*4882a593Smuzhiyun 	unsigned long next;
847*4882a593Smuzhiyun 	pmd_t *pmd_base;
848*4882a593Smuzhiyun 	pud_t *pud;
849*4882a593Smuzhiyun 
850*4882a593Smuzhiyun 	pud = pud_start + pud_index(addr);
851*4882a593Smuzhiyun 	for (; addr < end; addr = next, pud++) {
852*4882a593Smuzhiyun 		next = pud_addr_end(addr, end);
853*4882a593Smuzhiyun 
854*4882a593Smuzhiyun 		if (!pud_present(*pud))
855*4882a593Smuzhiyun 			continue;
856*4882a593Smuzhiyun 
857*4882a593Smuzhiyun 		if (pud_is_leaf(*pud)) {
858*4882a593Smuzhiyun 			if (!IS_ALIGNED(addr, PUD_SIZE) ||
859*4882a593Smuzhiyun 			    !IS_ALIGNED(next, PUD_SIZE)) {
860*4882a593Smuzhiyun 				WARN_ONCE(1, "%s: unaligned range\n", __func__);
861*4882a593Smuzhiyun 				continue;
862*4882a593Smuzhiyun 			}
863*4882a593Smuzhiyun 			pte_clear(&init_mm, addr, (pte_t *)pud);
864*4882a593Smuzhiyun 			continue;
865*4882a593Smuzhiyun 		}
866*4882a593Smuzhiyun 
867*4882a593Smuzhiyun 		pmd_base = (pmd_t *)pud_page_vaddr(*pud);
868*4882a593Smuzhiyun 		remove_pmd_table(pmd_base, addr, next);
869*4882a593Smuzhiyun 		free_pmd_table(pmd_base, pud);
870*4882a593Smuzhiyun 	}
871*4882a593Smuzhiyun }
872*4882a593Smuzhiyun 
873*4882a593Smuzhiyun static void __meminit remove_pagetable(unsigned long start, unsigned long end)
874*4882a593Smuzhiyun {
875*4882a593Smuzhiyun 	unsigned long addr, next;
876*4882a593Smuzhiyun 	pud_t *pud_base;
877*4882a593Smuzhiyun 	pgd_t *pgd;
878*4882a593Smuzhiyun 	p4d_t *p4d;
879*4882a593Smuzhiyun 
880*4882a593Smuzhiyun 	spin_lock(&init_mm.page_table_lock);
881*4882a593Smuzhiyun 
882*4882a593Smuzhiyun 	for (addr = start; addr < end; addr = next) {
883*4882a593Smuzhiyun 		next = pgd_addr_end(addr, end);
884*4882a593Smuzhiyun 
885*4882a593Smuzhiyun 		pgd = pgd_offset_k(addr);
886*4882a593Smuzhiyun 		p4d = p4d_offset(pgd, addr);
887*4882a593Smuzhiyun 		if (!p4d_present(*p4d))
888*4882a593Smuzhiyun 			continue;
889*4882a593Smuzhiyun 
890*4882a593Smuzhiyun 		if (p4d_is_leaf(*p4d)) {
891*4882a593Smuzhiyun 			if (!IS_ALIGNED(addr, P4D_SIZE) ||
892*4882a593Smuzhiyun 			    !IS_ALIGNED(next, P4D_SIZE)) {
893*4882a593Smuzhiyun 				WARN_ONCE(1, "%s: unaligned range\n", __func__);
894*4882a593Smuzhiyun 				continue;
895*4882a593Smuzhiyun 			}
896*4882a593Smuzhiyun 
897*4882a593Smuzhiyun 			pte_clear(&init_mm, addr, (pte_t *)pgd);
898*4882a593Smuzhiyun 			continue;
899*4882a593Smuzhiyun 		}
900*4882a593Smuzhiyun 
901*4882a593Smuzhiyun 		pud_base = (pud_t *)p4d_page_vaddr(*p4d);
902*4882a593Smuzhiyun 		remove_pud_table(pud_base, addr, next);
903*4882a593Smuzhiyun 		free_pud_table(pud_base, p4d);
904*4882a593Smuzhiyun 	}
905*4882a593Smuzhiyun 
906*4882a593Smuzhiyun 	spin_unlock(&init_mm.page_table_lock);
907*4882a593Smuzhiyun 	radix__flush_tlb_kernel_range(start, end);
908*4882a593Smuzhiyun }
909*4882a593Smuzhiyun 
910*4882a593Smuzhiyun int __meminit radix__create_section_mapping(unsigned long start,
911*4882a593Smuzhiyun 					    unsigned long end, int nid,
912*4882a593Smuzhiyun 					    pgprot_t prot)
913*4882a593Smuzhiyun {
914*4882a593Smuzhiyun 	if (end >= RADIX_VMALLOC_START) {
915*4882a593Smuzhiyun 		pr_warn("Outside the supported range\n");
916*4882a593Smuzhiyun 		return -1;
917*4882a593Smuzhiyun 	}
918*4882a593Smuzhiyun 
919*4882a593Smuzhiyun 	return create_physical_mapping(__pa(start), __pa(end),
920*4882a593Smuzhiyun 				       radix_mem_block_size, nid, prot);
921*4882a593Smuzhiyun }
922*4882a593Smuzhiyun 
923*4882a593Smuzhiyun int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
924*4882a593Smuzhiyun {
925*4882a593Smuzhiyun 	remove_pagetable(start, end);
926*4882a593Smuzhiyun 	return 0;
927*4882a593Smuzhiyun }
928*4882a593Smuzhiyun #endif /* CONFIG_MEMORY_HOTPLUG */
929*4882a593Smuzhiyun 
930*4882a593Smuzhiyun #ifdef CONFIG_SPARSEMEM_VMEMMAP
931*4882a593Smuzhiyun static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
932*4882a593Smuzhiyun 				 pgprot_t flags, unsigned int map_page_size,
933*4882a593Smuzhiyun 				 int nid)
934*4882a593Smuzhiyun {
935*4882a593Smuzhiyun 	return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
936*4882a593Smuzhiyun }
937*4882a593Smuzhiyun 
938*4882a593Smuzhiyun int __meminit radix__vmemmap_create_mapping(unsigned long start,
939*4882a593Smuzhiyun 				      unsigned long page_size,
940*4882a593Smuzhiyun 				      unsigned long phys)
941*4882a593Smuzhiyun {
942*4882a593Smuzhiyun 	/* Create a PTE encoding */
943*4882a593Smuzhiyun 	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
944*4882a593Smuzhiyun 	int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
945*4882a593Smuzhiyun 	int ret;
946*4882a593Smuzhiyun 
947*4882a593Smuzhiyun 	if ((start + page_size) >= RADIX_VMEMMAP_END) {
948*4882a593Smuzhiyun 		pr_warn("Outside the supported range\n");
949*4882a593Smuzhiyun 		return -1;
950*4882a593Smuzhiyun 	}
951*4882a593Smuzhiyun 
952*4882a593Smuzhiyun 	ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
953*4882a593Smuzhiyun 	BUG_ON(ret);
954*4882a593Smuzhiyun 
955*4882a593Smuzhiyun 	return 0;
956*4882a593Smuzhiyun }
957*4882a593Smuzhiyun 
958*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_HOTPLUG
959*4882a593Smuzhiyun void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
960*4882a593Smuzhiyun {
961*4882a593Smuzhiyun 	remove_pagetable(start, start + page_size);
962*4882a593Smuzhiyun }
963*4882a593Smuzhiyun #endif
964*4882a593Smuzhiyun #endif
965*4882a593Smuzhiyun 
966*4882a593Smuzhiyun #ifdef CONFIG_TRANSPARENT_HUGEPAGE
967*4882a593Smuzhiyun 
968*4882a593Smuzhiyun unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
969*4882a593Smuzhiyun 				  pmd_t *pmdp, unsigned long clr,
970*4882a593Smuzhiyun 				  unsigned long set)
971*4882a593Smuzhiyun {
972*4882a593Smuzhiyun 	unsigned long old;
973*4882a593Smuzhiyun 
974*4882a593Smuzhiyun #ifdef CONFIG_DEBUG_VM
975*4882a593Smuzhiyun 	WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
976*4882a593Smuzhiyun 	assert_spin_locked(pmd_lockptr(mm, pmdp));
977*4882a593Smuzhiyun #endif
978*4882a593Smuzhiyun 
979*4882a593Smuzhiyun 	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
980*4882a593Smuzhiyun 	trace_hugepage_update(addr, old, clr, set);
981*4882a593Smuzhiyun 
982*4882a593Smuzhiyun 	return old;
983*4882a593Smuzhiyun }
984*4882a593Smuzhiyun 
985*4882a593Smuzhiyun pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
986*4882a593Smuzhiyun 			pmd_t *pmdp)
987*4882a593Smuzhiyun 
988*4882a593Smuzhiyun {
989*4882a593Smuzhiyun 	pmd_t pmd;
990*4882a593Smuzhiyun 
991*4882a593Smuzhiyun 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
992*4882a593Smuzhiyun 	VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
993*4882a593Smuzhiyun 	VM_BUG_ON(pmd_devmap(*pmdp));
994*4882a593Smuzhiyun 	/*
995*4882a593Smuzhiyun 	 * khugepaged calls this for normal pmd
996*4882a593Smuzhiyun 	 */
997*4882a593Smuzhiyun 	pmd = *pmdp;
998*4882a593Smuzhiyun 	pmd_clear(pmdp);
999*4882a593Smuzhiyun 
1000*4882a593Smuzhiyun 	radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);
1001*4882a593Smuzhiyun 
1002*4882a593Smuzhiyun 	return pmd;
1003*4882a593Smuzhiyun }
1004*4882a593Smuzhiyun 
1005*4882a593Smuzhiyun /*
1006*4882a593Smuzhiyun  * For us pgtable_t is pte_t *. In order to save the deposited
1007*4882a593Smuzhiyun  * page table, we consider the allocated page table as a list
1008*4882a593Smuzhiyun  * head. On withdraw we need to make sure we zero out the used
1009*4882a593Smuzhiyun  * list_head memory area.
1010*4882a593Smuzhiyun  */
1011*4882a593Smuzhiyun void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
1012*4882a593Smuzhiyun 				 pgtable_t pgtable)
1013*4882a593Smuzhiyun {
1014*4882a593Smuzhiyun 	struct list_head *lh = (struct list_head *) pgtable;
1015*4882a593Smuzhiyun 
1016*4882a593Smuzhiyun 	assert_spin_locked(pmd_lockptr(mm, pmdp));
1017*4882a593Smuzhiyun 
1018*4882a593Smuzhiyun 	/* FIFO */
1019*4882a593Smuzhiyun 	if (!pmd_huge_pte(mm, pmdp))
1020*4882a593Smuzhiyun 		INIT_LIST_HEAD(lh);
1021*4882a593Smuzhiyun 	else
1022*4882a593Smuzhiyun 		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
1023*4882a593Smuzhiyun 	pmd_huge_pte(mm, pmdp) = pgtable;
1024*4882a593Smuzhiyun }
1025*4882a593Smuzhiyun 
1026*4882a593Smuzhiyun pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
1027*4882a593Smuzhiyun {
1028*4882a593Smuzhiyun 	pte_t *ptep;
1029*4882a593Smuzhiyun 	pgtable_t pgtable;
1030*4882a593Smuzhiyun 	struct list_head *lh;
1031*4882a593Smuzhiyun 
1032*4882a593Smuzhiyun 	assert_spin_locked(pmd_lockptr(mm, pmdp));
1033*4882a593Smuzhiyun 
1034*4882a593Smuzhiyun 	/* FIFO */
1035*4882a593Smuzhiyun 	pgtable = pmd_huge_pte(mm, pmdp);
1036*4882a593Smuzhiyun 	lh = (struct list_head *) pgtable;
1037*4882a593Smuzhiyun 	if (list_empty(lh))
1038*4882a593Smuzhiyun 		pmd_huge_pte(mm, pmdp) = NULL;
1039*4882a593Smuzhiyun 	else {
1040*4882a593Smuzhiyun 		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
1041*4882a593Smuzhiyun 		list_del(lh);
1042*4882a593Smuzhiyun 	}
1043*4882a593Smuzhiyun 	ptep = (pte_t *) pgtable;
1044*4882a593Smuzhiyun 	*ptep = __pte(0);
1045*4882a593Smuzhiyun 	ptep++;
1046*4882a593Smuzhiyun 	*ptep = __pte(0);
1047*4882a593Smuzhiyun 	return pgtable;
1048*4882a593Smuzhiyun }
1049*4882a593Smuzhiyun 
1050*4882a593Smuzhiyun pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
1051*4882a593Smuzhiyun 				     unsigned long addr, pmd_t *pmdp)
1052*4882a593Smuzhiyun {
1053*4882a593Smuzhiyun 	pmd_t old_pmd;
1054*4882a593Smuzhiyun 	unsigned long old;
1055*4882a593Smuzhiyun 
1056*4882a593Smuzhiyun 	old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
1057*4882a593Smuzhiyun 	old_pmd = __pmd(old);
1058*4882a593Smuzhiyun 	return old_pmd;
1059*4882a593Smuzhiyun }
1060*4882a593Smuzhiyun 
1061*4882a593Smuzhiyun #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1062*4882a593Smuzhiyun 
1063*4882a593Smuzhiyun void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
1064*4882a593Smuzhiyun 				  pte_t entry, unsigned long address, int psize)
1065*4882a593Smuzhiyun {
1066*4882a593Smuzhiyun 	struct mm_struct *mm = vma->vm_mm;
1067*4882a593Smuzhiyun 	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
1068*4882a593Smuzhiyun 					      _PAGE_RW | _PAGE_EXEC);
1069*4882a593Smuzhiyun 
1070*4882a593Smuzhiyun 	unsigned long change = pte_val(entry) ^ pte_val(*ptep);
1071*4882a593Smuzhiyun 	/*
1072*4882a593Smuzhiyun 	 * To avoid NMMU hang while relaxing access, we need to mark
1073*4882a593Smuzhiyun 	 * the pte invalid in between.
1074*4882a593Smuzhiyun 	 */
1075*4882a593Smuzhiyun 	if ((change & _PAGE_RW) && atomic_read(&mm->context.copros) > 0) {
1076*4882a593Smuzhiyun 		unsigned long old_pte, new_pte;
1077*4882a593Smuzhiyun 
1078*4882a593Smuzhiyun 		old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
1079*4882a593Smuzhiyun 		/*
1080*4882a593Smuzhiyun 		 * new value of pte
1081*4882a593Smuzhiyun 		 */
1082*4882a593Smuzhiyun 		new_pte = old_pte | set;
1083*4882a593Smuzhiyun 		radix__flush_tlb_page_psize(mm, address, psize);
1084*4882a593Smuzhiyun 		__radix_pte_update(ptep, _PAGE_INVALID, new_pte);
1085*4882a593Smuzhiyun 	} else {
1086*4882a593Smuzhiyun 		__radix_pte_update(ptep, 0, set);
1087*4882a593Smuzhiyun 		/*
1088*4882a593Smuzhiyun 		 * Book3S does not require a TLB flush when relaxing access
1089*4882a593Smuzhiyun 		 * restrictions when the address space is not attached to a
1090*4882a593Smuzhiyun 		 * NMMU, because the core MMU will reload the pte after taking
1091*4882a593Smuzhiyun 		 * an access fault, which is defined by the architecture.
1092*4882a593Smuzhiyun 		 */
1093*4882a593Smuzhiyun 	}
1094*4882a593Smuzhiyun 	/* See ptesync comment in radix__set_pte_at */
1095*4882a593Smuzhiyun }
1096*4882a593Smuzhiyun 
1097*4882a593Smuzhiyun void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
1098*4882a593Smuzhiyun 				    unsigned long addr, pte_t *ptep,
1099*4882a593Smuzhiyun 				    pte_t old_pte, pte_t pte)
1100*4882a593Smuzhiyun {
1101*4882a593Smuzhiyun 	struct mm_struct *mm = vma->vm_mm;
1102*4882a593Smuzhiyun 
1103*4882a593Smuzhiyun 	/*
1104*4882a593Smuzhiyun 	 * To avoid NMMU hang while relaxing access we need to flush the tlb before
1105*4882a593Smuzhiyun 	 * we set the new value. We need to do this only for radix, because hash
1106*4882a593Smuzhiyun 	 * translation does flush when updating the linux pte.
1107*4882a593Smuzhiyun 	 */
1108*4882a593Smuzhiyun 	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
1109*4882a593Smuzhiyun 	    (atomic_read(&mm->context.copros) > 0))
1110*4882a593Smuzhiyun 		radix__flush_tlb_page(vma, addr);
1111*4882a593Smuzhiyun 
1112*4882a593Smuzhiyun 	set_pte_at(mm, addr, ptep, pte);
1113*4882a593Smuzhiyun }
1114*4882a593Smuzhiyun 
1115*4882a593Smuzhiyun int __init arch_ioremap_pud_supported(void)
1116*4882a593Smuzhiyun {
1117*4882a593Smuzhiyun 	/* HPT does not cope with large pages in the vmalloc area */
1118*4882a593Smuzhiyun 	return radix_enabled();
1119*4882a593Smuzhiyun }
1120*4882a593Smuzhiyun 
1121*4882a593Smuzhiyun int __init arch_ioremap_pmd_supported(void)
1122*4882a593Smuzhiyun {
1123*4882a593Smuzhiyun 	return radix_enabled();
1124*4882a593Smuzhiyun }
1125*4882a593Smuzhiyun 
1126*4882a593Smuzhiyun int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
1127*4882a593Smuzhiyun {
1128*4882a593Smuzhiyun 	return 0;
1129*4882a593Smuzhiyun }
1130*4882a593Smuzhiyun 
1131*4882a593Smuzhiyun int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
1132*4882a593Smuzhiyun {
1133*4882a593Smuzhiyun 	pte_t *ptep = (pte_t *)pud;
1134*4882a593Smuzhiyun 	pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);
1135*4882a593Smuzhiyun 
1136*4882a593Smuzhiyun 	if (!radix_enabled())
1137*4882a593Smuzhiyun 		return 0;
1138*4882a593Smuzhiyun 
1139*4882a593Smuzhiyun 	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud);
1140*4882a593Smuzhiyun 
1141*4882a593Smuzhiyun 	return 1;
1142*4882a593Smuzhiyun }
1143*4882a593Smuzhiyun 
1144*4882a593Smuzhiyun int pud_clear_huge(pud_t *pud)
1145*4882a593Smuzhiyun {
1146*4882a593Smuzhiyun 	if (pud_is_leaf(*pud)) {
1147*4882a593Smuzhiyun 		pud_clear(pud);
1148*4882a593Smuzhiyun 		return 1;
1149*4882a593Smuzhiyun 	}
1150*4882a593Smuzhiyun 
1151*4882a593Smuzhiyun 	return 0;
1152*4882a593Smuzhiyun }
1153*4882a593Smuzhiyun 
1154*4882a593Smuzhiyun int pud_free_pmd_page(pud_t *pud, unsigned long addr)
1155*4882a593Smuzhiyun {
1156*4882a593Smuzhiyun 	pmd_t *pmd;
1157*4882a593Smuzhiyun 	int i;
1158*4882a593Smuzhiyun 
1159*4882a593Smuzhiyun 	pmd = (pmd_t *)pud_page_vaddr(*pud);
1160*4882a593Smuzhiyun 	pud_clear(pud);
1161*4882a593Smuzhiyun 
1162*4882a593Smuzhiyun 	flush_tlb_kernel_range(addr, addr + PUD_SIZE);
1163*4882a593Smuzhiyun 
1164*4882a593Smuzhiyun 	for (i = 0; i < PTRS_PER_PMD; i++) {
1165*4882a593Smuzhiyun 		if (!pmd_none(pmd[i])) {
1166*4882a593Smuzhiyun 			pte_t *pte;
1167*4882a593Smuzhiyun 			pte = (pte_t *)pmd_page_vaddr(pmd[i]);
1168*4882a593Smuzhiyun 
1169*4882a593Smuzhiyun 			pte_free_kernel(&init_mm, pte);
1170*4882a593Smuzhiyun 		}
1171*4882a593Smuzhiyun 	}
1172*4882a593Smuzhiyun 
1173*4882a593Smuzhiyun 	pmd_free(&init_mm, pmd);
1174*4882a593Smuzhiyun 
1175*4882a593Smuzhiyun 	return 1;
1176*4882a593Smuzhiyun }
1177*4882a593Smuzhiyun 
1178*4882a593Smuzhiyun int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
1179*4882a593Smuzhiyun {
1180*4882a593Smuzhiyun 	pte_t *ptep = (pte_t *)pmd;
1181*4882a593Smuzhiyun 	pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);
1182*4882a593Smuzhiyun 
1183*4882a593Smuzhiyun 	if (!radix_enabled())
1184*4882a593Smuzhiyun 		return 0;
1185*4882a593Smuzhiyun 
1186*4882a593Smuzhiyun 	set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd);
1187*4882a593Smuzhiyun 
1188*4882a593Smuzhiyun 	return 1;
1189*4882a593Smuzhiyun }
1190*4882a593Smuzhiyun 
1191*4882a593Smuzhiyun int pmd_clear_huge(pmd_t *pmd)
1192*4882a593Smuzhiyun {
1193*4882a593Smuzhiyun 	if (pmd_is_leaf(*pmd)) {
1194*4882a593Smuzhiyun 		pmd_clear(pmd);
1195*4882a593Smuzhiyun 		return 1;
1196*4882a593Smuzhiyun 	}
1197*4882a593Smuzhiyun 
1198*4882a593Smuzhiyun 	return 0;
1199*4882a593Smuzhiyun }
1200*4882a593Smuzhiyun 
1201*4882a593Smuzhiyun int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
1202*4882a593Smuzhiyun {
1203*4882a593Smuzhiyun 	pte_t *pte;
1204*4882a593Smuzhiyun 
1205*4882a593Smuzhiyun 	pte = (pte_t *)pmd_page_vaddr(*pmd);
1206*4882a593Smuzhiyun 	pmd_clear(pmd);
1207*4882a593Smuzhiyun 
1208*4882a593Smuzhiyun 	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
1209*4882a593Smuzhiyun 
1210*4882a593Smuzhiyun 	pte_free_kernel(&init_mm, pte);
1211*4882a593Smuzhiyun 
1212*4882a593Smuzhiyun 	return 1;
1213*4882a593Smuzhiyun }
1214*4882a593Smuzhiyun 
1215*4882a593Smuzhiyun int __init arch_ioremap_p4d_supported(void)
1216*4882a593Smuzhiyun {
1217*4882a593Smuzhiyun 	return 0;
1218*4882a593Smuzhiyun }
1219