// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2006
 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>

static DEFINE_MUTEX(vmem_mutex);

static void __ref *vmem_alloc_pages(unsigned int order)
{
	unsigned long size = PAGE_SIZE << order;

	if (slab_is_available())
		return (void *)__get_free_pages(GFP_KERNEL, order);
	return (void *) memblock_phys_alloc(size, size);
}

static void vmem_free_pages(unsigned long addr, int order)
{
	/* We don't expect boot memory to be removed ever. */
	if (!slab_is_available() ||
	    WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
		return;
	free_pages(addr, order);
}
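
/*
 * Both helpers above have to cope with two allocation phases: before the
 * buddy/slab allocator is up, memory comes straight from memblock, and the
 * __ref annotation lets vmem_alloc_pages() reference that __init code
 * without a section mismatch warning. Boot-time allocations are never
 * handed back; vmem_free_pages() therefore bails out early during boot and
 * warns if it is ever asked to free a reserved page.
 */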

void *vmem_crst_alloc(unsigned long val)
{
	unsigned long *table;

	table = vmem_alloc_pages(CRST_ALLOC_ORDER);
	if (table)
		crst_table_init(table, val);
	return table;
}
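
/*
 * Example: the callers below allocate region and segment tables with the
 * matching "empty" pattern, e.g. vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY) for
 * a segment (PMD) table or vmem_crst_alloc(_REGION3_ENTRY_EMPTY) for a
 * region-3 (PUD) table. The CRST_ALLOC_ORDER pages are initialized to that
 * value; NULL is returned on allocation failure.
 */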

pte_t __ref *vmem_pte_alloc(void)
{
	unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
	pte_t *pte;

	if (slab_is_available())
		pte = (pte_t *) page_table_alloc(&init_mm);
	else
		pte = (pte_t *) memblock_phys_alloc(size, size);
	if (!pte)
		return NULL;
	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

static void vmem_pte_free(unsigned long *table)
{
	/* We don't expect boot memory to be removed ever. */
	if (!slab_is_available() ||
	    WARN_ON_ONCE(PageReserved(virt_to_page(table))))
		return;
	page_table_free(&init_mm, table);
}
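
/*
 * Page table (PTE level) allocation follows the same two-phase scheme: the
 * table comes from memblock early during boot and from page_table_alloc()
 * once the slab allocator is available. All entries start out as
 * _PAGE_INVALID, and, as with vmem_free_pages(), tables allocated from boot
 * memory are never freed again.
 */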

#define PAGE_UNUSED 0xFD

/*
 * The unused vmemmap range, which was not yet memset(PAGE_UNUSED), ranges
 * from unused_pmd_start to the next PMD_SIZE boundary.
 */
static unsigned long unused_pmd_start;

static void vmemmap_flush_unused_pmd(void)
{
	if (!unused_pmd_start)
		return;
	memset(__va(unused_pmd_start), PAGE_UNUSED,
	       ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
	unused_pmd_start = 0;
}
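
/*
 * Illustrative sketch (hypothetical addresses): with PMD_SIZE being 1 MiB,
 * assume a previous vmemmap population left unused_pmd_start at 0x...c0000.
 * The flush above then fills the remaining 256 KiB up to the next 1 MiB
 * boundary with PAGE_UNUSED, so that vmemmap_unuse_sub_pmd() can later
 * detect a completely unused memmap frame.
 */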

static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
	/*
	 * As we expect to add in the same granularity as we remove, it's
	 * sufficient to mark only some piece used to block the memmap page from
	 * getting removed (just in case the memmap never gets initialized,
	 * e.g., because the memory block never gets onlined).
	 */
	memset(__va(start), 0, sizeof(struct page));
}

static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
	/*
	 * We only optimize if the new used range directly follows the
	 * previously unused range (esp., when populating consecutive sections).
	 */
	if (unused_pmd_start == start) {
		unused_pmd_start = end;
		if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE)))
			unused_pmd_start = 0;
		return;
	}
	vmemmap_flush_unused_pmd();
	__vmemmap_use_sub_pmd(start, end);
}
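
/*
 * The fast path above covers the common case of populating consecutive
 * sections: if the new range starts exactly where the recorded unused range
 * begins, the bookkeeping variable is simply advanced (and cleared once the
 * PMD boundary is reached), so no memset of PAGE_UNUSED is needed for the
 * skipped range at all.
 */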

static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
{
	void *page = __va(ALIGN_DOWN(start, PMD_SIZE));

	vmemmap_flush_unused_pmd();

	/* Could be our memmap page is filled with PAGE_UNUSED already ... */
	__vmemmap_use_sub_pmd(start, end);

	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
	if (!IS_ALIGNED(start, PMD_SIZE))
		memset(page, PAGE_UNUSED, start - __pa(page));
	/*
	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
	 * consecutive sections. For the last added PMD, remember the unused
	 * tail of the populated PMD.
	 */
	if (!IS_ALIGNED(end, PMD_SIZE))
		unused_pmd_start = end;
}
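
/*
 * Example (hypothetical layout): if a section's memmap only covers the
 * middle of a freshly allocated 1 MiB frame, the head of the frame up to
 * "start" is marked PAGE_UNUSED right away, while marking of the tail from
 * "end" to the PMD boundary is deferred via unused_pmd_start, in the hope
 * that the next section continues exactly there.
 */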

/* Returns true if the PMD is completely unused and can be freed. */
static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
{
	void *page = __va(ALIGN_DOWN(start, PMD_SIZE));

	vmemmap_flush_unused_pmd();
	memset(__va(start), PAGE_UNUSED, end - start);
	return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
}
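
/*
 * After marking the just removed range PAGE_UNUSED, memchr_inv() scans the
 * whole memmap frame; only if no byte differs from PAGE_UNUSED is the frame
 * reported as completely unused, allowing the caller to free the large
 * mapping backing it.
 */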

/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
				  unsigned long end, bool add, bool direct)
{
	unsigned long prot, pages = 0;
	int ret = -ENOMEM;
	pte_t *pte;

	prot = pgprot_val(PAGE_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_PAGE_NOEXEC;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (!add) {
			if (pte_none(*pte))
				continue;
			if (!direct)
				vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
			pte_clear(&init_mm, addr, pte);
		} else if (pte_none(*pte)) {
			if (!direct) {
				void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);

				if (!new_page)
					goto out;
				pte_val(*pte) = __pa(new_page) | prot;
			} else {
				pte_val(*pte) = addr | prot;
			}
		} else {
			continue;
		}
		pages++;
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
	return ret;
}
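
/*
 * Rough summary of the add/direct combinations handled above (sketch):
 *
 *	add,  direct	map addr 1:1 into the identity mapping
 *	add,  !direct	back the vmemmap with freshly allocated pages
 *	!add, direct	clear the PTEs, never free the backing memory
 *	!add, !direct	free the vmemmap backing pages, then clear the PTEs
 *
 * The PG_DIRECT_MAP_4K counter is only adjusted for the identity mapping.
 */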

static void try_free_pte_table(pmd_t *pmd, unsigned long start)
{
	pte_t *pte;
	int i;

	/* We can safely assume this is fully in 1:1 mapping & vmemmap area */
	pte = pte_offset_kernel(pmd, start);
	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
		if (!pte_none(*pte))
			return;
	}
	vmem_pte_free(__va(pmd_deref(*pmd)));
	pmd_clear(pmd);
}

/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
				  unsigned long end, bool add, bool direct)
{
	unsigned long next, prot, pages = 0;
	int ret = -ENOMEM;
	pmd_t *pmd;
	pte_t *pte;

	prot = pgprot_val(SEGMENT_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_SEGMENT_ENTRY_NOEXEC;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (!add) {
			if (pmd_none(*pmd))
				continue;
			if (pmd_large(*pmd)) {
				if (IS_ALIGNED(addr, PMD_SIZE) &&
				    IS_ALIGNED(next, PMD_SIZE)) {
					if (!direct)
						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
					pmd_clear(pmd);
					pages++;
				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
					pmd_clear(pmd);
				}
				continue;
			}
		} else if (pmd_none(*pmd)) {
			if (IS_ALIGNED(addr, PMD_SIZE) &&
			    IS_ALIGNED(next, PMD_SIZE) &&
			    MACHINE_HAS_EDAT1 && addr && direct &&
			    !debug_pagealloc_enabled()) {
				pmd_val(*pmd) = addr | prot;
				pages++;
				continue;
			} else if (!direct && MACHINE_HAS_EDAT1) {
				void *new_page;

				/*
				 * Use 1 MB frames for vmemmap if available. We
				 * always use large frames, even if they are
				 * only partially used, because otherwise we
				 * would also need page tables, since
				 * vmemmap_populate() gets called for each
				 * section separately.
				 */
				new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
				if (new_page) {
					pmd_val(*pmd) = __pa(new_page) | prot;
					if (!IS_ALIGNED(addr, PMD_SIZE) ||
					    !IS_ALIGNED(next, PMD_SIZE)) {
						vmemmap_use_new_sub_pmd(addr, next);
					}
					continue;
				}
			}
			pte = vmem_pte_alloc();
			if (!pte)
				goto out;
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_large(*pmd)) {
			if (!direct)
				vmemmap_use_sub_pmd(addr, next);
			continue;
		}
		ret = modify_pte_table(pmd, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_pte_table(pmd, addr & PMD_MASK);
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
	return ret;
}
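
/*
 * Same structure one level up: with EDAT1 both the identity mapping and the
 * vmemmap can use 1 MiB segment mappings directly (tracked via
 * PG_DIRECT_MAP_1M for the former), falling back to a PTE table from
 * vmem_pte_alloc() otherwise. On removal, partially unused vmemmap segments
 * are handled by the PAGE_UNUSED tracking above.
 */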

static void try_free_pmd_table(pud_t *pud, unsigned long start)
{
	const unsigned long end = start + PUD_SIZE;
	pmd_t *pmd;
	int i;

	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (end > VMALLOC_START)
		return;
#ifdef CONFIG_KASAN
	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
		return;
#endif
	pmd = pmd_offset(pud, start);
	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
		if (!pmd_none(*pmd))
			return;
	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
	pud_clear(pud);
}

static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
			    bool add, bool direct)
{
	unsigned long next, prot, pages = 0;
	int ret = -ENOMEM;
	pud_t *pud;
	pmd_t *pmd;

	prot = pgprot_val(REGION3_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_REGION_ENTRY_NOEXEC;
	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (!add) {
			if (pud_none(*pud))
				continue;
			if (pud_large(*pud)) {
				if (IS_ALIGNED(addr, PUD_SIZE) &&
				    IS_ALIGNED(next, PUD_SIZE)) {
					pud_clear(pud);
					pages++;
				}
				continue;
			}
		} else if (pud_none(*pud)) {
			if (IS_ALIGNED(addr, PUD_SIZE) &&
			    IS_ALIGNED(next, PUD_SIZE) &&
			    MACHINE_HAS_EDAT2 && addr && direct &&
			    !debug_pagealloc_enabled()) {
				pud_val(*pud) = addr | prot;
				pages++;
				continue;
			}
			pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			if (!pmd)
				goto out;
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_large(*pud)) {
			continue;
		}
		ret = modify_pmd_table(pud, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_pmd_table(pud, addr & PUD_MASK);
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
	return ret;
}

static void try_free_pud_table(p4d_t *p4d, unsigned long start)
{
	const unsigned long end = start + P4D_SIZE;
	pud_t *pud;
	int i;

	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (end > VMALLOC_START)
		return;
#ifdef CONFIG_KASAN
	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
		return;
#endif

	pud = pud_offset(p4d, start);
	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
		if (!pud_none(*pud))
			return;
	}
	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
	p4d_clear(p4d);
}

static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
			    bool add, bool direct)
{
	unsigned long next;
	int ret = -ENOMEM;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (!add) {
			if (p4d_none(*p4d))
				continue;
		} else if (p4d_none(*p4d)) {
			pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
			if (!pud)
				goto out;
			p4d_populate(&init_mm, p4d, pud);
		}
		ret = modify_pud_table(p4d, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_pud_table(p4d, addr & P4D_MASK);
	}
	ret = 0;
out:
	return ret;
}

static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
{
	const unsigned long end = start + PGDIR_SIZE;
	p4d_t *p4d;
	int i;

	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (end > VMALLOC_START)
		return;
#ifdef CONFIG_KASAN
	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
		return;
#endif

	p4d = p4d_offset(pgd, start);
	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
		if (!p4d_none(*p4d))
			return;
	}
	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
	pgd_clear(pgd);
}

static int modify_pagetable(unsigned long start, unsigned long end, bool add,
			    bool direct)
{
	unsigned long addr, next;
	int ret = -ENOMEM;
	pgd_t *pgd;
	p4d_t *p4d;

	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
		return -EINVAL;
	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset_k(addr);

		if (!add) {
			if (pgd_none(*pgd))
				continue;
		} else if (pgd_none(*pgd)) {
			p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
			if (!p4d)
				goto out;
			pgd_populate(&init_mm, pgd, p4d);
		}
		ret = modify_p4d_table(pgd, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_p4d_table(pgd, addr & PGDIR_MASK);
	}
	ret = 0;
out:
	if (!add)
		flush_tlb_kernel_range(start, end);
	return ret;
}
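
/*
 * modify_pagetable() is the common entry point for both the identity
 * mapping and the vmemmap: it walks pgd -> p4d -> pud -> pmd -> pte,
 * allocating missing tables on the way down when adding and trying to free
 * empty tables on the way back up when removing. The TLB is only flushed
 * for the removal case, since adding entries never turns an existing
 * translation stale.
 */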

static int add_pagetable(unsigned long start, unsigned long end, bool direct)
{
	return modify_pagetable(start, end, true, direct);
}

static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
{
	return modify_pagetable(start, end, false, direct);
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_range(unsigned long start, unsigned long size)
{
	return add_pagetable(start, start + size, true);
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
	remove_pagetable(start, start + size, true);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	int ret;

	mutex_lock(&vmem_mutex);
	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
	ret = add_pagetable(start, end, false);
	if (ret)
		remove_pagetable(start, end, false);
	mutex_unlock(&vmem_mutex);
	return ret;
}
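
/*
 * vmemmap_populate() is invoked by the core mm code for each memory section
 * whose struct page array is placed into the vmemmap; on failure the
 * partially created mapping is torn down again before returning.
 */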

void vmemmap_free(unsigned long start, unsigned long end,
		  struct vmem_altmap *altmap)
{
	mutex_lock(&vmem_mutex);
	remove_pagetable(start, end, false);
	mutex_unlock(&vmem_mutex);
}

void vmem_remove_mapping(unsigned long start, unsigned long size)
{
	mutex_lock(&vmem_mutex);
	vmem_remove_range(start, size);
	mutex_unlock(&vmem_mutex);
}

int vmem_add_mapping(unsigned long start, unsigned long size)
{
	int ret;

	if (start + size > VMEM_MAX_PHYS ||
	    start + size < start)
		return -ERANGE;

	mutex_lock(&vmem_mutex);
	ret = vmem_add_range(start, size);
	if (ret)
		vmem_remove_range(start, size);
	mutex_unlock(&vmem_mutex);
	return ret;
}
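
/*
 * Illustrative sketch (hypothetical caller and values): a user such as the
 * DCSS segment support could establish and later drop an identity mapping
 * roughly like this:
 *
 *	rc = vmem_add_mapping(seg_start, seg_size);
 *	if (rc)
 *		return rc;
 *	...
 *	vmem_remove_mapping(seg_start, seg_size);
 *
 * where seg_start + seg_size must not exceed VMEM_MAX_PHYS.
 */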

/*
 * Map the whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for vmemmap to hotplug
 * additional memory segments.
 */
void __init vmem_map_init(void)
{
	phys_addr_t base, end;
	u64 i;

	for_each_mem_range(i, &base, &end)
		vmem_add_range(base, end - base);
	__set_memory((unsigned long)_stext,
		     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);
	__set_memory((unsigned long)_etext,
		     (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT,
		     SET_MEMORY_RO);
	__set_memory((unsigned long)_sinittext,
		     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);
	__set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);

	/* we need lowcore executable for our LPSWE instructions */
	set_memory_x(0, 1);

	pr_info("Write protected kernel read-only data: %luk\n",
		(unsigned long)(__end_rodata - _stext) >> 10);
}