// SPDX-License-Identifier: GPL-2.0
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow the VM primitives pfn_to_page(), page_to_pfn(),
 * virt_to_page() and page_address() to be implemented as a simple base
 * plus offset calculation without any memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
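
/*
 * As a rough illustration of the "base offset calculation" above: with
 * CONFIG_SPARSEMEM_VMEMMAP the generic memory model implements the pfn
 * conversions along the lines of
 *
 *	#define __pfn_to_page(pfn)	(vmemmap + (pfn))
 *	#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)
 *
 * i.e. the struct page for pfn N lives at a fixed virtual offset from the
 * vmemmap base, and the code in this file only has to make sure that this
 * virtual range is actually backed by memory.
 */
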
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/memremap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */

static void * __ref __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	return memblock_alloc_try_nid_raw(size, align, goal,
					  MEMBLOCK_ALLOC_ACCESSIBLE, node);
}

void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	/* If the main allocator is up use that, fallback to bootmem. */
	if (slab_is_available()) {
		gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
		int order = get_order(size);
		static bool warned;
		struct page *page;

		page = alloc_pages_node(node, gfp_mask, order);
		if (page)
			return page_address(page);

		if (!warned) {
			warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
				   "vmemmap alloc failure: order:%u", order);
			warned = true;
		}
		return NULL;
	} else
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap);

/* Callers must keep the size consistent during the early (bootmem) stage. */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
					 struct vmem_altmap *altmap)
{
	void *ptr;

	if (altmap)
		return altmap_alloc_block_buf(size, altmap);

	ptr = sparse_buffer_alloc(size);
	if (!ptr)
		ptr = vmemmap_alloc_block(size, node);
	return ptr;
}
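
/*
 * A minimal usage sketch, assuming an architecture that maps the vmemmap
 * with PMD-sized pages: its vmemmap_populate() would typically grab one
 * huge-page-sized buffer per PMD entry, e.g.
 *
 *	void *p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
 *
 *	if (p)
 *		... install a PMD-level mapping of p ...
 *	else
 *		... fall back to vmemmap_pte_populate() for this range ...
 *
 * A non-NULL altmap routes the backing pages to device-provided memory
 * instead of regular RAM.
 */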

/* First pfn that has not yet been handed out from this altmap. */
static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	return altmap->base_pfn + altmap->reserve + altmap->alloc
		+ altmap->align;
}

/* Number of pfns still available for allocation from this altmap. */
static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
	unsigned long allocated = altmap->alloc + altmap->align;

	if (altmap->free > allocated)
		return altmap->free - allocated;
	return 0;
}

/*
 * Carve a suitably aligned, PAGE_SIZE-multiple block of pfns out of the
 * altmap instead of allocating regular memory.
 */
static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap)
{
	unsigned long pfn, nr_pfns, nr_align;

	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
			     __func__, size);
		return NULL;
	}

	pfn = vmem_altmap_next_pfn(altmap);
	nr_pfns = size >> PAGE_SHIFT;
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;
	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return NULL;

	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	pfn += nr_align;

	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
		 __func__, pfn, altmap->alloc, altmap->align, nr_pfns);
	return __va(__pfn_to_phys(pfn));
}
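
/*
 * Worked example with hypothetical numbers: with base_pfn = 0x100000,
 * reserve = 64, alloc = 0, align = 0 and free = 32768, a 2MB request
 * (nr_pfns = 512 on a 4K-page system) starts at pfn 0x100040.
 * find_first_bit() on 512 yields an alignment of 512 pfns, so the start
 * is rounded up to 0x100200, the 0x1c0 skipped pfns are recorded as
 * padding in altmap->align, and 512 pfns are accounted in altmap->alloc;
 * the next request would then begin at pfn 0x100400.
 */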

void __meminit vmemmap_verify(pte_t *pte, int node,
				unsigned long start, unsigned long end)
{
	unsigned long pfn = pte_pfn(*pte);
	int actual_node = early_pfn_to_nid(pfn);

	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
		pr_warn("[%lx-%lx] potential offnode page_structs\n",
			start, end - 1);
}

pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
				       struct vmem_altmap *altmap)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte)) {
		pte_t entry;
		void *p;

		p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
		if (!p)
			return NULL;
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}

static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
{
	void *p = vmemmap_alloc_block(size, node);

	if (!p)
		return NULL;
	memset(p, 0, size);

	return p;
}

pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_populate_kernel(&init_mm, pmd, p);
	}
	return pmd;
}

pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pud_populate(&init_mm, pud, p);
	}
	return pud;
}

p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
	p4d_t *p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		p4d_populate(&init_mm, p4d, p);
	}
	return p4d;
}

pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pgd_populate(&init_mm, pgd, p);
	}
	return pgd;
}

int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	unsigned long addr = start;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	for (; addr < end; addr += PAGE_SIZE) {
		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;
		p4d = vmemmap_p4d_populate(pgd, addr, node);
		if (!p4d)
			return -ENOMEM;
		pud = vmemmap_pud_populate(p4d, addr, node);
		if (!pud)
			return -ENOMEM;
		pmd = vmemmap_pmd_populate(pud, addr, node);
		if (!pmd)
			return -ENOMEM;
		pte = vmemmap_pte_populate(pmd, addr, node, altmap);
		if (!pte)
			return -ENOMEM;
		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
	}

	return 0;
}
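
/*
 * Illustrative sketch (not built here): an architecture with no special
 * mapping requirements can satisfy the vmemmap_populate() contract from
 * the header comment by delegating straight to the base-page walker:
 *
 *	int __meminit vmemmap_populate(unsigned long start, unsigned long end,
 *				       int node, struct vmem_altmap *altmap)
 *	{
 *		return vmemmap_populate_basepages(start, end, node, altmap);
 *	}
 *
 * Architectures that can use huge mappings typically try those first and
 * fall back to this base-page path.
 */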

struct page * __meminit __populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
	unsigned long start = (unsigned long) pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);

	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
		return NULL;

	if (vmemmap_populate(start, end, nid, altmap))
		return NULL;

	return pfn_to_page(pfn);
}
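
/*
 * Usage sketch (hypothetical caller, mirroring the sparse hotplug path):
 * populate the memmap for one subsection-aligned pfn range and obtain the
 * first struct page of that range:
 *
 *	struct page *map;
 *
 *	map = __populate_section_memmap(pfn, PAGES_PER_SUBSECTION, nid, NULL);
 *	if (!map)
 *		return -ENOMEM;
 *
 * Passing a non-NULL vmem_altmap instead places the struct pages in the
 * device-provided memory described by that altmap.
 */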