xref: /OK3568_Linux_fs/kernel/mm/sparse-vmemmap.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
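
/*
 * Illustrative sketch (not part of this file): with SPARSEMEM_VMEMMAP the
 * generic pfn <-> page conversions reduce to pointer arithmetic against the
 * vmemmap base, roughly as the asm-generic memory model defines them:
 *
 *	#define __pfn_to_page(pfn)	(vmemmap + (pfn))
 *	#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)
 *
 * There is no lookup table or per-section indirection; this file's job is
 * only to keep the vmemmap virtual range backed by real pages.
 */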
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/memremap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */

static void * __ref __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	return memblock_alloc_try_nid_raw(size, align, goal,
					  MEMBLOCK_ALLOC_ACCESSIBLE, node);
}

void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	/* If the main allocator is up use that, otherwise fall back to bootmem. */
	if (slab_is_available()) {
		gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
		int order = get_order(size);
		static bool warned;
		struct page *page;

		page = alloc_pages_node(node, gfp_mask, order);
		if (page)
			return page_address(page);

		if (!warned) {
			warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
				   "vmemmap alloc failure: order:%u", order);
			warned = true;
		}
		return NULL;
	} else
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap);

/* Callers need to use the same size for all allocations during the early stage. */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
					 struct vmem_altmap *altmap)
{
	void *ptr;

	if (altmap)
		return altmap_alloc_block_buf(size, altmap);

	ptr = sparse_buffer_alloc(size);
	if (!ptr)
		ptr = vmemmap_alloc_block(size, node);
	return ptr;
}

/* First pfn of the altmap reservation that has not yet been handed out. */
static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	return altmap->base_pfn + altmap->reserve + altmap->alloc
		+ altmap->align;
}

/* Number of pfns still available for allocation from the altmap. */
static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
	unsigned long allocated = altmap->alloc + altmap->align;

	if (altmap->free > allocated)
		return altmap->free - allocated;
	return 0;
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap)
{
	unsigned long pfn, nr_pfns, nr_align;

	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
				__func__, size);
		return NULL;
	}

	pfn = vmem_altmap_next_pfn(altmap);
	nr_pfns = size >> PAGE_SHIFT;
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;
	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return NULL;

	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	pfn += nr_align;

	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
	return __va(__pfn_to_phys(pfn));
}
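
/*
 * Worked example (illustrative numbers, not taken from this kernel): a
 * request of size = 2MB gives nr_pfns = 512 with 4K pages. find_first_bit()
 * returns the lowest set bit of nr_pfns, so nr_align starts out as 512,
 * i.e. the block is padded to its natural alignment. If the next free
 * altmap pfn is 0x10140, ALIGN(0x10140, 512) = 0x10200, so 0xc0 pfns are
 * accounted as alignment padding, alloc grows by 512, and the block handed
 * back starts at pfn 0x10200.
 */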

void __meminit vmemmap_verify(pte_t *pte, int node,
				unsigned long start, unsigned long end)
{
	unsigned long pfn = pte_pfn(*pte);
	int actual_node = early_pfn_to_nid(pfn);

	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
		pr_warn("[%lx-%lx] potential offnode page_structs\n",
			start, end - 1);
}

pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
				       struct vmem_altmap *altmap)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte)) {
		pte_t entry;
		void *p;

		p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
		if (!p)
			return NULL;
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}

static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
{
	void *p = vmemmap_alloc_block(size, node);

	if (!p)
		return NULL;
	memset(p, 0, size);

	return p;
}

pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_populate_kernel(&init_mm, pmd, p);
	}
	return pmd;
}

pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pud_populate(&init_mm, pud, p);
	}
	return pud;
}

p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
	p4d_t *p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		p4d_populate(&init_mm, p4d, p);
	}
	return p4d;
}

pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pgd_populate(&init_mm, pgd, p);
	}
	return pgd;
}

int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	unsigned long addr = start;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	for (; addr < end; addr += PAGE_SIZE) {
		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;
		p4d = vmemmap_p4d_populate(pgd, addr, node);
		if (!p4d)
			return -ENOMEM;
		pud = vmemmap_pud_populate(p4d, addr, node);
		if (!pud)
			return -ENOMEM;
		pmd = vmemmap_pmd_populate(pud, addr, node);
		if (!pmd)
			return -ENOMEM;
		pte = vmemmap_pte_populate(pmd, addr, node, altmap);
		if (!pte)
			return -ENOMEM;
		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
	}

	return 0;
}
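
/*
 * Minimal arch-side sketch (an illustration, not code from this file): an
 * architecture that is content to back the vmemmap with base pages can
 * implement the required hook by forwarding to the helper above, e.g.:
 *
 *	int __meminit vmemmap_populate(unsigned long start, unsigned long end,
 *				       int node, struct vmem_altmap *altmap)
 *	{
 *		return vmemmap_populate_basepages(start, end, node, altmap);
 *	}
 *
 * Architectures that can map the vmemmap with huge pages provide their own
 * vmemmap_populate() instead.
 */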

struct page * __meminit __populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
	unsigned long start = (unsigned long) pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);

	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
		return NULL;

	if (vmemmap_populate(start, end, nid, altmap))
		return NULL;

	return pfn_to_page(pfn);
}
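
/*
 * Back-of-the-envelope example (assuming 4K base pages, the usual 2MB
 * subsection granularity and a 64-byte struct page; other configurations
 * differ): one subsection covers PAGES_PER_SUBSECTION = 512 pfns, so each
 * aligned call here populates 512 * 64 = 32KB of memmap, i.e. eight base
 * pages of vmemmap per 2MB of managed memory.
 */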