xref: /OK3568_Linux_fs/kernel/arch/x86/xen/setup.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Machine specific setup for xen
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
6*4882a593Smuzhiyun  */
7*4882a593Smuzhiyun 
8*4882a593Smuzhiyun #include <linux/init.h>
9*4882a593Smuzhiyun #include <linux/sched.h>
10*4882a593Smuzhiyun #include <linux/mm.h>
11*4882a593Smuzhiyun #include <linux/pm.h>
12*4882a593Smuzhiyun #include <linux/memblock.h>
13*4882a593Smuzhiyun #include <linux/cpuidle.h>
14*4882a593Smuzhiyun #include <linux/cpufreq.h>
15*4882a593Smuzhiyun #include <linux/memory_hotplug.h>
16*4882a593Smuzhiyun 
17*4882a593Smuzhiyun #include <asm/elf.h>
18*4882a593Smuzhiyun #include <asm/vdso.h>
19*4882a593Smuzhiyun #include <asm/e820/api.h>
20*4882a593Smuzhiyun #include <asm/setup.h>
21*4882a593Smuzhiyun #include <asm/acpi.h>
22*4882a593Smuzhiyun #include <asm/numa.h>
23*4882a593Smuzhiyun #include <asm/idtentry.h>
24*4882a593Smuzhiyun #include <asm/xen/hypervisor.h>
25*4882a593Smuzhiyun #include <asm/xen/hypercall.h>
26*4882a593Smuzhiyun 
27*4882a593Smuzhiyun #include <xen/xen.h>
28*4882a593Smuzhiyun #include <xen/page.h>
29*4882a593Smuzhiyun #include <xen/interface/callback.h>
30*4882a593Smuzhiyun #include <xen/interface/memory.h>
31*4882a593Smuzhiyun #include <xen/interface/physdev.h>
32*4882a593Smuzhiyun #include <xen/features.h>
33*4882a593Smuzhiyun #include <xen/hvc-console.h>
34*4882a593Smuzhiyun #include "xen-ops.h"
35*4882a593Smuzhiyun #include "mmu.h"
36*4882a593Smuzhiyun 
37*4882a593Smuzhiyun #define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
38*4882a593Smuzhiyun 
39*4882a593Smuzhiyun /* Amount of extra memory space we add to the e820 ranges */
40*4882a593Smuzhiyun struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
41*4882a593Smuzhiyun 
42*4882a593Smuzhiyun /* Number of pages released from the initial allocation. */
43*4882a593Smuzhiyun unsigned long xen_released_pages;
44*4882a593Smuzhiyun 
45*4882a593Smuzhiyun /* E820 map used during setting up memory. */
46*4882a593Smuzhiyun static struct e820_table xen_e820_table __initdata;
47*4882a593Smuzhiyun 
48*4882a593Smuzhiyun /*
49*4882a593Smuzhiyun  * Buffer used to remap identity mapped pages. We only need the virtual space.
50*4882a593Smuzhiyun  * The physical page behind this address is remapped as needed to different
51*4882a593Smuzhiyun  * buffer pages.
52*4882a593Smuzhiyun  */
53*4882a593Smuzhiyun #define REMAP_SIZE	(P2M_PER_PAGE - 3)
54*4882a593Smuzhiyun static struct {
55*4882a593Smuzhiyun 	unsigned long	next_area_mfn;
56*4882a593Smuzhiyun 	unsigned long	target_pfn;
57*4882a593Smuzhiyun 	unsigned long	size;
58*4882a593Smuzhiyun 	unsigned long	mfns[REMAP_SIZE];
59*4882a593Smuzhiyun } xen_remap_buf __initdata __aligned(PAGE_SIZE);
60*4882a593Smuzhiyun static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
61*4882a593Smuzhiyun 
62*4882a593Smuzhiyun /*
63*4882a593Smuzhiyun  * The maximum amount of extra memory compared to the base size.  The
64*4882a593Smuzhiyun  * main scaling factor is the size of struct page.  At extreme ratios
65*4882a593Smuzhiyun  * of base:extra, all the base memory can be filled with page
66*4882a593Smuzhiyun  * structures for the extra memory, leaving no space for anything
67*4882a593Smuzhiyun  * else.
68*4882a593Smuzhiyun  *
69*4882a593Smuzhiyun  * 10x seems like a reasonable balance between scaling flexibility and
70*4882a593Smuzhiyun  * leaving a practically usable system.
71*4882a593Smuzhiyun  */
72*4882a593Smuzhiyun #define EXTRA_MEM_RATIO		(10)
73*4882a593Smuzhiyun 
74*4882a593Smuzhiyun static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);
75*4882a593Smuzhiyun 
xen_parse_512gb(void)76*4882a593Smuzhiyun static void __init xen_parse_512gb(void)
77*4882a593Smuzhiyun {
78*4882a593Smuzhiyun 	bool val = false;
79*4882a593Smuzhiyun 	char *arg;
80*4882a593Smuzhiyun 
81*4882a593Smuzhiyun 	arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit");
82*4882a593Smuzhiyun 	if (!arg)
83*4882a593Smuzhiyun 		return;
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun 	arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit=");
86*4882a593Smuzhiyun 	if (!arg)
87*4882a593Smuzhiyun 		val = true;
88*4882a593Smuzhiyun 	else if (strtobool(arg + strlen("xen_512gb_limit="), &val))
89*4882a593Smuzhiyun 		return;
90*4882a593Smuzhiyun 
91*4882a593Smuzhiyun 	xen_512gb_limit = val;
92*4882a593Smuzhiyun }
93*4882a593Smuzhiyun 
xen_add_extra_mem(unsigned long start_pfn,unsigned long n_pfns)94*4882a593Smuzhiyun static void __init xen_add_extra_mem(unsigned long start_pfn,
95*4882a593Smuzhiyun 				     unsigned long n_pfns)
96*4882a593Smuzhiyun {
97*4882a593Smuzhiyun 	int i;
98*4882a593Smuzhiyun 
99*4882a593Smuzhiyun 	/*
100*4882a593Smuzhiyun 	 * No need to check for zero size, should happen rarely and will only
101*4882a593Smuzhiyun 	 * write a new entry regarded to be unused due to zero size.
102*4882a593Smuzhiyun 	 */
103*4882a593Smuzhiyun 	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
104*4882a593Smuzhiyun 		/* Add new region. */
105*4882a593Smuzhiyun 		if (xen_extra_mem[i].n_pfns == 0) {
106*4882a593Smuzhiyun 			xen_extra_mem[i].start_pfn = start_pfn;
107*4882a593Smuzhiyun 			xen_extra_mem[i].n_pfns = n_pfns;
108*4882a593Smuzhiyun 			break;
109*4882a593Smuzhiyun 		}
110*4882a593Smuzhiyun 		/* Append to existing region. */
111*4882a593Smuzhiyun 		if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns ==
112*4882a593Smuzhiyun 		    start_pfn) {
113*4882a593Smuzhiyun 			xen_extra_mem[i].n_pfns += n_pfns;
114*4882a593Smuzhiyun 			break;
115*4882a593Smuzhiyun 		}
116*4882a593Smuzhiyun 	}
117*4882a593Smuzhiyun 	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
118*4882a593Smuzhiyun 		printk(KERN_WARNING "Warning: not enough extra memory regions\n");
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun 	memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
121*4882a593Smuzhiyun }
122*4882a593Smuzhiyun 
xen_del_extra_mem(unsigned long start_pfn,unsigned long n_pfns)123*4882a593Smuzhiyun static void __init xen_del_extra_mem(unsigned long start_pfn,
124*4882a593Smuzhiyun 				     unsigned long n_pfns)
125*4882a593Smuzhiyun {
126*4882a593Smuzhiyun 	int i;
127*4882a593Smuzhiyun 	unsigned long start_r, size_r;
128*4882a593Smuzhiyun 
129*4882a593Smuzhiyun 	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
130*4882a593Smuzhiyun 		start_r = xen_extra_mem[i].start_pfn;
131*4882a593Smuzhiyun 		size_r = xen_extra_mem[i].n_pfns;
132*4882a593Smuzhiyun 
133*4882a593Smuzhiyun 		/* Start of region. */
134*4882a593Smuzhiyun 		if (start_r == start_pfn) {
135*4882a593Smuzhiyun 			BUG_ON(n_pfns > size_r);
136*4882a593Smuzhiyun 			xen_extra_mem[i].start_pfn += n_pfns;
137*4882a593Smuzhiyun 			xen_extra_mem[i].n_pfns -= n_pfns;
138*4882a593Smuzhiyun 			break;
139*4882a593Smuzhiyun 		}
140*4882a593Smuzhiyun 		/* End of region. */
141*4882a593Smuzhiyun 		if (start_r + size_r == start_pfn + n_pfns) {
142*4882a593Smuzhiyun 			BUG_ON(n_pfns > size_r);
143*4882a593Smuzhiyun 			xen_extra_mem[i].n_pfns -= n_pfns;
144*4882a593Smuzhiyun 			break;
145*4882a593Smuzhiyun 		}
146*4882a593Smuzhiyun 		/* Mid of region. */
147*4882a593Smuzhiyun 		if (start_pfn > start_r && start_pfn < start_r + size_r) {
148*4882a593Smuzhiyun 			BUG_ON(start_pfn + n_pfns > start_r + size_r);
149*4882a593Smuzhiyun 			xen_extra_mem[i].n_pfns = start_pfn - start_r;
150*4882a593Smuzhiyun 			/* Calling memblock_reserve() again is okay. */
151*4882a593Smuzhiyun 			xen_add_extra_mem(start_pfn + n_pfns, start_r + size_r -
152*4882a593Smuzhiyun 					  (start_pfn + n_pfns));
153*4882a593Smuzhiyun 			break;
154*4882a593Smuzhiyun 		}
155*4882a593Smuzhiyun 	}
156*4882a593Smuzhiyun 	memblock_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
157*4882a593Smuzhiyun }
158*4882a593Smuzhiyun 
159*4882a593Smuzhiyun /*
160*4882a593Smuzhiyun  * Called during boot before the p2m list can take entries beyond the
161*4882a593Smuzhiyun  * hypervisor supplied p2m list. Entries in extra mem are to be regarded as
162*4882a593Smuzhiyun  * invalid.
163*4882a593Smuzhiyun  */
xen_chk_extra_mem(unsigned long pfn)164*4882a593Smuzhiyun unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
165*4882a593Smuzhiyun {
166*4882a593Smuzhiyun 	int i;
167*4882a593Smuzhiyun 
168*4882a593Smuzhiyun 	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
169*4882a593Smuzhiyun 		if (pfn >= xen_extra_mem[i].start_pfn &&
170*4882a593Smuzhiyun 		    pfn < xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns)
171*4882a593Smuzhiyun 			return INVALID_P2M_ENTRY;
172*4882a593Smuzhiyun 	}
173*4882a593Smuzhiyun 
174*4882a593Smuzhiyun 	return IDENTITY_FRAME(pfn);
175*4882a593Smuzhiyun }
176*4882a593Smuzhiyun 
177*4882a593Smuzhiyun /*
178*4882a593Smuzhiyun  * Mark all pfns of extra mem as invalid in p2m list.
179*4882a593Smuzhiyun  */
xen_inv_extra_mem(void)180*4882a593Smuzhiyun void __init xen_inv_extra_mem(void)
181*4882a593Smuzhiyun {
182*4882a593Smuzhiyun 	unsigned long pfn, pfn_s, pfn_e;
183*4882a593Smuzhiyun 	int i;
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun 	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
186*4882a593Smuzhiyun 		if (!xen_extra_mem[i].n_pfns)
187*4882a593Smuzhiyun 			continue;
188*4882a593Smuzhiyun 		pfn_s = xen_extra_mem[i].start_pfn;
189*4882a593Smuzhiyun 		pfn_e = pfn_s + xen_extra_mem[i].n_pfns;
190*4882a593Smuzhiyun 		for (pfn = pfn_s; pfn < pfn_e; pfn++)
191*4882a593Smuzhiyun 			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
192*4882a593Smuzhiyun 	}
193*4882a593Smuzhiyun }
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun /*
196*4882a593Smuzhiyun  * Finds the next RAM pfn available in the E820 map after min_pfn.
197*4882a593Smuzhiyun  * This function updates min_pfn with the pfn found and returns
198*4882a593Smuzhiyun  * the size of that range or zero if not found.
199*4882a593Smuzhiyun  */
xen_find_pfn_range(unsigned long * min_pfn)200*4882a593Smuzhiyun static unsigned long __init xen_find_pfn_range(unsigned long *min_pfn)
201*4882a593Smuzhiyun {
202*4882a593Smuzhiyun 	const struct e820_entry *entry = xen_e820_table.entries;
203*4882a593Smuzhiyun 	unsigned int i;
204*4882a593Smuzhiyun 	unsigned long done = 0;
205*4882a593Smuzhiyun 
206*4882a593Smuzhiyun 	for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
207*4882a593Smuzhiyun 		unsigned long s_pfn;
208*4882a593Smuzhiyun 		unsigned long e_pfn;
209*4882a593Smuzhiyun 
210*4882a593Smuzhiyun 		if (entry->type != E820_TYPE_RAM)
211*4882a593Smuzhiyun 			continue;
212*4882a593Smuzhiyun 
213*4882a593Smuzhiyun 		e_pfn = PFN_DOWN(entry->addr + entry->size);
214*4882a593Smuzhiyun 
215*4882a593Smuzhiyun 		/* We only care about E820 after this */
216*4882a593Smuzhiyun 		if (e_pfn <= *min_pfn)
217*4882a593Smuzhiyun 			continue;
218*4882a593Smuzhiyun 
219*4882a593Smuzhiyun 		s_pfn = PFN_UP(entry->addr);
220*4882a593Smuzhiyun 
221*4882a593Smuzhiyun 		/* If min_pfn falls within the E820 entry, we want to start
222*4882a593Smuzhiyun 		 * at the min_pfn PFN.
223*4882a593Smuzhiyun 		 */
224*4882a593Smuzhiyun 		if (s_pfn <= *min_pfn) {
225*4882a593Smuzhiyun 			done = e_pfn - *min_pfn;
226*4882a593Smuzhiyun 		} else {
227*4882a593Smuzhiyun 			done = e_pfn - s_pfn;
228*4882a593Smuzhiyun 			*min_pfn = s_pfn;
229*4882a593Smuzhiyun 		}
230*4882a593Smuzhiyun 		break;
231*4882a593Smuzhiyun 	}
232*4882a593Smuzhiyun 
233*4882a593Smuzhiyun 	return done;
234*4882a593Smuzhiyun }
235*4882a593Smuzhiyun 
xen_free_mfn(unsigned long mfn)236*4882a593Smuzhiyun static int __init xen_free_mfn(unsigned long mfn)
237*4882a593Smuzhiyun {
238*4882a593Smuzhiyun 	struct xen_memory_reservation reservation = {
239*4882a593Smuzhiyun 		.address_bits = 0,
240*4882a593Smuzhiyun 		.extent_order = 0,
241*4882a593Smuzhiyun 		.domid        = DOMID_SELF
242*4882a593Smuzhiyun 	};
243*4882a593Smuzhiyun 
244*4882a593Smuzhiyun 	set_xen_guest_handle(reservation.extent_start, &mfn);
245*4882a593Smuzhiyun 	reservation.nr_extents = 1;
246*4882a593Smuzhiyun 
247*4882a593Smuzhiyun 	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
248*4882a593Smuzhiyun }
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun /*
251*4882a593Smuzhiyun  * This releases a chunk of memory and then does the identity map. It's used
252*4882a593Smuzhiyun  * as a fallback if the remapping fails.
253*4882a593Smuzhiyun  */
xen_set_identity_and_release_chunk(unsigned long start_pfn,unsigned long end_pfn,unsigned long nr_pages)254*4882a593Smuzhiyun static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
255*4882a593Smuzhiyun 			unsigned long end_pfn, unsigned long nr_pages)
256*4882a593Smuzhiyun {
257*4882a593Smuzhiyun 	unsigned long pfn, end;
258*4882a593Smuzhiyun 	int ret;
259*4882a593Smuzhiyun 
260*4882a593Smuzhiyun 	WARN_ON(start_pfn > end_pfn);
261*4882a593Smuzhiyun 
262*4882a593Smuzhiyun 	/* Release pages first. */
263*4882a593Smuzhiyun 	end = min(end_pfn, nr_pages);
264*4882a593Smuzhiyun 	for (pfn = start_pfn; pfn < end; pfn++) {
265*4882a593Smuzhiyun 		unsigned long mfn = pfn_to_mfn(pfn);
266*4882a593Smuzhiyun 
267*4882a593Smuzhiyun 		/* Make sure pfn exists to start with */
268*4882a593Smuzhiyun 		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
269*4882a593Smuzhiyun 			continue;
270*4882a593Smuzhiyun 
271*4882a593Smuzhiyun 		ret = xen_free_mfn(mfn);
272*4882a593Smuzhiyun 		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
273*4882a593Smuzhiyun 
274*4882a593Smuzhiyun 		if (ret == 1) {
275*4882a593Smuzhiyun 			xen_released_pages++;
276*4882a593Smuzhiyun 			if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
277*4882a593Smuzhiyun 				break;
278*4882a593Smuzhiyun 		} else
279*4882a593Smuzhiyun 			break;
280*4882a593Smuzhiyun 	}
281*4882a593Smuzhiyun 
282*4882a593Smuzhiyun 	set_phys_range_identity(start_pfn, end_pfn);
283*4882a593Smuzhiyun }
284*4882a593Smuzhiyun 
285*4882a593Smuzhiyun /*
286*4882a593Smuzhiyun  * Helper function to update the p2m and m2p tables and kernel mapping.
287*4882a593Smuzhiyun  */
xen_update_mem_tables(unsigned long pfn,unsigned long mfn)288*4882a593Smuzhiyun static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
289*4882a593Smuzhiyun {
290*4882a593Smuzhiyun 	struct mmu_update update = {
291*4882a593Smuzhiyun 		.ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
292*4882a593Smuzhiyun 		.val = pfn
293*4882a593Smuzhiyun 	};
294*4882a593Smuzhiyun 
295*4882a593Smuzhiyun 	/* Update p2m */
296*4882a593Smuzhiyun 	if (!set_phys_to_machine(pfn, mfn)) {
297*4882a593Smuzhiyun 		WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
298*4882a593Smuzhiyun 		     pfn, mfn);
299*4882a593Smuzhiyun 		BUG();
300*4882a593Smuzhiyun 	}
301*4882a593Smuzhiyun 
302*4882a593Smuzhiyun 	/* Update m2p */
303*4882a593Smuzhiyun 	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
304*4882a593Smuzhiyun 		WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
305*4882a593Smuzhiyun 		     mfn, pfn);
306*4882a593Smuzhiyun 		BUG();
307*4882a593Smuzhiyun 	}
308*4882a593Smuzhiyun 
309*4882a593Smuzhiyun 	/* Update kernel mapping, but not for highmem. */
310*4882a593Smuzhiyun 	if (pfn >= PFN_UP(__pa(high_memory - 1)))
311*4882a593Smuzhiyun 		return;
312*4882a593Smuzhiyun 
313*4882a593Smuzhiyun 	if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
314*4882a593Smuzhiyun 					 mfn_pte(mfn, PAGE_KERNEL), 0)) {
315*4882a593Smuzhiyun 		WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
316*4882a593Smuzhiyun 		      mfn, pfn);
317*4882a593Smuzhiyun 		BUG();
318*4882a593Smuzhiyun 	}
319*4882a593Smuzhiyun }
320*4882a593Smuzhiyun 
321*4882a593Smuzhiyun /*
322*4882a593Smuzhiyun  * This function updates the p2m and m2p tables with an identity map from
323*4882a593Smuzhiyun  * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
324*4882a593Smuzhiyun  * original allocation at remap_pfn. The information needed for remapping is
325*4882a593Smuzhiyun  * saved in the memory itself to avoid the need for allocating buffers. The
326*4882a593Smuzhiyun  * complete remap information is contained in a list of MFNs each containing
327*4882a593Smuzhiyun  * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
328*4882a593Smuzhiyun  * This enables us to preserve the original mfn sequence while doing the
329*4882a593Smuzhiyun  * remapping at a time when the memory management is capable of allocating
330*4882a593Smuzhiyun  * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
331*4882a593Smuzhiyun  * its callers.
332*4882a593Smuzhiyun  */
xen_do_set_identity_and_remap_chunk(unsigned long start_pfn,unsigned long size,unsigned long remap_pfn)333*4882a593Smuzhiyun static void __init xen_do_set_identity_and_remap_chunk(
334*4882a593Smuzhiyun         unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
335*4882a593Smuzhiyun {
336*4882a593Smuzhiyun 	unsigned long buf = (unsigned long)&xen_remap_buf;
337*4882a593Smuzhiyun 	unsigned long mfn_save, mfn;
338*4882a593Smuzhiyun 	unsigned long ident_pfn_iter, remap_pfn_iter;
339*4882a593Smuzhiyun 	unsigned long ident_end_pfn = start_pfn + size;
340*4882a593Smuzhiyun 	unsigned long left = size;
341*4882a593Smuzhiyun 	unsigned int i, chunk;
342*4882a593Smuzhiyun 
343*4882a593Smuzhiyun 	WARN_ON(size == 0);
344*4882a593Smuzhiyun 
345*4882a593Smuzhiyun 	mfn_save = virt_to_mfn(buf);
346*4882a593Smuzhiyun 
347*4882a593Smuzhiyun 	for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
348*4882a593Smuzhiyun 	     ident_pfn_iter < ident_end_pfn;
349*4882a593Smuzhiyun 	     ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
350*4882a593Smuzhiyun 		chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;
351*4882a593Smuzhiyun 
352*4882a593Smuzhiyun 		/* Map first pfn to xen_remap_buf */
353*4882a593Smuzhiyun 		mfn = pfn_to_mfn(ident_pfn_iter);
354*4882a593Smuzhiyun 		set_pte_mfn(buf, mfn, PAGE_KERNEL);
355*4882a593Smuzhiyun 
356*4882a593Smuzhiyun 		/* Save mapping information in page */
357*4882a593Smuzhiyun 		xen_remap_buf.next_area_mfn = xen_remap_mfn;
358*4882a593Smuzhiyun 		xen_remap_buf.target_pfn = remap_pfn_iter;
359*4882a593Smuzhiyun 		xen_remap_buf.size = chunk;
360*4882a593Smuzhiyun 		for (i = 0; i < chunk; i++)
361*4882a593Smuzhiyun 			xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);
362*4882a593Smuzhiyun 
363*4882a593Smuzhiyun 		/* Put remap buf into list. */
364*4882a593Smuzhiyun 		xen_remap_mfn = mfn;
365*4882a593Smuzhiyun 
366*4882a593Smuzhiyun 		/* Set identity map */
367*4882a593Smuzhiyun 		set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);
368*4882a593Smuzhiyun 
369*4882a593Smuzhiyun 		left -= chunk;
370*4882a593Smuzhiyun 	}
371*4882a593Smuzhiyun 
372*4882a593Smuzhiyun 	/* Restore old xen_remap_buf mapping */
373*4882a593Smuzhiyun 	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
374*4882a593Smuzhiyun }
375*4882a593Smuzhiyun 
376*4882a593Smuzhiyun /*
377*4882a593Smuzhiyun  * This function takes a contiguous pfn range that needs to be identity mapped
378*4882a593Smuzhiyun  * and:
379*4882a593Smuzhiyun  *
380*4882a593Smuzhiyun  *  1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
381*4882a593Smuzhiyun  *  2) Calls the do_ function to actually do the mapping/remapping work.
382*4882a593Smuzhiyun  *
383*4882a593Smuzhiyun  * The goal is to not allocate additional memory but to remap the existing
384*4882a593Smuzhiyun  * pages. In the case of an error the underlying memory is simply released back
385*4882a593Smuzhiyun  * to Xen and not remapped.
386*4882a593Smuzhiyun  */
xen_set_identity_and_remap_chunk(unsigned long start_pfn,unsigned long end_pfn,unsigned long nr_pages,unsigned long remap_pfn)387*4882a593Smuzhiyun static unsigned long __init xen_set_identity_and_remap_chunk(
388*4882a593Smuzhiyun 	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
389*4882a593Smuzhiyun 	unsigned long remap_pfn)
390*4882a593Smuzhiyun {
391*4882a593Smuzhiyun 	unsigned long pfn;
392*4882a593Smuzhiyun 	unsigned long i = 0;
393*4882a593Smuzhiyun 	unsigned long n = end_pfn - start_pfn;
394*4882a593Smuzhiyun 
395*4882a593Smuzhiyun 	if (remap_pfn == 0)
396*4882a593Smuzhiyun 		remap_pfn = nr_pages;
397*4882a593Smuzhiyun 
398*4882a593Smuzhiyun 	while (i < n) {
399*4882a593Smuzhiyun 		unsigned long cur_pfn = start_pfn + i;
400*4882a593Smuzhiyun 		unsigned long left = n - i;
401*4882a593Smuzhiyun 		unsigned long size = left;
402*4882a593Smuzhiyun 		unsigned long remap_range_size;
403*4882a593Smuzhiyun 
404*4882a593Smuzhiyun 		/* Do not remap pages beyond the current allocation */
405*4882a593Smuzhiyun 		if (cur_pfn >= nr_pages) {
406*4882a593Smuzhiyun 			/* Identity map remaining pages */
407*4882a593Smuzhiyun 			set_phys_range_identity(cur_pfn, cur_pfn + size);
408*4882a593Smuzhiyun 			break;
409*4882a593Smuzhiyun 		}
410*4882a593Smuzhiyun 		if (cur_pfn + size > nr_pages)
411*4882a593Smuzhiyun 			size = nr_pages - cur_pfn;
412*4882a593Smuzhiyun 
413*4882a593Smuzhiyun 		remap_range_size = xen_find_pfn_range(&remap_pfn);
414*4882a593Smuzhiyun 		if (!remap_range_size) {
415*4882a593Smuzhiyun 			pr_warn("Unable to find available pfn range, not remapping identity pages\n");
416*4882a593Smuzhiyun 			xen_set_identity_and_release_chunk(cur_pfn,
417*4882a593Smuzhiyun 						cur_pfn + left, nr_pages);
418*4882a593Smuzhiyun 			break;
419*4882a593Smuzhiyun 		}
420*4882a593Smuzhiyun 		/* Adjust size to fit in current e820 RAM region */
421*4882a593Smuzhiyun 		if (size > remap_range_size)
422*4882a593Smuzhiyun 			size = remap_range_size;
423*4882a593Smuzhiyun 
424*4882a593Smuzhiyun 		xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);
425*4882a593Smuzhiyun 
426*4882a593Smuzhiyun 		/* Update variables to reflect new mappings. */
427*4882a593Smuzhiyun 		i += size;
428*4882a593Smuzhiyun 		remap_pfn += size;
429*4882a593Smuzhiyun 	}
430*4882a593Smuzhiyun 
431*4882a593Smuzhiyun 	/*
432*4882a593Smuzhiyun 	 * If the PFNs are currently mapped, the VA mapping also needs
433*4882a593Smuzhiyun 	 * to be updated to be 1:1.
434*4882a593Smuzhiyun 	 */
435*4882a593Smuzhiyun 	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
436*4882a593Smuzhiyun 		(void)HYPERVISOR_update_va_mapping(
437*4882a593Smuzhiyun 			(unsigned long)__va(pfn << PAGE_SHIFT),
438*4882a593Smuzhiyun 			mfn_pte(pfn, PAGE_KERNEL_IO), 0);
439*4882a593Smuzhiyun 
440*4882a593Smuzhiyun 	return remap_pfn;
441*4882a593Smuzhiyun }
442*4882a593Smuzhiyun 
xen_count_remap_pages(unsigned long start_pfn,unsigned long end_pfn,unsigned long nr_pages,unsigned long remap_pages)443*4882a593Smuzhiyun static unsigned long __init xen_count_remap_pages(
444*4882a593Smuzhiyun 	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
445*4882a593Smuzhiyun 	unsigned long remap_pages)
446*4882a593Smuzhiyun {
447*4882a593Smuzhiyun 	if (start_pfn >= nr_pages)
448*4882a593Smuzhiyun 		return remap_pages;
449*4882a593Smuzhiyun 
450*4882a593Smuzhiyun 	return remap_pages + min(end_pfn, nr_pages) - start_pfn;
451*4882a593Smuzhiyun }
452*4882a593Smuzhiyun 
xen_foreach_remap_area(unsigned long nr_pages,unsigned long (* func)(unsigned long start_pfn,unsigned long end_pfn,unsigned long nr_pages,unsigned long last_val))453*4882a593Smuzhiyun static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages,
454*4882a593Smuzhiyun 	unsigned long (*func)(unsigned long start_pfn, unsigned long end_pfn,
455*4882a593Smuzhiyun 			      unsigned long nr_pages, unsigned long last_val))
456*4882a593Smuzhiyun {
457*4882a593Smuzhiyun 	phys_addr_t start = 0;
458*4882a593Smuzhiyun 	unsigned long ret_val = 0;
459*4882a593Smuzhiyun 	const struct e820_entry *entry = xen_e820_table.entries;
460*4882a593Smuzhiyun 	int i;
461*4882a593Smuzhiyun 
462*4882a593Smuzhiyun 	/*
463*4882a593Smuzhiyun 	 * Combine non-RAM regions and gaps until a RAM region (or the
464*4882a593Smuzhiyun 	 * end of the map) is reached, then call the provided function
465*4882a593Smuzhiyun 	 * to perform its duty on the non-RAM region.
466*4882a593Smuzhiyun 	 *
467*4882a593Smuzhiyun 	 * The combined non-RAM regions are rounded to a whole number
468*4882a593Smuzhiyun 	 * of pages so any partial pages are accessible via the 1:1
469*4882a593Smuzhiyun 	 * mapping.  This is needed for some BIOSes that put (for
470*4882a593Smuzhiyun 	 * example) the DMI tables in a reserved region that begins on
471*4882a593Smuzhiyun 	 * a non-page boundary.
472*4882a593Smuzhiyun 	 */
473*4882a593Smuzhiyun 	for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
474*4882a593Smuzhiyun 		phys_addr_t end = entry->addr + entry->size;
475*4882a593Smuzhiyun 		if (entry->type == E820_TYPE_RAM || i == xen_e820_table.nr_entries - 1) {
476*4882a593Smuzhiyun 			unsigned long start_pfn = PFN_DOWN(start);
477*4882a593Smuzhiyun 			unsigned long end_pfn = PFN_UP(end);
478*4882a593Smuzhiyun 
479*4882a593Smuzhiyun 			if (entry->type == E820_TYPE_RAM)
480*4882a593Smuzhiyun 				end_pfn = PFN_UP(entry->addr);
481*4882a593Smuzhiyun 
482*4882a593Smuzhiyun 			if (start_pfn < end_pfn)
483*4882a593Smuzhiyun 				ret_val = func(start_pfn, end_pfn, nr_pages,
484*4882a593Smuzhiyun 					       ret_val);
485*4882a593Smuzhiyun 			start = end;
486*4882a593Smuzhiyun 		}
487*4882a593Smuzhiyun 	}
488*4882a593Smuzhiyun 
489*4882a593Smuzhiyun 	return ret_val;
490*4882a593Smuzhiyun }
491*4882a593Smuzhiyun 
492*4882a593Smuzhiyun /*
493*4882a593Smuzhiyun  * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
494*4882a593Smuzhiyun  * The remap information (which mfn remap to which pfn) is contained in the
495*4882a593Smuzhiyun  * to be remapped memory itself in a linked list anchored at xen_remap_mfn.
496*4882a593Smuzhiyun  * This scheme allows to remap the different chunks in arbitrary order while
497*4882a593Smuzhiyun  * the resulting mapping will be independent from the order.
498*4882a593Smuzhiyun  */
xen_remap_memory(void)499*4882a593Smuzhiyun void __init xen_remap_memory(void)
500*4882a593Smuzhiyun {
501*4882a593Smuzhiyun 	unsigned long buf = (unsigned long)&xen_remap_buf;
502*4882a593Smuzhiyun 	unsigned long mfn_save, pfn;
503*4882a593Smuzhiyun 	unsigned long remapped = 0;
504*4882a593Smuzhiyun 	unsigned int i;
505*4882a593Smuzhiyun 	unsigned long pfn_s = ~0UL;
506*4882a593Smuzhiyun 	unsigned long len = 0;
507*4882a593Smuzhiyun 
508*4882a593Smuzhiyun 	mfn_save = virt_to_mfn(buf);
509*4882a593Smuzhiyun 
510*4882a593Smuzhiyun 	while (xen_remap_mfn != INVALID_P2M_ENTRY) {
511*4882a593Smuzhiyun 		/* Map the remap information */
512*4882a593Smuzhiyun 		set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);
513*4882a593Smuzhiyun 
514*4882a593Smuzhiyun 		BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);
515*4882a593Smuzhiyun 
516*4882a593Smuzhiyun 		pfn = xen_remap_buf.target_pfn;
517*4882a593Smuzhiyun 		for (i = 0; i < xen_remap_buf.size; i++) {
518*4882a593Smuzhiyun 			xen_update_mem_tables(pfn, xen_remap_buf.mfns[i]);
519*4882a593Smuzhiyun 			remapped++;
520*4882a593Smuzhiyun 			pfn++;
521*4882a593Smuzhiyun 		}
522*4882a593Smuzhiyun 		if (pfn_s == ~0UL || pfn == pfn_s) {
523*4882a593Smuzhiyun 			pfn_s = xen_remap_buf.target_pfn;
524*4882a593Smuzhiyun 			len += xen_remap_buf.size;
525*4882a593Smuzhiyun 		} else if (pfn_s + len == xen_remap_buf.target_pfn) {
526*4882a593Smuzhiyun 			len += xen_remap_buf.size;
527*4882a593Smuzhiyun 		} else {
528*4882a593Smuzhiyun 			xen_del_extra_mem(pfn_s, len);
529*4882a593Smuzhiyun 			pfn_s = xen_remap_buf.target_pfn;
530*4882a593Smuzhiyun 			len = xen_remap_buf.size;
531*4882a593Smuzhiyun 		}
532*4882a593Smuzhiyun 		xen_remap_mfn = xen_remap_buf.next_area_mfn;
533*4882a593Smuzhiyun 	}
534*4882a593Smuzhiyun 
535*4882a593Smuzhiyun 	if (pfn_s != ~0UL && len)
536*4882a593Smuzhiyun 		xen_del_extra_mem(pfn_s, len);
537*4882a593Smuzhiyun 
538*4882a593Smuzhiyun 	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
539*4882a593Smuzhiyun 
540*4882a593Smuzhiyun 	pr_info("Remapped %ld page(s)\n", remapped);
541*4882a593Smuzhiyun }
542*4882a593Smuzhiyun 
xen_get_pages_limit(void)543*4882a593Smuzhiyun static unsigned long __init xen_get_pages_limit(void)
544*4882a593Smuzhiyun {
545*4882a593Smuzhiyun 	unsigned long limit;
546*4882a593Smuzhiyun 
547*4882a593Smuzhiyun 	limit = MAXMEM / PAGE_SIZE;
548*4882a593Smuzhiyun 	if (!xen_initial_domain() && xen_512gb_limit)
549*4882a593Smuzhiyun 		limit = GB(512) / PAGE_SIZE;
550*4882a593Smuzhiyun 
551*4882a593Smuzhiyun 	return limit;
552*4882a593Smuzhiyun }
553*4882a593Smuzhiyun 
xen_get_max_pages(void)554*4882a593Smuzhiyun static unsigned long __init xen_get_max_pages(void)
555*4882a593Smuzhiyun {
556*4882a593Smuzhiyun 	unsigned long max_pages, limit;
557*4882a593Smuzhiyun 	domid_t domid = DOMID_SELF;
558*4882a593Smuzhiyun 	long ret;
559*4882a593Smuzhiyun 
560*4882a593Smuzhiyun 	limit = xen_get_pages_limit();
561*4882a593Smuzhiyun 	max_pages = limit;
562*4882a593Smuzhiyun 
563*4882a593Smuzhiyun 	/*
564*4882a593Smuzhiyun 	 * For the initial domain we use the maximum reservation as
565*4882a593Smuzhiyun 	 * the maximum page.
566*4882a593Smuzhiyun 	 *
567*4882a593Smuzhiyun 	 * For guest domains the current maximum reservation reflects
568*4882a593Smuzhiyun 	 * the current maximum rather than the static maximum. In this
569*4882a593Smuzhiyun 	 * case the e820 map provided to us will cover the static
570*4882a593Smuzhiyun 	 * maximum region.
571*4882a593Smuzhiyun 	 */
572*4882a593Smuzhiyun 	if (xen_initial_domain()) {
573*4882a593Smuzhiyun 		ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
574*4882a593Smuzhiyun 		if (ret > 0)
575*4882a593Smuzhiyun 			max_pages = ret;
576*4882a593Smuzhiyun 	}
577*4882a593Smuzhiyun 
578*4882a593Smuzhiyun 	return min(max_pages, limit);
579*4882a593Smuzhiyun }
580*4882a593Smuzhiyun 
xen_align_and_add_e820_region(phys_addr_t start,phys_addr_t size,int type)581*4882a593Smuzhiyun static void __init xen_align_and_add_e820_region(phys_addr_t start,
582*4882a593Smuzhiyun 						 phys_addr_t size, int type)
583*4882a593Smuzhiyun {
584*4882a593Smuzhiyun 	phys_addr_t end = start + size;
585*4882a593Smuzhiyun 
586*4882a593Smuzhiyun 	/* Align RAM regions to page boundaries. */
587*4882a593Smuzhiyun 	if (type == E820_TYPE_RAM) {
588*4882a593Smuzhiyun 		start = PAGE_ALIGN(start);
589*4882a593Smuzhiyun 		end &= ~((phys_addr_t)PAGE_SIZE - 1);
590*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_HOTPLUG
591*4882a593Smuzhiyun 		/*
592*4882a593Smuzhiyun 		 * Don't allow adding memory not in E820 map while booting the
593*4882a593Smuzhiyun 		 * system. Once the balloon driver is up it will remove that
594*4882a593Smuzhiyun 		 * restriction again.
595*4882a593Smuzhiyun 		 */
596*4882a593Smuzhiyun 		max_mem_size = end;
597*4882a593Smuzhiyun #endif
598*4882a593Smuzhiyun 	}
599*4882a593Smuzhiyun 
600*4882a593Smuzhiyun 	e820__range_add(start, end - start, type);
601*4882a593Smuzhiyun }
602*4882a593Smuzhiyun 
xen_ignore_unusable(void)603*4882a593Smuzhiyun static void __init xen_ignore_unusable(void)
604*4882a593Smuzhiyun {
605*4882a593Smuzhiyun 	struct e820_entry *entry = xen_e820_table.entries;
606*4882a593Smuzhiyun 	unsigned int i;
607*4882a593Smuzhiyun 
608*4882a593Smuzhiyun 	for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
609*4882a593Smuzhiyun 		if (entry->type == E820_TYPE_UNUSABLE)
610*4882a593Smuzhiyun 			entry->type = E820_TYPE_RAM;
611*4882a593Smuzhiyun 	}
612*4882a593Smuzhiyun }
613*4882a593Smuzhiyun 
xen_is_e820_reserved(phys_addr_t start,phys_addr_t size)614*4882a593Smuzhiyun bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size)
615*4882a593Smuzhiyun {
616*4882a593Smuzhiyun 	struct e820_entry *entry;
617*4882a593Smuzhiyun 	unsigned mapcnt;
618*4882a593Smuzhiyun 	phys_addr_t end;
619*4882a593Smuzhiyun 
620*4882a593Smuzhiyun 	if (!size)
621*4882a593Smuzhiyun 		return false;
622*4882a593Smuzhiyun 
623*4882a593Smuzhiyun 	end = start + size;
624*4882a593Smuzhiyun 	entry = xen_e820_table.entries;
625*4882a593Smuzhiyun 
626*4882a593Smuzhiyun 	for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) {
627*4882a593Smuzhiyun 		if (entry->type == E820_TYPE_RAM && entry->addr <= start &&
628*4882a593Smuzhiyun 		    (entry->addr + entry->size) >= end)
629*4882a593Smuzhiyun 			return false;
630*4882a593Smuzhiyun 
631*4882a593Smuzhiyun 		entry++;
632*4882a593Smuzhiyun 	}
633*4882a593Smuzhiyun 
634*4882a593Smuzhiyun 	return true;
635*4882a593Smuzhiyun }
636*4882a593Smuzhiyun 
637*4882a593Smuzhiyun /*
638*4882a593Smuzhiyun  * Find a free area in physical memory not yet reserved and compliant with
639*4882a593Smuzhiyun  * E820 map.
640*4882a593Smuzhiyun  * Used to relocate pre-allocated areas like initrd or p2m list which are in
641*4882a593Smuzhiyun  * conflict with the to be used E820 map.
642*4882a593Smuzhiyun  * In case no area is found, return 0. Otherwise return the physical address
643*4882a593Smuzhiyun  * of the area which is already reserved for convenience.
644*4882a593Smuzhiyun  */
xen_find_free_area(phys_addr_t size)645*4882a593Smuzhiyun phys_addr_t __init xen_find_free_area(phys_addr_t size)
646*4882a593Smuzhiyun {
647*4882a593Smuzhiyun 	unsigned mapcnt;
648*4882a593Smuzhiyun 	phys_addr_t addr, start;
649*4882a593Smuzhiyun 	struct e820_entry *entry = xen_e820_table.entries;
650*4882a593Smuzhiyun 
651*4882a593Smuzhiyun 	for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++, entry++) {
652*4882a593Smuzhiyun 		if (entry->type != E820_TYPE_RAM || entry->size < size)
653*4882a593Smuzhiyun 			continue;
654*4882a593Smuzhiyun 		start = entry->addr;
655*4882a593Smuzhiyun 		for (addr = start; addr < start + size; addr += PAGE_SIZE) {
656*4882a593Smuzhiyun 			if (!memblock_is_reserved(addr))
657*4882a593Smuzhiyun 				continue;
658*4882a593Smuzhiyun 			start = addr + PAGE_SIZE;
659*4882a593Smuzhiyun 			if (start + size > entry->addr + entry->size)
660*4882a593Smuzhiyun 				break;
661*4882a593Smuzhiyun 		}
662*4882a593Smuzhiyun 		if (addr >= start + size) {
663*4882a593Smuzhiyun 			memblock_reserve(start, size);
664*4882a593Smuzhiyun 			return start;
665*4882a593Smuzhiyun 		}
666*4882a593Smuzhiyun 	}
667*4882a593Smuzhiyun 
668*4882a593Smuzhiyun 	return 0;
669*4882a593Smuzhiyun }
670*4882a593Smuzhiyun 
671*4882a593Smuzhiyun /*
672*4882a593Smuzhiyun  * Like memcpy, but with physical addresses for dest and src.
673*4882a593Smuzhiyun  */
xen_phys_memcpy(phys_addr_t dest,phys_addr_t src,phys_addr_t n)674*4882a593Smuzhiyun static void __init xen_phys_memcpy(phys_addr_t dest, phys_addr_t src,
675*4882a593Smuzhiyun 				   phys_addr_t n)
676*4882a593Smuzhiyun {
677*4882a593Smuzhiyun 	phys_addr_t dest_off, src_off, dest_len, src_len, len;
678*4882a593Smuzhiyun 	void *from, *to;
679*4882a593Smuzhiyun 
680*4882a593Smuzhiyun 	while (n) {
681*4882a593Smuzhiyun 		dest_off = dest & ~PAGE_MASK;
682*4882a593Smuzhiyun 		src_off = src & ~PAGE_MASK;
683*4882a593Smuzhiyun 		dest_len = n;
684*4882a593Smuzhiyun 		if (dest_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off)
685*4882a593Smuzhiyun 			dest_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off;
686*4882a593Smuzhiyun 		src_len = n;
687*4882a593Smuzhiyun 		if (src_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off)
688*4882a593Smuzhiyun 			src_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off;
689*4882a593Smuzhiyun 		len = min(dest_len, src_len);
690*4882a593Smuzhiyun 		to = early_memremap(dest - dest_off, dest_len + dest_off);
691*4882a593Smuzhiyun 		from = early_memremap(src - src_off, src_len + src_off);
692*4882a593Smuzhiyun 		memcpy(to, from, len);
693*4882a593Smuzhiyun 		early_memunmap(to, dest_len + dest_off);
694*4882a593Smuzhiyun 		early_memunmap(from, src_len + src_off);
695*4882a593Smuzhiyun 		n -= len;
696*4882a593Smuzhiyun 		dest += len;
697*4882a593Smuzhiyun 		src += len;
698*4882a593Smuzhiyun 	}
699*4882a593Smuzhiyun }
700*4882a593Smuzhiyun 
701*4882a593Smuzhiyun /*
702*4882a593Smuzhiyun  * Reserve Xen mfn_list.
703*4882a593Smuzhiyun  */
xen_reserve_xen_mfnlist(void)704*4882a593Smuzhiyun static void __init xen_reserve_xen_mfnlist(void)
705*4882a593Smuzhiyun {
706*4882a593Smuzhiyun 	phys_addr_t start, size;
707*4882a593Smuzhiyun 
708*4882a593Smuzhiyun 	if (xen_start_info->mfn_list >= __START_KERNEL_map) {
709*4882a593Smuzhiyun 		start = __pa(xen_start_info->mfn_list);
710*4882a593Smuzhiyun 		size = PFN_ALIGN(xen_start_info->nr_pages *
711*4882a593Smuzhiyun 				 sizeof(unsigned long));
712*4882a593Smuzhiyun 	} else {
713*4882a593Smuzhiyun 		start = PFN_PHYS(xen_start_info->first_p2m_pfn);
714*4882a593Smuzhiyun 		size = PFN_PHYS(xen_start_info->nr_p2m_frames);
715*4882a593Smuzhiyun 	}
716*4882a593Smuzhiyun 
717*4882a593Smuzhiyun 	memblock_reserve(start, size);
718*4882a593Smuzhiyun 	if (!xen_is_e820_reserved(start, size))
719*4882a593Smuzhiyun 		return;
720*4882a593Smuzhiyun 
721*4882a593Smuzhiyun 	xen_relocate_p2m();
722*4882a593Smuzhiyun 	memblock_free(start, size);
723*4882a593Smuzhiyun }
724*4882a593Smuzhiyun 
725*4882a593Smuzhiyun /**
726*4882a593Smuzhiyun  * machine_specific_memory_setup - Hook for machine specific memory setup.
727*4882a593Smuzhiyun  **/
xen_memory_setup(void)728*4882a593Smuzhiyun char * __init xen_memory_setup(void)
729*4882a593Smuzhiyun {
730*4882a593Smuzhiyun 	unsigned long max_pfn, pfn_s, n_pfns;
731*4882a593Smuzhiyun 	phys_addr_t mem_end, addr, size, chunk_size;
732*4882a593Smuzhiyun 	u32 type;
733*4882a593Smuzhiyun 	int rc;
734*4882a593Smuzhiyun 	struct xen_memory_map memmap;
735*4882a593Smuzhiyun 	unsigned long max_pages;
736*4882a593Smuzhiyun 	unsigned long extra_pages = 0;
737*4882a593Smuzhiyun 	int i;
738*4882a593Smuzhiyun 	int op;
739*4882a593Smuzhiyun 
740*4882a593Smuzhiyun 	xen_parse_512gb();
741*4882a593Smuzhiyun 	max_pfn = xen_get_pages_limit();
742*4882a593Smuzhiyun 	max_pfn = min(max_pfn, xen_start_info->nr_pages);
743*4882a593Smuzhiyun 	mem_end = PFN_PHYS(max_pfn);
744*4882a593Smuzhiyun 
745*4882a593Smuzhiyun 	memmap.nr_entries = ARRAY_SIZE(xen_e820_table.entries);
746*4882a593Smuzhiyun 	set_xen_guest_handle(memmap.buffer, xen_e820_table.entries);
747*4882a593Smuzhiyun 
748*4882a593Smuzhiyun #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON)
749*4882a593Smuzhiyun 	xen_saved_max_mem_size = max_mem_size;
750*4882a593Smuzhiyun #endif
751*4882a593Smuzhiyun 
752*4882a593Smuzhiyun 	op = xen_initial_domain() ?
753*4882a593Smuzhiyun 		XENMEM_machine_memory_map :
754*4882a593Smuzhiyun 		XENMEM_memory_map;
755*4882a593Smuzhiyun 	rc = HYPERVISOR_memory_op(op, &memmap);
756*4882a593Smuzhiyun 	if (rc == -ENOSYS) {
757*4882a593Smuzhiyun 		BUG_ON(xen_initial_domain());
758*4882a593Smuzhiyun 		memmap.nr_entries = 1;
759*4882a593Smuzhiyun 		xen_e820_table.entries[0].addr = 0ULL;
760*4882a593Smuzhiyun 		xen_e820_table.entries[0].size = mem_end;
761*4882a593Smuzhiyun 		/* 8MB slack (to balance backend allocations). */
762*4882a593Smuzhiyun 		xen_e820_table.entries[0].size += 8ULL << 20;
763*4882a593Smuzhiyun 		xen_e820_table.entries[0].type = E820_TYPE_RAM;
764*4882a593Smuzhiyun 		rc = 0;
765*4882a593Smuzhiyun 	}
766*4882a593Smuzhiyun 	BUG_ON(rc);
767*4882a593Smuzhiyun 	BUG_ON(memmap.nr_entries == 0);
768*4882a593Smuzhiyun 	xen_e820_table.nr_entries = memmap.nr_entries;
769*4882a593Smuzhiyun 
770*4882a593Smuzhiyun 	/*
771*4882a593Smuzhiyun 	 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
772*4882a593Smuzhiyun 	 * regions, so if we're using the machine memory map leave the
773*4882a593Smuzhiyun 	 * region as RAM as it is in the pseudo-physical map.
774*4882a593Smuzhiyun 	 *
775*4882a593Smuzhiyun 	 * UNUSABLE regions in domUs are not handled and will need
776*4882a593Smuzhiyun 	 * a patch in the future.
777*4882a593Smuzhiyun 	 */
778*4882a593Smuzhiyun 	if (xen_initial_domain())
779*4882a593Smuzhiyun 		xen_ignore_unusable();
780*4882a593Smuzhiyun 
781*4882a593Smuzhiyun 	/* Make sure the Xen-supplied memory map is well-ordered. */
782*4882a593Smuzhiyun 	e820__update_table(&xen_e820_table);
783*4882a593Smuzhiyun 
784*4882a593Smuzhiyun 	max_pages = xen_get_max_pages();
785*4882a593Smuzhiyun 
786*4882a593Smuzhiyun 	/* How many extra pages do we need due to remapping? */
787*4882a593Smuzhiyun 	max_pages += xen_foreach_remap_area(max_pfn, xen_count_remap_pages);
788*4882a593Smuzhiyun 
789*4882a593Smuzhiyun 	if (max_pages > max_pfn)
790*4882a593Smuzhiyun 		extra_pages += max_pages - max_pfn;
791*4882a593Smuzhiyun 
792*4882a593Smuzhiyun 	/*
793*4882a593Smuzhiyun 	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
794*4882a593Smuzhiyun 	 * factor the base size.
795*4882a593Smuzhiyun 	 *
796*4882a593Smuzhiyun 	 * Make sure we have no memory above max_pages, as this area
797*4882a593Smuzhiyun 	 * isn't handled by the p2m management.
798*4882a593Smuzhiyun 	 */
799*4882a593Smuzhiyun 	extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
800*4882a593Smuzhiyun 			   extra_pages, max_pages - max_pfn);
801*4882a593Smuzhiyun 	i = 0;
802*4882a593Smuzhiyun 	addr = xen_e820_table.entries[0].addr;
803*4882a593Smuzhiyun 	size = xen_e820_table.entries[0].size;
804*4882a593Smuzhiyun 	while (i < xen_e820_table.nr_entries) {
805*4882a593Smuzhiyun 		bool discard = false;
806*4882a593Smuzhiyun 
807*4882a593Smuzhiyun 		chunk_size = size;
808*4882a593Smuzhiyun 		type = xen_e820_table.entries[i].type;
809*4882a593Smuzhiyun 
810*4882a593Smuzhiyun 		if (type == E820_TYPE_RAM) {
811*4882a593Smuzhiyun 			if (addr < mem_end) {
812*4882a593Smuzhiyun 				chunk_size = min(size, mem_end - addr);
813*4882a593Smuzhiyun 			} else if (extra_pages) {
814*4882a593Smuzhiyun 				chunk_size = min(size, PFN_PHYS(extra_pages));
815*4882a593Smuzhiyun 				pfn_s = PFN_UP(addr);
816*4882a593Smuzhiyun 				n_pfns = PFN_DOWN(addr + chunk_size) - pfn_s;
817*4882a593Smuzhiyun 				extra_pages -= n_pfns;
818*4882a593Smuzhiyun 				xen_add_extra_mem(pfn_s, n_pfns);
819*4882a593Smuzhiyun 				xen_max_p2m_pfn = pfn_s + n_pfns;
820*4882a593Smuzhiyun 			} else
821*4882a593Smuzhiyun 				discard = true;
822*4882a593Smuzhiyun 		}
823*4882a593Smuzhiyun 
824*4882a593Smuzhiyun 		if (!discard)
825*4882a593Smuzhiyun 			xen_align_and_add_e820_region(addr, chunk_size, type);
826*4882a593Smuzhiyun 
827*4882a593Smuzhiyun 		addr += chunk_size;
828*4882a593Smuzhiyun 		size -= chunk_size;
829*4882a593Smuzhiyun 		if (size == 0) {
830*4882a593Smuzhiyun 			i++;
831*4882a593Smuzhiyun 			if (i < xen_e820_table.nr_entries) {
832*4882a593Smuzhiyun 				addr = xen_e820_table.entries[i].addr;
833*4882a593Smuzhiyun 				size = xen_e820_table.entries[i].size;
834*4882a593Smuzhiyun 			}
835*4882a593Smuzhiyun 		}
836*4882a593Smuzhiyun 	}
837*4882a593Smuzhiyun 
838*4882a593Smuzhiyun 	/*
839*4882a593Smuzhiyun 	 * Set the rest as identity mapped, in case PCI BARs are
840*4882a593Smuzhiyun 	 * located here.
841*4882a593Smuzhiyun 	 */
842*4882a593Smuzhiyun 	set_phys_range_identity(addr / PAGE_SIZE, ~0ul);
843*4882a593Smuzhiyun 
844*4882a593Smuzhiyun 	/*
845*4882a593Smuzhiyun 	 * In domU, the ISA region is normal, usable memory, but we
846*4882a593Smuzhiyun 	 * reserve ISA memory anyway because too many things poke
847*4882a593Smuzhiyun 	 * about in there.
848*4882a593Smuzhiyun 	 */
849*4882a593Smuzhiyun 	e820__range_add(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_TYPE_RESERVED);
850*4882a593Smuzhiyun 
851*4882a593Smuzhiyun 	e820__update_table(e820_table);
852*4882a593Smuzhiyun 
853*4882a593Smuzhiyun 	/*
854*4882a593Smuzhiyun 	 * Check whether the kernel itself conflicts with the target E820 map.
855*4882a593Smuzhiyun 	 * Failing now is better than running into weird problems later due
856*4882a593Smuzhiyun 	 * to relocating (and even reusing) pages with kernel text or data.
857*4882a593Smuzhiyun 	 */
858*4882a593Smuzhiyun 	if (xen_is_e820_reserved(__pa_symbol(_text),
859*4882a593Smuzhiyun 			__pa_symbol(__bss_stop) - __pa_symbol(_text))) {
860*4882a593Smuzhiyun 		xen_raw_console_write("Xen hypervisor allocated kernel memory conflicts with E820 map\n");
861*4882a593Smuzhiyun 		BUG();
862*4882a593Smuzhiyun 	}
863*4882a593Smuzhiyun 
864*4882a593Smuzhiyun 	/*
865*4882a593Smuzhiyun 	 * Check for a conflict of the hypervisor supplied page tables with
866*4882a593Smuzhiyun 	 * the target E820 map.
867*4882a593Smuzhiyun 	 */
868*4882a593Smuzhiyun 	xen_pt_check_e820();
869*4882a593Smuzhiyun 
870*4882a593Smuzhiyun 	xen_reserve_xen_mfnlist();
871*4882a593Smuzhiyun 
872*4882a593Smuzhiyun 	/* Check for a conflict of the initrd with the target E820 map. */
873*4882a593Smuzhiyun 	if (xen_is_e820_reserved(boot_params.hdr.ramdisk_image,
874*4882a593Smuzhiyun 				 boot_params.hdr.ramdisk_size)) {
875*4882a593Smuzhiyun 		phys_addr_t new_area, start, size;
876*4882a593Smuzhiyun 
877*4882a593Smuzhiyun 		new_area = xen_find_free_area(boot_params.hdr.ramdisk_size);
878*4882a593Smuzhiyun 		if (!new_area) {
879*4882a593Smuzhiyun 			xen_raw_console_write("Can't find new memory area for initrd needed due to E820 map conflict\n");
880*4882a593Smuzhiyun 			BUG();
881*4882a593Smuzhiyun 		}
882*4882a593Smuzhiyun 
883*4882a593Smuzhiyun 		start = boot_params.hdr.ramdisk_image;
884*4882a593Smuzhiyun 		size = boot_params.hdr.ramdisk_size;
885*4882a593Smuzhiyun 		xen_phys_memcpy(new_area, start, size);
886*4882a593Smuzhiyun 		pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n",
887*4882a593Smuzhiyun 			start, start + size, new_area, new_area + size);
888*4882a593Smuzhiyun 		memblock_free(start, size);
889*4882a593Smuzhiyun 		boot_params.hdr.ramdisk_image = new_area;
890*4882a593Smuzhiyun 		boot_params.ext_ramdisk_image = new_area >> 32;
891*4882a593Smuzhiyun 	}
892*4882a593Smuzhiyun 
893*4882a593Smuzhiyun 	/*
894*4882a593Smuzhiyun 	 * Set identity map on non-RAM pages and prepare remapping the
895*4882a593Smuzhiyun 	 * underlying RAM.
896*4882a593Smuzhiyun 	 */
897*4882a593Smuzhiyun 	xen_foreach_remap_area(max_pfn, xen_set_identity_and_remap_chunk);
898*4882a593Smuzhiyun 
899*4882a593Smuzhiyun 	pr_info("Released %ld page(s)\n", xen_released_pages);
900*4882a593Smuzhiyun 
901*4882a593Smuzhiyun 	return "Xen";
902*4882a593Smuzhiyun }
903*4882a593Smuzhiyun 
register_callback(unsigned type,const void * func)904*4882a593Smuzhiyun static int register_callback(unsigned type, const void *func)
905*4882a593Smuzhiyun {
906*4882a593Smuzhiyun 	struct callback_register callback = {
907*4882a593Smuzhiyun 		.type = type,
908*4882a593Smuzhiyun 		.address = XEN_CALLBACK(__KERNEL_CS, func),
909*4882a593Smuzhiyun 		.flags = CALLBACKF_mask_events,
910*4882a593Smuzhiyun 	};
911*4882a593Smuzhiyun 
912*4882a593Smuzhiyun 	return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
913*4882a593Smuzhiyun }
914*4882a593Smuzhiyun 
xen_enable_sysenter(void)915*4882a593Smuzhiyun void xen_enable_sysenter(void)
916*4882a593Smuzhiyun {
917*4882a593Smuzhiyun 	int ret;
918*4882a593Smuzhiyun 	unsigned sysenter_feature;
919*4882a593Smuzhiyun 
920*4882a593Smuzhiyun 	sysenter_feature = X86_FEATURE_SYSENTER32;
921*4882a593Smuzhiyun 
922*4882a593Smuzhiyun 	if (!boot_cpu_has(sysenter_feature))
923*4882a593Smuzhiyun 		return;
924*4882a593Smuzhiyun 
925*4882a593Smuzhiyun 	ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat);
926*4882a593Smuzhiyun 	if(ret != 0)
927*4882a593Smuzhiyun 		setup_clear_cpu_cap(sysenter_feature);
928*4882a593Smuzhiyun }
929*4882a593Smuzhiyun 
xen_enable_syscall(void)930*4882a593Smuzhiyun void xen_enable_syscall(void)
931*4882a593Smuzhiyun {
932*4882a593Smuzhiyun 	int ret;
933*4882a593Smuzhiyun 
934*4882a593Smuzhiyun 	ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
935*4882a593Smuzhiyun 	if (ret != 0) {
936*4882a593Smuzhiyun 		printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
937*4882a593Smuzhiyun 		/* Pretty fatal; 64-bit userspace has no other
938*4882a593Smuzhiyun 		   mechanism for syscalls. */
939*4882a593Smuzhiyun 	}
940*4882a593Smuzhiyun 
941*4882a593Smuzhiyun 	if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
942*4882a593Smuzhiyun 		ret = register_callback(CALLBACKTYPE_syscall32,
943*4882a593Smuzhiyun 					xen_entry_SYSCALL_compat);
944*4882a593Smuzhiyun 		if (ret != 0)
945*4882a593Smuzhiyun 			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
946*4882a593Smuzhiyun 	}
947*4882a593Smuzhiyun }
948*4882a593Smuzhiyun 
xen_pvmmu_arch_setup(void)949*4882a593Smuzhiyun static void __init xen_pvmmu_arch_setup(void)
950*4882a593Smuzhiyun {
951*4882a593Smuzhiyun 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
952*4882a593Smuzhiyun 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
953*4882a593Smuzhiyun 
954*4882a593Smuzhiyun 	HYPERVISOR_vm_assist(VMASST_CMD_enable,
955*4882a593Smuzhiyun 			     VMASST_TYPE_pae_extended_cr3);
956*4882a593Smuzhiyun 
957*4882a593Smuzhiyun 	if (register_callback(CALLBACKTYPE_event,
958*4882a593Smuzhiyun 			      xen_asm_exc_xen_hypervisor_callback) ||
959*4882a593Smuzhiyun 	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
960*4882a593Smuzhiyun 		BUG();
961*4882a593Smuzhiyun 
962*4882a593Smuzhiyun 	xen_enable_sysenter();
963*4882a593Smuzhiyun 	xen_enable_syscall();
964*4882a593Smuzhiyun }
965*4882a593Smuzhiyun 
966*4882a593Smuzhiyun /* This function is not called for HVM domains */
xen_arch_setup(void)967*4882a593Smuzhiyun void __init xen_arch_setup(void)
968*4882a593Smuzhiyun {
969*4882a593Smuzhiyun 	xen_panic_handler_init();
970*4882a593Smuzhiyun 	xen_pvmmu_arch_setup();
971*4882a593Smuzhiyun 
972*4882a593Smuzhiyun #ifdef CONFIG_ACPI
973*4882a593Smuzhiyun 	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
974*4882a593Smuzhiyun 		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
975*4882a593Smuzhiyun 		disable_acpi();
976*4882a593Smuzhiyun 	}
977*4882a593Smuzhiyun #endif
978*4882a593Smuzhiyun 
979*4882a593Smuzhiyun 	memcpy(boot_command_line, xen_start_info->cmd_line,
980*4882a593Smuzhiyun 	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
981*4882a593Smuzhiyun 	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
982*4882a593Smuzhiyun 
983*4882a593Smuzhiyun 	/* Set up idle, making sure it calls safe_halt() pvop */
984*4882a593Smuzhiyun 	disable_cpuidle();
985*4882a593Smuzhiyun 	disable_cpufreq();
986*4882a593Smuzhiyun 	WARN_ON(xen_set_default_idle());
987*4882a593Smuzhiyun #ifdef CONFIG_NUMA
988*4882a593Smuzhiyun 	numa_off = 1;
989*4882a593Smuzhiyun #endif
990*4882a593Smuzhiyun }
991