// SPDX-License-Identifier: GPL-2.0
/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/init.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/memblock.h>
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>
#include <linux/memory_hotplug.h>

#include <asm/elf.h>
#include <asm/vdso.h>
#include <asm/e820/api.h>
#include <asm/setup.h>
#include <asm/acpi.h>
#include <asm/numa.h>
#include <asm/idtentry.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"

#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)

/* Amount of extra memory space we add to the e820 ranges */
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;

/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;

/* E820 map used during setting up memory. */
static struct e820_table xen_e820_table __initdata;

/*
 * Buffer used to remap identity mapped pages. We only need the virtual space.
 * The physical page behind this address is remapped as needed to different
 * buffer pages.
 */
#define REMAP_SIZE	(P2M_PER_PAGE - 3)
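/*
 * The "- 3" above leaves room for the three bookkeeping fields of
 * xen_remap_buf, so the whole structure fits in exactly one page
 * (P2M_PER_PAGE being the number of unsigned longs per page).
 */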
static struct {
	unsigned long next_area_mfn;
	unsigned long target_pfn;
	unsigned long size;
	unsigned long mfns[REMAP_SIZE];
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;

/*
 * The maximum amount of extra memory compared to the base size. The
 * main scaling factor is the size of struct page. At extreme ratios
 * of base:extra, all the base memory can be filled with page
 * structures for the extra memory, leaving no space for anything
 * else.
 *
 * 10x seems like a reasonable balance between scaling flexibility and
 * leaving a practically usable system.
 */
#define EXTRA_MEM_RATIO		(10)

static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);

static void __init xen_parse_512gb(void)
{
	bool val = false;
	char *arg;

	arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit");
	if (!arg)
		return;

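	/* A bare "xen_512gb_limit" (without '=') enables the limit. */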
	arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit=");
	if (!arg)
		val = true;
	else if (strtobool(arg + strlen("xen_512gb_limit="), &val))
		return;

	xen_512gb_limit = val;
}

static void __init xen_add_extra_mem(unsigned long start_pfn,
				     unsigned long n_pfns)
{
	int i;

	/*
	 * No need to check for a zero size; that should happen rarely and
	 * would only write a new entry which is regarded as unused due to
	 * its zero size.
	 */
	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		/* Add new region. */
		if (xen_extra_mem[i].n_pfns == 0) {
			xen_extra_mem[i].start_pfn = start_pfn;
			xen_extra_mem[i].n_pfns = n_pfns;
			break;
		}
		/* Append to existing region. */
		if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns ==
		    start_pfn) {
			xen_extra_mem[i].n_pfns += n_pfns;
			break;
		}
	}
	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
		printk(KERN_WARNING "Warning: not enough extra memory regions\n");

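	/*
	 * Keep the extra memory away from the normal allocators; its p2m
	 * entries are not valid until the pages are handed out by the
	 * balloon driver.
	 */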
	memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
}

static void __init xen_del_extra_mem(unsigned long start_pfn,
				     unsigned long n_pfns)
{
	int i;
	unsigned long start_r, size_r;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		start_r = xen_extra_mem[i].start_pfn;
		size_r = xen_extra_mem[i].n_pfns;

		/* Start of region. */
		if (start_r == start_pfn) {
			BUG_ON(n_pfns > size_r);
			xen_extra_mem[i].start_pfn += n_pfns;
			xen_extra_mem[i].n_pfns -= n_pfns;
			break;
		}
		/* End of region. */
		if (start_r + size_r == start_pfn + n_pfns) {
			BUG_ON(n_pfns > size_r);
			xen_extra_mem[i].n_pfns -= n_pfns;
			break;
		}
		/* Middle of region. */
		if (start_pfn > start_r && start_pfn < start_r + size_r) {
			BUG_ON(start_pfn + n_pfns > start_r + size_r);
			xen_extra_mem[i].n_pfns = start_pfn - start_r;
			/* Calling memblock_reserve() again is okay. */
			xen_add_extra_mem(start_pfn + n_pfns, start_r + size_r -
					  (start_pfn + n_pfns));
			break;
		}
	}
	memblock_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
}

/*
 * Called during boot before the p2m list can take entries beyond the
 * hypervisor-supplied p2m list. Entries in extra mem are to be regarded as
 * invalid.
 */
unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
{
	int i;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		if (pfn >= xen_extra_mem[i].start_pfn &&
		    pfn < xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns)
			return INVALID_P2M_ENTRY;
	}

	return IDENTITY_FRAME(pfn);
}

/*
 * Mark all pfns of extra mem as invalid in p2m list.
 */
void __init xen_inv_extra_mem(void)
{
	unsigned long pfn, pfn_s, pfn_e;
	int i;

	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
		if (!xen_extra_mem[i].n_pfns)
			continue;
		pfn_s = xen_extra_mem[i].start_pfn;
		pfn_e = pfn_s + xen_extra_mem[i].n_pfns;
		for (pfn = pfn_s; pfn < pfn_e; pfn++)
			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
	}
}

/*
 * Finds the next RAM pfn available in the E820 map after min_pfn.
 * This function updates min_pfn with the pfn found and returns
 * the size of that range or zero if not found.
 */
static unsigned long __init xen_find_pfn_range(unsigned long *min_pfn)
{
	const struct e820_entry *entry = xen_e820_table.entries;
	unsigned int i;
	unsigned long done = 0;

	for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
		unsigned long s_pfn;
		unsigned long e_pfn;

		if (entry->type != E820_TYPE_RAM)
			continue;

		e_pfn = PFN_DOWN(entry->addr + entry->size);

		/* We only care about E820 after this */
		if (e_pfn <= *min_pfn)
			continue;

		s_pfn = PFN_UP(entry->addr);

		/*
		 * If min_pfn falls within the E820 entry, we want to start
		 * at the min_pfn PFN.
		 */
		if (s_pfn <= *min_pfn) {
			done = e_pfn - *min_pfn;
		} else {
			done = e_pfn - s_pfn;
			*min_pfn = s_pfn;
		}
		break;
	}

	return done;
}

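/*
 * Hand a single frame back to the hypervisor. The hypercall returns the
 * number of extents actually released, so a return value of 1 means
 * success.
 */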
static int __init xen_free_mfn(unsigned long mfn)
{
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	set_xen_guest_handle(reservation.extent_start, &mfn);
	reservation.nr_extents = 1;

	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
}

/*
 * This releases a chunk of memory and then does the identity map. It's used
 * as a fallback if the remapping fails.
 */
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
			unsigned long end_pfn, unsigned long nr_pages)
{
	unsigned long pfn, end;
	int ret;

	WARN_ON(start_pfn > end_pfn);

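	/*
	 * Only pfns below nr_pages are backed by RAM of the initial
	 * allocation and can be released; the remainder of the range just
	 * gets the identity mapping below.
	 */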
	/* Release pages first. */
	end = min(end_pfn, nr_pages);
	for (pfn = start_pfn; pfn < end; pfn++) {
		unsigned long mfn = pfn_to_mfn(pfn);

		/* Make sure pfn exists to start with */
		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
			continue;

		ret = xen_free_mfn(mfn);
		WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);

		if (ret == 1) {
			xen_released_pages++;
			if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
				break;
		} else
			break;
	}

	set_phys_range_identity(start_pfn, end_pfn);
}

/*
 * Helper function to update the p2m and m2p tables and kernel mapping.
 */
static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
{
	struct mmu_update update = {
		.ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
		.val = pfn
	};

	/* Update p2m */
	if (!set_phys_to_machine(pfn, mfn)) {
		WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
		     pfn, mfn);
		BUG();
	}

	/* Update m2p */
	if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
		WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
		     mfn, pfn);
		BUG();
	}

	/* Update kernel mapping, but not for highmem. */
	if (pfn >= PFN_UP(__pa(high_memory - 1)))
		return;

	if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
					 mfn_pte(mfn, PAGE_KERNEL), 0)) {
		WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
		     mfn, pfn);
		BUG();
	}
}

/*
 * This function updates the p2m and m2p tables with an identity map from
 * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
 * original allocation at remap_pfn. The information needed for remapping is
 * saved in the memory itself to avoid the need for allocating buffers. The
 * complete remap information is contained in a list of MFNs each containing
 * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
 * This enables us to preserve the original mfn sequence while doing the
 * remapping at a time when the memory management is capable of allocating
 * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
 * its callers.
 */
static void __init xen_do_set_identity_and_remap_chunk(
	unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
	unsigned long buf = (unsigned long)&xen_remap_buf;
	unsigned long mfn_save, mfn;
	unsigned long ident_pfn_iter, remap_pfn_iter;
	unsigned long ident_end_pfn = start_pfn + size;
	unsigned long left = size;
	unsigned int i, chunk;

	WARN_ON(size == 0);

	mfn_save = virt_to_mfn(buf);

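	/*
	 * Each iteration maps the first frame of the chunk at xen_remap_buf,
	 * stores the target pfn and the chunk's mfn list in it, and pushes
	 * that frame onto the list headed by xen_remap_mfn (linked through
	 * next_area_mfn).
	 */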
	for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
	     ident_pfn_iter < ident_end_pfn;
	     ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
		chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;

		/* Map first pfn to xen_remap_buf */
		mfn = pfn_to_mfn(ident_pfn_iter);
		set_pte_mfn(buf, mfn, PAGE_KERNEL);

		/* Save mapping information in page */
		xen_remap_buf.next_area_mfn = xen_remap_mfn;
		xen_remap_buf.target_pfn = remap_pfn_iter;
		xen_remap_buf.size = chunk;
		for (i = 0; i < chunk; i++)
			xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);

		/* Put remap buf into list. */
		xen_remap_mfn = mfn;

		/* Set identity map */
		set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);

		left -= chunk;
	}

	/* Restore old xen_remap_buf mapping */
	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
}

/*
 * This function takes a contiguous pfn range that needs to be identity mapped
 * and:
 *
 *  1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
 *  2) Calls the do_ function to actually do the mapping/remapping work.
 *
 * The goal is to not allocate additional memory but to remap the existing
 * pages. In the case of an error the underlying memory is simply released back
 * to Xen and not remapped.
 */
static unsigned long __init xen_set_identity_and_remap_chunk(
	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
	unsigned long remap_pfn)
{
	unsigned long pfn;
	unsigned long i = 0;
	unsigned long n = end_pfn - start_pfn;

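	/*
	 * On the first call, start searching for remap space right after
	 * the end of the initial memory allocation.
	 */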
	if (remap_pfn == 0)
		remap_pfn = nr_pages;

	while (i < n) {
		unsigned long cur_pfn = start_pfn + i;
		unsigned long left = n - i;
		unsigned long size = left;
		unsigned long remap_range_size;

		/* Do not remap pages beyond the current allocation */
		if (cur_pfn >= nr_pages) {
			/* Identity map remaining pages */
			set_phys_range_identity(cur_pfn, cur_pfn + size);
			break;
		}
		if (cur_pfn + size > nr_pages)
			size = nr_pages - cur_pfn;

		remap_range_size = xen_find_pfn_range(&remap_pfn);
		if (!remap_range_size) {
			pr_warn("Unable to find available pfn range, not remapping identity pages\n");
			xen_set_identity_and_release_chunk(cur_pfn,
						cur_pfn + left, nr_pages);
			break;
		}
		/* Adjust size to fit in current e820 RAM region */
		if (size > remap_range_size)
			size = remap_range_size;

		xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);

		/* Update variables to reflect new mappings. */
		i += size;
		remap_pfn += size;
	}

	/*
	 * If the PFNs are currently mapped, the VA mapping also needs
	 * to be updated to be 1:1.
	 */
	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
		(void)HYPERVISOR_update_va_mapping(
			(unsigned long)__va(pfn << PAGE_SHIFT),
			mfn_pte(pfn, PAGE_KERNEL_IO), 0);

	return remap_pfn;
}

static unsigned long __init xen_count_remap_pages(
	unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
	unsigned long remap_pages)
{
	if (start_pfn >= nr_pages)
		return remap_pages;

	return remap_pages + min(end_pfn, nr_pages) - start_pfn;
}

static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages,
	unsigned long (*func)(unsigned long start_pfn, unsigned long end_pfn,
			      unsigned long nr_pages, unsigned long last_val))
{
	phys_addr_t start = 0;
	unsigned long ret_val = 0;
	const struct e820_entry *entry = xen_e820_table.entries;
	int i;

	/*
	 * Combine non-RAM regions and gaps until a RAM region (or the
	 * end of the map) is reached, then call the provided function
	 * to perform its duty on the non-RAM region.
	 *
	 * The combined non-RAM regions are rounded to a whole number
	 * of pages so any partial pages are accessible via the 1:1
	 * mapping. This is needed for some BIOSes that put (for
	 * example) the DMI tables in a reserved region that begins on
	 * a non-page boundary.
	 */
	for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
		phys_addr_t end = entry->addr + entry->size;
		if (entry->type == E820_TYPE_RAM || i == xen_e820_table.nr_entries - 1) {
			unsigned long start_pfn = PFN_DOWN(start);
			unsigned long end_pfn = PFN_UP(end);

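			/*
			 * For a RAM entry the non-RAM span being processed
			 * ends at the RAM entry's start, rounded up to a
			 * whole page.
			 */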
			if (entry->type == E820_TYPE_RAM)
				end_pfn = PFN_UP(entry->addr);

			if (start_pfn < end_pfn)
				ret_val = func(start_pfn, end_pfn, nr_pages,
					       ret_val);
			start = end;
		}
	}

	return ret_val;
}

/*
 * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
 * The remap information (i.e. which mfn is remapped to which pfn) is
 * contained in the memory to be remapped itself, in a linked list anchored
 * at xen_remap_mfn.
 * This scheme allows the different chunks to be remapped in arbitrary order;
 * the resulting mapping is independent of that order.
 */
void __init xen_remap_memory(void)
{
	unsigned long buf = (unsigned long)&xen_remap_buf;
	unsigned long mfn_save, pfn;
	unsigned long remapped = 0;
	unsigned int i;
	unsigned long pfn_s = ~0UL;
	unsigned long len = 0;

	mfn_save = virt_to_mfn(buf);

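	/*
	 * Walk the list of remap areas. Consecutive target areas are
	 * coalesced into a single [pfn_s, pfn_s + len) range, so
	 * xen_del_extra_mem() is called once per contiguous range rather
	 * than once per REMAP_SIZE sized chunk.
	 */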
	while (xen_remap_mfn != INVALID_P2M_ENTRY) {
		/* Map the remap information */
		set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);

		BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);

		pfn = xen_remap_buf.target_pfn;
		for (i = 0; i < xen_remap_buf.size; i++) {
			xen_update_mem_tables(pfn, xen_remap_buf.mfns[i]);
			remapped++;
			pfn++;
		}
		if (pfn_s == ~0UL || pfn == pfn_s) {
			pfn_s = xen_remap_buf.target_pfn;
			len += xen_remap_buf.size;
		} else if (pfn_s + len == xen_remap_buf.target_pfn) {
			len += xen_remap_buf.size;
		} else {
			xen_del_extra_mem(pfn_s, len);
			pfn_s = xen_remap_buf.target_pfn;
			len = xen_remap_buf.size;
		}
		xen_remap_mfn = xen_remap_buf.next_area_mfn;
	}

	if (pfn_s != ~0UL && len)
		xen_del_extra_mem(pfn_s, len);

	set_pte_mfn(buf, mfn_save, PAGE_KERNEL);

	pr_info("Remapped %ld page(s)\n", remapped);
}

static unsigned long __init xen_get_pages_limit(void)
{
	unsigned long limit;

	limit = MAXMEM / PAGE_SIZE;
	if (!xen_initial_domain() && xen_512gb_limit)
		limit = GB(512) / PAGE_SIZE;

	return limit;
}

static unsigned long __init xen_get_max_pages(void)
{
	unsigned long max_pages, limit;
	domid_t domid = DOMID_SELF;
	long ret;

	limit = xen_get_pages_limit();
	max_pages = limit;

	/*
	 * For the initial domain we use the maximum reservation as
	 * the maximum page.
	 *
	 * For guest domains the current maximum reservation reflects
	 * the current maximum rather than the static maximum. In this
	 * case the e820 map provided to us will cover the static
	 * maximum region.
	 */
	if (xen_initial_domain()) {
		ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
		if (ret > 0)
			max_pages = ret;
	}

	return min(max_pages, limit);
}

static void __init xen_align_and_add_e820_region(phys_addr_t start,
						 phys_addr_t size, int type)
{
	phys_addr_t end = start + size;

	/* Align RAM regions to page boundaries. */
	if (type == E820_TYPE_RAM) {
		start = PAGE_ALIGN(start);
		end &= ~((phys_addr_t)PAGE_SIZE - 1);
#ifdef CONFIG_MEMORY_HOTPLUG
		/*
		 * Don't allow adding memory not in E820 map while booting the
		 * system. Once the balloon driver is up it will remove that
		 * restriction again.
		 */
		max_mem_size = end;
#endif
	}

	e820__range_add(start, end - start, type);
}

static void __init xen_ignore_unusable(void)
{
	struct e820_entry *entry = xen_e820_table.entries;
	unsigned int i;

	for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
		if (entry->type == E820_TYPE_UNUSABLE)
			entry->type = E820_TYPE_RAM;
	}
}

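/*
 * Return true unless [start, start + size) is completely covered by a
 * single RAM entry of the E820 map.
 */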
bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size)
{
	struct e820_entry *entry;
	unsigned mapcnt;
	phys_addr_t end;

	if (!size)
		return false;

	end = start + size;
	entry = xen_e820_table.entries;

	for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) {
		if (entry->type == E820_TYPE_RAM && entry->addr <= start &&
		    (entry->addr + entry->size) >= end)
			return false;

		entry++;
	}

	return true;
}

/*
 * Find a free area in physical memory not yet reserved and compliant with
 * the E820 map.
 * Used to relocate pre-allocated areas like the initrd or the p2m list which
 * conflict with the E820 map to be used.
 * In case no area is found, return 0. Otherwise return the physical address
 * of the area, which is already reserved for convenience.
 */
phys_addr_t __init xen_find_free_area(phys_addr_t size)
{
	unsigned mapcnt;
	phys_addr_t addr, start;
	struct e820_entry *entry = xen_e820_table.entries;

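	/*
	 * Scan each RAM entry for a window of 'size' bytes containing no
	 * memblock-reserved page, restarting the window just past any
	 * reserved page that is found.
	 */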
	for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++, entry++) {
		if (entry->type != E820_TYPE_RAM || entry->size < size)
			continue;
		start = entry->addr;
		for (addr = start; addr < start + size; addr += PAGE_SIZE) {
			if (!memblock_is_reserved(addr))
				continue;
			start = addr + PAGE_SIZE;
			if (start + size > entry->addr + entry->size)
				break;
		}
		if (addr >= start + size) {
			memblock_reserve(start, size);
			return start;
		}
	}

	return 0;
}

/*
 * Like memcpy, but with physical addresses for dest and src.
 */
static void __init xen_phys_memcpy(phys_addr_t dest, phys_addr_t src,
				   phys_addr_t n)
{
	phys_addr_t dest_off, src_off, dest_len, src_len, len;
	void *from, *to;

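	/*
	 * early_memremap() can only map a limited window (NR_FIX_BTMAPS
	 * pages), so copy in chunks small enough for both the source and
	 * the destination mapping, taking the sub-page offsets into
	 * account.
	 */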
	while (n) {
		dest_off = dest & ~PAGE_MASK;
		src_off = src & ~PAGE_MASK;
		dest_len = n;
		if (dest_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off)
			dest_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off;
		src_len = n;
		if (src_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off)
			src_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off;
		len = min(dest_len, src_len);
		to = early_memremap(dest - dest_off, dest_len + dest_off);
		from = early_memremap(src - src_off, src_len + src_off);
		memcpy(to, from, len);
		early_memunmap(to, dest_len + dest_off);
		early_memunmap(from, src_len + src_off);
		n -= len;
		dest += len;
		src += len;
	}
}

/*
 * Reserve Xen mfn_list.
 */
static void __init xen_reserve_xen_mfnlist(void)
{
	phys_addr_t start, size;

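	/*
	 * The mfn list either still lives in the initial kernel mapping
	 * (virtual address above __START_KERNEL_map) or has been replaced
	 * by the linear p2m list described by first_p2m_pfn and
	 * nr_p2m_frames.
	 */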
	if (xen_start_info->mfn_list >= __START_KERNEL_map) {
		start = __pa(xen_start_info->mfn_list);
		size = PFN_ALIGN(xen_start_info->nr_pages *
				 sizeof(unsigned long));
	} else {
		start = PFN_PHYS(xen_start_info->first_p2m_pfn);
		size = PFN_PHYS(xen_start_info->nr_p2m_frames);
	}

	memblock_reserve(start, size);
	if (!xen_is_e820_reserved(start, size))
		return;

	xen_relocate_p2m();
	memblock_free(start, size);
}

/**
 * machine_specific_memory_setup - Hook for machine specific memory setup.
 **/
char * __init xen_memory_setup(void)
{
	unsigned long max_pfn, pfn_s, n_pfns;
	phys_addr_t mem_end, addr, size, chunk_size;
	u32 type;
	int rc;
	struct xen_memory_map memmap;
	unsigned long max_pages;
	unsigned long extra_pages = 0;
	int i;
	int op;

	xen_parse_512gb();
	max_pfn = xen_get_pages_limit();
	max_pfn = min(max_pfn, xen_start_info->nr_pages);
	mem_end = PFN_PHYS(max_pfn);

	memmap.nr_entries = ARRAY_SIZE(xen_e820_table.entries);
	set_xen_guest_handle(memmap.buffer, xen_e820_table.entries);

#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON)
	xen_saved_max_mem_size = max_mem_size;
#endif

	op = xen_initial_domain() ?
		XENMEM_machine_memory_map :
		XENMEM_memory_map;
	rc = HYPERVISOR_memory_op(op, &memmap);
	if (rc == -ENOSYS) {
		BUG_ON(xen_initial_domain());
		memmap.nr_entries = 1;
		xen_e820_table.entries[0].addr = 0ULL;
		xen_e820_table.entries[0].size = mem_end;
		/* 8MB slack (to balance backend allocations). */
		xen_e820_table.entries[0].size += 8ULL << 20;
		xen_e820_table.entries[0].type = E820_TYPE_RAM;
		rc = 0;
	}
	BUG_ON(rc);
	BUG_ON(memmap.nr_entries == 0);
	xen_e820_table.nr_entries = memmap.nr_entries;

	/*
	 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
	 * regions, so if we're using the machine memory map leave the
	 * region as RAM as it is in the pseudo-physical map.
	 *
	 * UNUSABLE regions in domUs are not handled and will need
	 * a patch in the future.
	 */
	if (xen_initial_domain())
		xen_ignore_unusable();

	/* Make sure the Xen-supplied memory map is well-ordered. */
	e820__update_table(&xen_e820_table);

	max_pages = xen_get_max_pages();

	/* How many extra pages do we need due to remapping? */
	max_pages += xen_foreach_remap_area(max_pfn, xen_count_remap_pages);

	if (max_pages > max_pfn)
		extra_pages += max_pages - max_pfn;

	/*
	 * Clamp the amount of extra memory to EXTRA_MEM_RATIO times the
	 * base size.
	 *
	 * Make sure we have no memory above max_pages, as this area
	 * isn't handled by the p2m management.
	 */
	extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
			   extra_pages, max_pages - max_pfn);
	i = 0;
	addr = xen_e820_table.entries[0].addr;
	size = xen_e820_table.entries[0].size;
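	/*
	 * Walk the E820 map entry by entry, splitting RAM entries at
	 * mem_end: RAM below mem_end is kept, up to extra_pages worth
	 * above it becomes ballooned-out extra memory, and anything beyond
	 * that is discarded.
	 */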
	while (i < xen_e820_table.nr_entries) {
		bool discard = false;

		chunk_size = size;
		type = xen_e820_table.entries[i].type;

		if (type == E820_TYPE_RAM) {
			if (addr < mem_end) {
				chunk_size = min(size, mem_end - addr);
			} else if (extra_pages) {
				chunk_size = min(size, PFN_PHYS(extra_pages));
				pfn_s = PFN_UP(addr);
				n_pfns = PFN_DOWN(addr + chunk_size) - pfn_s;
				extra_pages -= n_pfns;
				xen_add_extra_mem(pfn_s, n_pfns);
				xen_max_p2m_pfn = pfn_s + n_pfns;
			} else
				discard = true;
		}

		if (!discard)
			xen_align_and_add_e820_region(addr, chunk_size, type);

		addr += chunk_size;
		size -= chunk_size;
		if (size == 0) {
			i++;
			if (i < xen_e820_table.nr_entries) {
				addr = xen_e820_table.entries[i].addr;
				size = xen_e820_table.entries[i].size;
			}
		}
	}

	/*
	 * Set the rest as identity mapped, in case PCI BARs are
	 * located here.
	 */
	set_phys_range_identity(addr / PAGE_SIZE, ~0ul);

	/*
	 * In domU, the ISA region is normal, usable memory, but we
	 * reserve ISA memory anyway because too many things poke
	 * about in there.
	 */
	e820__range_add(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_TYPE_RESERVED);

	e820__update_table(e820_table);

	/*
	 * Check whether the kernel itself conflicts with the target E820 map.
	 * Failing now is better than running into weird problems later due
	 * to relocating (and even reusing) pages with kernel text or data.
	 */
	if (xen_is_e820_reserved(__pa_symbol(_text),
				 __pa_symbol(__bss_stop) - __pa_symbol(_text))) {
		xen_raw_console_write("Xen hypervisor allocated kernel memory conflicts with E820 map\n");
		BUG();
	}

	/*
	 * Check for a conflict of the hypervisor supplied page tables with
	 * the target E820 map.
	 */
	xen_pt_check_e820();

	xen_reserve_xen_mfnlist();

	/* Check for a conflict of the initrd with the target E820 map. */
	if (xen_is_e820_reserved(boot_params.hdr.ramdisk_image,
				 boot_params.hdr.ramdisk_size)) {
		phys_addr_t new_area, start, size;

		new_area = xen_find_free_area(boot_params.hdr.ramdisk_size);
		if (!new_area) {
			xen_raw_console_write("Can't find new memory area for initrd needed due to E820 map conflict\n");
			BUG();
		}

		start = boot_params.hdr.ramdisk_image;
		size = boot_params.hdr.ramdisk_size;
		xen_phys_memcpy(new_area, start, size);
		pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n",
			start, start + size, new_area, new_area + size);
		memblock_free(start, size);
		boot_params.hdr.ramdisk_image = new_area;
		boot_params.ext_ramdisk_image = new_area >> 32;
	}

	/*
	 * Set identity map on non-RAM pages and prepare remapping the
	 * underlying RAM.
	 */
	xen_foreach_remap_area(max_pfn, xen_set_identity_and_remap_chunk);

	pr_info("Released %ld page(s)\n", xen_released_pages);

	return "Xen";
}

static int register_callback(unsigned type, const void *func)
{
	struct callback_register callback = {
		.type = type,
		.address = XEN_CALLBACK(__KERNEL_CS, func),
		.flags = CALLBACKF_mask_events,
	};

	return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}

void xen_enable_sysenter(void)
{
	int ret;
	unsigned sysenter_feature;

	sysenter_feature = X86_FEATURE_SYSENTER32;

	if (!boot_cpu_has(sysenter_feature))
		return;

	ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat);
	if (ret != 0)
		setup_clear_cpu_cap(sysenter_feature);
}

void xen_enable_syscall(void)
{
	int ret;

	ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
	if (ret != 0) {
		printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
		/* Pretty fatal; 64-bit userspace has no other
		   mechanism for syscalls. */
	}

	if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
		ret = register_callback(CALLBACKTYPE_syscall32,
					xen_entry_SYSCALL_compat);
		if (ret != 0)
			setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
	}
}

static void __init xen_pvmmu_arch_setup(void)
{
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

	HYPERVISOR_vm_assist(VMASST_CMD_enable,
			     VMASST_TYPE_pae_extended_cr3);

	if (register_callback(CALLBACKTYPE_event,
			      xen_asm_exc_xen_hypervisor_callback) ||
	    register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
		BUG();

	xen_enable_sysenter();
	xen_enable_syscall();
}

/* This function is not called for HVM domains */
void __init xen_arch_setup(void)
{
	xen_panic_handler_init();
	xen_pvmmu_arch_setup();

#ifdef CONFIG_ACPI
	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
		disable_acpi();
	}
#endif

	memcpy(boot_command_line, xen_start_info->cmd_line,
	       MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);

	/* Set up idle, making sure it calls safe_halt() pvop */
	disable_cpuidle();
	disable_cpufreq();
	WARN_ON(xen_set_default_idle());
#ifdef CONFIG_NUMA
	numa_off = 1;
#endif
}