// SPDX-License-Identifier: GPL-2.0

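/*
 * The cpu_entry_area is a per CPU region at fixed virtual addresses which is
 * mapped into both the kernel and (with page table isolation) the user page
 * tables.  It holds essentially everything the CPU and the entry code have to
 * touch before the page tables are switched: the GDT, the entry/trampoline
 * stack, the TSS, the 64-bit IST exception stacks and the Intel debug store.
 */
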
#include <linux/spinlock.h>
#include <linux/percpu.h>
#include <linux/kallsyms.h>
#include <linux/kcore.h>
#include <linux/pgtable.h>

#include <asm/cpu_entry_area.h>
#include <asm/fixmap.h>
#include <asm/desc.h>

static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);

#ifdef CONFIG_X86_64
static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
#endif

#ifdef CONFIG_X86_32
DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
#endif

/* Is called from entry code, so must be noinstr */
noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
{
        unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
        BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);

        return (struct cpu_entry_area *) va;
}
EXPORT_SYMBOL(get_cpu_entry_area);
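/*
 * get_cpu_entry_area() is pure arithmetic on the fixed CPU_ENTRY_AREA_PER_CPU
 * base; no memory is dereferenced, which keeps the noinstr requirement above
 * trivial to meet.  Callers normally go through helpers layered on top of it,
 * e.g. cpu_entry_stack() in <asm/cpu_entry_area.h>, which is roughly:
 *
 *        return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
 */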

void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
{
        unsigned long va = (unsigned long) cea_vaddr;
        pte_t pte = pfn_pte(pa >> PAGE_SHIFT, flags);

        /*
         * The cpu_entry_area is shared between the user and kernel
         * page tables.  All of its ptes can safely be global.
         * _PAGE_GLOBAL gets reused to help indicate PROT_NONE for
         * non-present PTEs, so be careful not to set it in that
         * case to avoid confusion.
         */
        if (boot_cpu_has(X86_FEATURE_PGE) &&
            (pgprot_val(flags) & _PAGE_PRESENT))
                pte = pte_set_flags(pte, _PAGE_GLOBAL);

        set_pte_vaddr(va, pte);
}

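/*
 * Map @pages contiguous pages of the per CPU object @ptr at the cpu_entry_area
 * virtual address @cea_vaddr, one PTE at a time, translating each per CPU page
 * to its physical address.
 */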
static void __init
cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
{
        for ( ; pages; pages--, cea_vaddr += PAGE_SIZE, ptr += PAGE_SIZE)
                cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
}

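/*
 * Map the per CPU Intel debug store (BTS/PEBS) into the cpu_entry_area.  The
 * hardware writes records through these virtual addresses at points the
 * kernel does not control, so with page table isolation the buffers need to
 * live somewhere that is mapped in both the kernel and the user page tables;
 * the cpu_entry_area is that place.
 */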
static void __init percpu_setup_debug_store(unsigned int cpu)
{
#ifdef CONFIG_CPU_SUP_INTEL
        unsigned int npages;
        void *cea;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return;

        cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
        npages = sizeof(struct debug_store) / PAGE_SIZE;
        BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
        cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
                             PAGE_KERNEL);

        cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
        /*
         * Force the population of PMDs for not yet allocated per cpu
         * memory like debug store buffers.
         */
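        /*
         * The PTEs are installed non-present here; the real PEBS/BTS buffers
         * are expected to be plugged into these slots later by the perf code
         * (see arch/x86/events/intel/ds.c), which is one reason cea_set_pte()
         * is not static.
         */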
        npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
        for (; npages; npages--, cea += PAGE_SIZE)
                cea_set_pte(cea, 0, PAGE_NONE);
#endif
}

#ifdef CONFIG_X86_64

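/*
 * Map one IST exception stack.  This relies on the <name>_stack member naming
 * convention being identical in struct exception_stacks (the backing per CPU
 * storage) and struct cea_exception_stacks (the cpu_entry_area view), so the
 * same token pasting selects both the source and the destination.
 */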
#define cea_map_stack(name) do {                                        \
        npages = sizeof(estacks->name## _stack) / PAGE_SIZE;            \
        cea_map_percpu_pages(cea->estacks.name## _stack,                \
                        estacks->name## _stack, npages, PAGE_KERNEL);   \
        } while (0)

static void __init percpu_setup_exception_stacks(unsigned int cpu)
{
        struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu);
        struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
        unsigned int npages;

        BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);

        per_cpu(cea_exception_stacks, cpu) = &cea->estacks;

        /*
         * The exception stack mappings in the per cpu area are protected
         * by guard pages so each stack must be mapped separately. DB2 is
         * not mapped; it just exists to catch triple nesting of #DB.
         */
        cea_map_stack(DF);
        cea_map_stack(NMI);
        cea_map_stack(DB);
        cea_map_stack(MCE);

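        /*
         * The #VC (VMM Communication exception) stacks are only needed on
         * SEV-ES guests, i.e. when the guest register state is encrypted;
         * everywhere else they are left unmapped.
         */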
        if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
                if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
                        cea_map_stack(VC);
                        cea_map_stack(VC2);
                }
        }
}
#else
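/*
 * 32-bit has no IST exception stacks; the only per CPU exception stack that
 * needs a cpu_entry_area mapping here is the double fault stack (plus its
 * TSS), which the #DF task gate switches to.
 */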
static inline void percpu_setup_exception_stacks(unsigned int cpu)
{
        struct cpu_entry_area *cea = get_cpu_entry_area(cpu);

        cea_map_percpu_pages(&cea->doublefault_stack,
                             &per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL);
}
#endif

/* Set up the fixmap mappings only once per processor */
static void __init setup_cpu_entry_area(unsigned int cpu)
{
        struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
#ifdef CONFIG_X86_64
        /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
        pgprot_t gdt_prot = PAGE_KERNEL_RO;
        pgprot_t tss_prot = PAGE_KERNEL_RO;
#else
        /*
         * On native 32-bit systems, the GDT cannot be read-only because
         * our double fault handler uses a task gate, and entering through
         * a task gate needs to change an available TSS to busy.  If the
         * GDT is read-only, that will triple fault.  The TSS cannot be
         * read-only because the CPU writes to it on task switches.
         *
         * On Xen PV, the GDT must be read-only because the hypervisor
         * requires it.
         */
        pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
                PAGE_KERNEL_RO : PAGE_KERNEL;
        pgprot_t tss_prot = PAGE_KERNEL;
#endif

        cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);

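        /*
         * The entry stack page holds the small per CPU trampoline stack the
         * hardware switches to on ring transitions (x86_tss.sp0 points at its
         * top) before the entry code moves over to the real task stack, so it
         * must be usable with the user page tables loaded.
         */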
        cea_map_percpu_pages(&cea->entry_stack_page,
                             per_cpu_ptr(&entry_stack_storage, cpu), 1,
                             PAGE_KERNEL);

        /*
         * The Intel SDM says (Volume 3, 7.2.1):
         *
         *  Avoid placing a page boundary in the part of the TSS that the
         *  processor reads during a task switch (the first 104 bytes). The
         *  processor may not correctly perform address translations if a
         *  boundary occurs in this area. During a task switch, the processor
         *  reads and writes into the first 104 bytes of each TSS (using
         *  contiguous physical addresses beginning with the physical address
         *  of the first byte of the TSS). So, after TSS access begins, if
         *  part of the 104 bytes is not physically contiguous, the processor
         *  will access incorrect information without generating a page-fault
         *  exception.
         *
         * There are also a lot of errata involving the TSS spanning a page
         * boundary.  Assert that we're not doing that.
         */
        BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
                      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
        BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
        /*
         * VMX changes the host TR limit to 0x67 after a VM exit.  This is
         * okay, since 0x67 covers the size of struct x86_hw_tss.  Make sure
         * that this is correct.
         */
        BUILD_BUG_ON(offsetof(struct tss_struct, x86_tss) != 0);
        BUILD_BUG_ON(sizeof(struct x86_hw_tss) != 0x68);
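        /*
         * Note: 0x68 is 104 bytes, i.e. the architectural part of the TSS
         * that the SDM quote above refers to; a TR limit of 0x67 covers byte
         * offsets 0 through 0x67 inclusive.
         */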

        cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
                             sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);

#ifdef CONFIG_X86_32
        per_cpu(cpu_entry_area, cpu) = cea;
#endif

        percpu_setup_exception_stacks(cpu);

        percpu_setup_debug_store(cpu);
}

static __init void setup_cpu_entry_area_ptes(void)
{
#ifdef CONFIG_X86_32
        unsigned long start, end;

        /* The +1 is for the readonly IDT: */
        BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
        BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
        BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);

        start = CPU_ENTRY_AREA_BASE;
        end = start + CPU_ENTRY_AREA_MAP_SIZE;

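        /*
         * The idea is to get the page table pages backing the whole range
         * allocated up front, one populate_extra_pte() call per PMD sized
         * slot, so that the later cea_set_pte() calls find them already in
         * place.
         */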
        /* Careful here: start + PMD_SIZE might wrap around */
        for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
                populate_extra_pte(start);
#endif
}

void __init setup_cpu_entry_areas(void)
{
        unsigned int cpu;

        setup_cpu_entry_area_ptes();

        for_each_possible_cpu(cpu)
                setup_cpu_entry_area(cpu);

        /*
         * This is the last essential update to swapper_pgdir which needs
         * to be synchronized to initial_page_table on 32bit.
         */
        sync_initial_page_table();
}