#define pr_fmt(fmt) "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
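/*
 * With 4K pages, a single gva_list entry therefore covers up to
 * 4096 * 4096 bytes = 16 MB of virtual address space.
 */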

static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				      const struct flush_tlb_info *info);

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
				unsigned long start, unsigned long end)
{
	int gva_n = offset;
	unsigned long cur = start, diff;

	do {
		diff = end > cur ? end - cur : 0;

		gva_list[gva_n] = cur & PAGE_MASK;
		/*
		 * Lower 12 bits encode the number of additional
		 * pages to flush (in addition to the 'cur' page).
		 */
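		/*
		 * Worked example (assuming 4K pages): a request to flush
		 * 8 pages starting at 0x7f0000000000 takes the 'else if'
		 * branch below and emits the single entry 0x7f0000000007,
		 * i.e. the base page plus 7 additional pages.
		 */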
		if (diff >= HV_TLB_FLUSH_UNIT) {
			gva_list[gva_n] |= ~PAGE_MASK;
			cur += HV_TLB_FLUSH_UNIT;
		} else if (diff) {
			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
			cur = end;
		}

		gva_n++;

	} while (cur < end);

	return gva_n - offset;
}

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
				    const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
	struct hv_tlb_flush **flush_pcpu;
	struct hv_tlb_flush *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	if (!hv_hypercall_pg)
		goto do_native;

	local_irq_save(flags);

	/*
	 * Only check the mask _after_ interrupts have been disabled to
	 * avoid the mask changing under our feet.
	 */
	if (cpumask_empty(cpus)) {
		local_irq_restore(flags);
		return;
	}

	flush_pcpu = (struct hv_tlb_flush **)
		     this_cpu_ptr(hyperv_pcpu_input_arg);

	flush = *flush_pcpu;

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->processor_mask = 0;
	if (cpumask_equal(cpus, cpu_present_mask)) {
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	} else {
		/*
		 * From the supplied CPU set we need to figure out if we can get
		 * away with cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}
		 * hypercalls. This is possible when the highest VP number in
		 * the set is < 64. As VP numbers are usually in ascending order
		 * and match Linux CPU ids, here is an optimization: we check
		 * the VP number for the highest bit in the supplied set first
		 * so we can quickly find out if using *_EX hypercalls is a
		 * must. We will also check all VP numbers when walking the
		 * supplied CPU set to remain correct in all cases.
		 */
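		/*
		 * The non-EX flush structure carries a single 64-bit
		 * processor_mask, so only VP numbers 0..63 can be
		 * represented without the sparse *_EX format.
		 */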
		if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64)
			goto do_ex_hypercall;

		for_each_cpu(cpu, cpus) {
			vcpu = hv_cpu_number_to_vp_number(cpu);
			if (vcpu == VP_INVAL) {
				local_irq_restore(flags);
				goto do_native;
			}

			if (vcpu >= 64)
				goto do_ex_hypercall;

			__set_bit(vcpu, (unsigned long *)
				  &flush->processor_mask);
		}
	}

	/*
	 * We can flush no more than max_gvas entries with one hypercall.
	 * Flush the whole address space if we were asked to do more.
	 */
	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
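	/*
	 * max_gvas follows from the per-cpu hypercall input buffer being a
	 * single page: the fixed header of struct hv_tlb_flush is followed
	 * by the variable-length gva_list filling the rest of the page.
	 */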

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, 0,
				      info->start, info->end);
		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
					     gva_n, 0, flush, NULL);
	}
	goto check_status;

do_ex_hypercall:
	status = hyperv_flush_tlb_others_ex(cpus, info);

check_status:
	local_irq_restore(flags);

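	/*
	 * The low 16 bits of the hypercall return value carry the
	 * HV_STATUS code; zero means success, anything else falls
	 * back to the native flush below.
	 */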
	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				      const struct flush_tlb_info *info)
{
	int nr_bank = 0, max_gvas, gva_n;
	struct hv_tlb_flush_ex **flush_pcpu;
	struct hv_tlb_flush_ex *flush;
	u64 status;

	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
		return U64_MAX;

	flush_pcpu = (struct hv_tlb_flush_ex **)
		     this_cpu_ptr(hyperv_pcpu_input_arg);

	flush = *flush_pcpu;

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->hv_vp_set.valid_bank_mask = 0;

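	/*
	 * cpumask_to_vpset() populates the sparse bank array from the CPU
	 * set and returns the number of 64-bit banks used, or a negative
	 * value when a CPU cannot be mapped to a valid VP number.
	 */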
	flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
	if (nr_bank < 0)
		return U64_MAX;

	/*
	 * We can flush no more than max_gvas entries with one hypercall.
	 * Flush the whole address space if we were asked to do more.
	 */
	max_gvas =
		(PAGE_SIZE - sizeof(*flush) - nr_bank *
		 sizeof(flush->hv_vp_set.bank_contents[0])) /
		sizeof(flush->gva_list[0]);
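	/*
	 * Unlike the non-EX case, the nr_bank entries of the sparse bank
	 * array sit between the fixed header and the GVA entries in the
	 * input page, so fewer GVA slots are available; fill_gva_list()
	 * is called with nr_bank as its offset for the same reason.
	 */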

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
				0, nr_bank, flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
				0, nr_bank, flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, nr_bank,
				      info->start, info->end);
		status = hv_do_rep_hypercall(
				HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
				gva_n, nr_bank, flush, NULL);
	}

	return status;
}

void hyperv_setup_mmu_ops(void)
{
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	pr_info("Using hypercall for remote TLB flush\n");
	pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others;
	pv_ops.mmu.tlb_remove_table = tlb_remove_table;
}