// SPDX-License-Identifier: GPL-2.0
/*
 * Xen stolen ticks accounting.
 */
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/math64.h>
#include <linux/gfp.h>
#include <linux/slab.h>

#include <asm/paravirt.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/events.h>
#include <xen/features.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/xen-ops.h>

/* runstate info updated by Xen */
static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);

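/*
 * Runstate time accumulated before the last suspend/resume cycle,
 * indexed by RUNSTATE_* state; added to every fresh hypervisor
 * snapshot so accounted time stays monotonic across resume.
 */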
static DEFINE_PER_CPU(u64[4], old_runstate_time);

/* return a consistent snapshot of a 64-bit time/counter value */
static u64 get64(const u64 *p)
{
	u64 ret;

	if (BITS_PER_LONG < 64) {
		u32 *p32 = (u32 *)p;
		u32 h, l, h2;

		/*
		 * Read high then low, and then make sure high is
		 * still the same; this will only loop if low wraps
		 * and carries into high.
		 * XXX some clean way to make this endian-proof?
		 */
		do {
			h = READ_ONCE(p32[1]);
			l = READ_ONCE(p32[0]);
			h2 = READ_ONCE(p32[1]);
		} while (h2 != h);

		ret = (((u64)h) << 32) | l;
	} else
		ret = READ_ONCE(*p);

	return ret;
}

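/*
 * Copy the current runstate info for @cpu into @res, without the
 * accumulated pre-suspend times. Xen sets XEN_RUNSTATE_UPDATE in
 * state_entry_time while it is rewriting the structure, so retry
 * until the entry time reads back stable and unflagged on both
 * sides of the copy.
 */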
static void xen_get_runstate_snapshot_cpu_delta(
			      struct vcpu_runstate_info *res, unsigned int cpu)
{
	u64 state_time;
	struct vcpu_runstate_info *state;

	BUG_ON(preemptible());

	state = per_cpu_ptr(&xen_runstate, cpu);

	do {
		state_time = get64(&state->state_entry_time);
		rmb();	/* Hypervisor might update data. */
		*res = __READ_ONCE(*state);
		rmb();	/* Hypervisor might update data. */
	} while (get64(&state->state_entry_time) != state_time ||
		 (state_time & XEN_RUNSTATE_UPDATE));
}

static void xen_get_runstate_snapshot_cpu(struct vcpu_runstate_info *res,
					  unsigned int cpu)
{
	int i;

	xen_get_runstate_snapshot_cpu_delta(res, cpu);

	for (i = 0; i < 4; i++)
		res->time[i] += per_cpu(old_runstate_time, cpu)[i];
}

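/*
 * xen_manage_runstate_time - manage runstate accounting around suspend
 * @action: -1 to snapshot the per-vcpu runstate deltas before suspend,
 *	    0 to fold the saved deltas into old_runstate_time after
 *	    resume, any other value to discard the snapshot (e.g. a
 *	    cancelled or checkpoint-style suspend where times keep
 *	    running).
 *
 * The allocation uses GFP_ATOMIC since this can be called from the
 * atomic suspend path.
 */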
void xen_manage_runstate_time(int action)
{
	static struct vcpu_runstate_info *runstate_delta;
	struct vcpu_runstate_info state;
	int cpu, i;

	switch (action) {
	case -1: /* backup runstate time before suspend */
		if (unlikely(runstate_delta))
			pr_warn_once("%s: memory leak as runstate_delta is not NULL\n",
					__func__);

		runstate_delta = kmalloc_array(num_possible_cpus(),
					sizeof(*runstate_delta),
					GFP_ATOMIC);
		if (unlikely(!runstate_delta)) {
			pr_warn("%s: failed to allocate runstate_delta\n",
					__func__);
			return;
		}

		for_each_possible_cpu(cpu) {
			xen_get_runstate_snapshot_cpu_delta(&state, cpu);
			memcpy(runstate_delta[cpu].time, state.time,
					sizeof(runstate_delta[cpu].time));
		}

		break;

	case 0: /* accumulate backed-up runstate time after resume */
		if (unlikely(!runstate_delta)) {
			pr_warn("%s: cannot accumulate runstate time as runstate_delta is NULL\n",
					__func__);
			return;
		}

		for_each_possible_cpu(cpu) {
			for (i = 0; i < 4; i++)
				per_cpu(old_runstate_time, cpu)[i] +=
					runstate_delta[cpu].time[i];
		}

		break;

	default: /* do not accumulate runstate time for checkpointing */
		break;
	}

	if (action != -1 && runstate_delta) {
		kfree(runstate_delta);
		runstate_delta = NULL;
	}
}

/*
 * Runstate accounting
 */
void xen_get_runstate_snapshot(struct vcpu_runstate_info *res)
{
	xen_get_runstate_snapshot_cpu(res, smp_processor_id());
}

/* return true when a vcpu could run but has no real cpu to run on */
bool xen_vcpu_stolen(int vcpu)
{
	return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
}

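/*
 * Stolen time is any time the vcpu wanted to run but could not: time
 * spent runnable (waiting for a physical cpu) plus time spent offline.
 * Blocked time is excluded, as the vcpu was idle by choice.
 */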
u64 xen_steal_clock(int cpu)
{
	struct vcpu_runstate_info state;

	xen_get_runstate_snapshot_cpu(&state, cpu);
	return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
}

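/*
 * Register this cpu's xen_runstate area with the hypervisor so Xen
 * keeps it updated in place; failure is fatal, as all accounting in
 * this file depends on the registration.
 */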
void xen_setup_runstate_info(int cpu)
{
	struct vcpu_register_runstate_memory_area area;

	area.addr.v = &per_cpu(xen_runstate, cpu);

	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
			       xen_vcpu_nr(cpu), &area))
		BUG();
}

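/*
 * Enable steal-time accounting. Reading a remote vcpu's runstate area
 * is only safe when Xen marks in-progress updates via
 * XEN_RUNSTATE_UPDATE, so runqueue steal-time accounting is enabled
 * only when the runstate_update_flag vm_assist could be enabled.
 */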
void __init xen_time_setup_guest(void)
{
	bool xen_runstate_remote;

	xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable,
					VMASST_TYPE_runstate_update_flag);

	pv_ops.time.steal_clock = xen_steal_clock;

	static_key_slow_inc(&paravirt_steal_enabled);
	if (xen_runstate_remote)
		static_key_slow_inc(&paravirt_steal_rq_enabled);
}