xref: /OK3568_Linux_fs/kernel/arch/x86/xen/time.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Xen time implementation.
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * This is implemented in terms of a clocksource driver which uses
6*4882a593Smuzhiyun  * the hypervisor clock as a nanosecond timebase, and a clockevent
7*4882a593Smuzhiyun  * driver which uses the hypervisor's timer mechanism.
8*4882a593Smuzhiyun  *
9*4882a593Smuzhiyun  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
10*4882a593Smuzhiyun  */
11*4882a593Smuzhiyun #include <linux/kernel.h>
12*4882a593Smuzhiyun #include <linux/interrupt.h>
13*4882a593Smuzhiyun #include <linux/clocksource.h>
14*4882a593Smuzhiyun #include <linux/clockchips.h>
15*4882a593Smuzhiyun #include <linux/gfp.h>
16*4882a593Smuzhiyun #include <linux/slab.h>
17*4882a593Smuzhiyun #include <linux/pvclock_gtod.h>
18*4882a593Smuzhiyun #include <linux/timekeeper_internal.h>
19*4882a593Smuzhiyun 
20*4882a593Smuzhiyun #include <asm/pvclock.h>
21*4882a593Smuzhiyun #include <asm/xen/hypervisor.h>
22*4882a593Smuzhiyun #include <asm/xen/hypercall.h>
23*4882a593Smuzhiyun 
24*4882a593Smuzhiyun #include <xen/events.h>
25*4882a593Smuzhiyun #include <xen/features.h>
26*4882a593Smuzhiyun #include <xen/interface/xen.h>
27*4882a593Smuzhiyun #include <xen/interface/vcpu.h>
28*4882a593Smuzhiyun 
29*4882a593Smuzhiyun #include "xen-ops.h"
30*4882a593Smuzhiyun 
/* Minimum amount of time until next clock event fires */
#define TIMER_SLOP	100000

/*
 * Offset subtracted from the Xen clocksource to form sched_clock();
 * re-based across save/restore so sched_clock() stays monotonic.
 */
static u64 xen_sched_clock_offset __read_mostly;
35*4882a593Smuzhiyun 
36*4882a593Smuzhiyun /* Get the TSC speed from Xen */
xen_tsc_khz(void)37*4882a593Smuzhiyun static unsigned long xen_tsc_khz(void)
38*4882a593Smuzhiyun {
39*4882a593Smuzhiyun 	struct pvclock_vcpu_time_info *info =
40*4882a593Smuzhiyun 		&HYPERVISOR_shared_info->vcpu_info[0].time;
41*4882a593Smuzhiyun 
42*4882a593Smuzhiyun 	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
43*4882a593Smuzhiyun 	return pvclock_tsc_khz(info);
44*4882a593Smuzhiyun }
45*4882a593Smuzhiyun 
xen_clocksource_read(void)46*4882a593Smuzhiyun static u64 xen_clocksource_read(void)
47*4882a593Smuzhiyun {
48*4882a593Smuzhiyun         struct pvclock_vcpu_time_info *src;
49*4882a593Smuzhiyun 	u64 ret;
50*4882a593Smuzhiyun 
51*4882a593Smuzhiyun 	preempt_disable_notrace();
52*4882a593Smuzhiyun 	src = &__this_cpu_read(xen_vcpu)->time;
53*4882a593Smuzhiyun 	ret = pvclock_clocksource_read(src);
54*4882a593Smuzhiyun 	preempt_enable_notrace();
55*4882a593Smuzhiyun 	return ret;
56*4882a593Smuzhiyun }
57*4882a593Smuzhiyun 
xen_clocksource_get_cycles(struct clocksource * cs)58*4882a593Smuzhiyun static u64 xen_clocksource_get_cycles(struct clocksource *cs)
59*4882a593Smuzhiyun {
60*4882a593Smuzhiyun 	return xen_clocksource_read();
61*4882a593Smuzhiyun }
62*4882a593Smuzhiyun 
xen_sched_clock(void)63*4882a593Smuzhiyun static u64 xen_sched_clock(void)
64*4882a593Smuzhiyun {
65*4882a593Smuzhiyun 	return xen_clocksource_read() - xen_sched_clock_offset;
66*4882a593Smuzhiyun }
67*4882a593Smuzhiyun 
xen_read_wallclock(struct timespec64 * ts)68*4882a593Smuzhiyun static void xen_read_wallclock(struct timespec64 *ts)
69*4882a593Smuzhiyun {
70*4882a593Smuzhiyun 	struct shared_info *s = HYPERVISOR_shared_info;
71*4882a593Smuzhiyun 	struct pvclock_wall_clock *wall_clock = &(s->wc);
72*4882a593Smuzhiyun         struct pvclock_vcpu_time_info *vcpu_time;
73*4882a593Smuzhiyun 
74*4882a593Smuzhiyun 	vcpu_time = &get_cpu_var(xen_vcpu)->time;
75*4882a593Smuzhiyun 	pvclock_read_wallclock(wall_clock, vcpu_time, ts);
76*4882a593Smuzhiyun 	put_cpu_var(xen_vcpu);
77*4882a593Smuzhiyun }
78*4882a593Smuzhiyun 
/* x86_platform.get_wallclock hook: read wall time from Xen. */
static void xen_get_wallclock(struct timespec64 *now)
{
	xen_read_wallclock(now);
}
83*4882a593Smuzhiyun 
/*
 * x86_platform.set_wallclock hook: unprivileged guests cannot set the
 * host's clock, so always fail with -ENODEV.
 */
static int xen_set_wallclock(const struct timespec64 *now)
{
	return -ENODEV;
}
88*4882a593Smuzhiyun 
/*
 * pvclock_gtod notifier: push the kernel's wall time back to Xen.
 *
 * Invoked by the timekeeping core; the hypercall is made only when the
 * clock was stepped (@was_set) or when the 11-minute RTC sync interval
 * has elapsed.  Registered from xen_time_init() for the initial domain
 * only.
 */
static int xen_pvclock_gtod_notify(struct notifier_block *nb,
				   unsigned long was_set, void *priv)
{
	/* Protected by the calling core code serialization */
	static struct timespec64 next_sync;

	struct xen_platform_op op;
	struct timespec64 now;
	struct timekeeper *tk = priv;
	static bool settime64_supported = true;
	int ret;

	now.tv_sec = tk->xtime_sec;
	now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);

	/*
	 * We only take the expensive HV call when the clock was set
	 * or when the 11 minutes RTC synchronization time elapsed.
	 */
	if (!was_set && timespec64_compare(&now, &next_sync) < 0)
		return NOTIFY_OK;

again:
	if (settime64_supported) {
		op.cmd = XENPF_settime64;
		op.u.settime64.mbz = 0;
		op.u.settime64.secs = now.tv_sec;
		op.u.settime64.nsecs = now.tv_nsec;
		op.u.settime64.system_time = xen_clocksource_read();
	} else {
		op.cmd = XENPF_settime32;
		op.u.settime32.secs = now.tv_sec;
		op.u.settime32.nsecs = now.tv_nsec;
		op.u.settime32.system_time = xen_clocksource_read();
	}

	ret = HYPERVISOR_platform_op(&op);

	/* Hypervisor lacks settime64: remember that and retry with settime32. */
	if (ret == -ENOSYS && settime64_supported) {
		settime64_supported = false;
		goto again;
	}
	if (ret < 0)
		return NOTIFY_BAD;

	/*
	 * Move the next drift compensation time 11 minutes
	 * ahead. That's emulating the sync_cmos_clock() update for
	 * the hardware RTC.
	 */
	next_sync = now;
	next_sync.tv_sec += 11 * 60;

	return NOTIFY_OK;
}
144*4882a593Smuzhiyun 
/* Registered with the timekeeping core by xen_time_init() (dom0 only). */
static struct notifier_block xen_pvclock_gtod_notifier = {
	.notifier_call = xen_pvclock_gtod_notify,
};
148*4882a593Smuzhiyun 
/* clocksource ->enable: mark the PVCLOCK vDSO clock mode as in use. */
static int xen_cs_enable(struct clocksource *cs)
{
	vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);
	return 0;
}
154*4882a593Smuzhiyun 
/* The Xen PV clocksource; rating is lowered for dom0 in xen_time_init(). */
static struct clocksource xen_clocksource __read_mostly = {
	.name	= "xen",
	.rating	= 400,
	.read	= xen_clocksource_get_cycles,
	.mask	= CLOCKSOURCE_MASK(64),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
	.enable = xen_cs_enable,
};
163*4882a593Smuzhiyun 
164*4882a593Smuzhiyun /*
165*4882a593Smuzhiyun    Xen clockevent implementation
166*4882a593Smuzhiyun 
167*4882a593Smuzhiyun    Xen has two clockevent implementations:
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun    The old timer_op one works with all released versions of Xen prior
170*4882a593Smuzhiyun    to version 3.0.4.  This version of the hypervisor provides a
171*4882a593Smuzhiyun    single-shot timer with nanosecond resolution.  However, sharing the
172*4882a593Smuzhiyun    same event channel is a 100Hz tick which is delivered while the
173*4882a593Smuzhiyun    vcpu is running.  We don't care about or use this tick, but it will
174*4882a593Smuzhiyun    cause the core time code to think the timer fired too soon, and
175*4882a593Smuzhiyun    will end up resetting it each time.  It could be filtered, but
176*4882a593Smuzhiyun    doing so has complications when the ktime clocksource is not yet
177*4882a593Smuzhiyun    the xen clocksource (ie, at boot time).
178*4882a593Smuzhiyun 
179*4882a593Smuzhiyun    The new vcpu_op-based timer interface allows the tick timer period
180*4882a593Smuzhiyun    to be changed or turned off.  The tick timer is not useful as a
181*4882a593Smuzhiyun    periodic timer because events are only delivered to running vcpus.
182*4882a593Smuzhiyun    The one-shot timer can report when a timeout is in the past, so
183*4882a593Smuzhiyun    set_next_event is capable of returning -ETIME when appropriate.
184*4882a593Smuzhiyun    This interface is used when available.
185*4882a593Smuzhiyun */
186*4882a593Smuzhiyun 
187*4882a593Smuzhiyun 
188*4882a593Smuzhiyun /*
189*4882a593Smuzhiyun   Get a hypervisor absolute time.  In theory we could maintain an
190*4882a593Smuzhiyun   offset between the kernel's time and the hypervisor's time, and
191*4882a593Smuzhiyun   apply that to a kernel's absolute timeout.  Unfortunately the
192*4882a593Smuzhiyun   hypervisor and kernel times can drift even if the kernel is using
193*4882a593Smuzhiyun   the Xen clocksource, because ntp can warp the kernel's clocksource.
194*4882a593Smuzhiyun */
get_abs_timeout(unsigned long delta)195*4882a593Smuzhiyun static s64 get_abs_timeout(unsigned long delta)
196*4882a593Smuzhiyun {
197*4882a593Smuzhiyun 	return xen_clocksource_read() + delta;
198*4882a593Smuzhiyun }
199*4882a593Smuzhiyun 
/* Shutdown state: a timeout of zero cancels any pending timer_op. */
static int xen_timerop_shutdown(struct clock_event_device *evt)
{
	HYPERVISOR_set_timer_op(0);

	return 0;
}
207*4882a593Smuzhiyun 
xen_timerop_set_next_event(unsigned long delta,struct clock_event_device * evt)208*4882a593Smuzhiyun static int xen_timerop_set_next_event(unsigned long delta,
209*4882a593Smuzhiyun 				      struct clock_event_device *evt)
210*4882a593Smuzhiyun {
211*4882a593Smuzhiyun 	WARN_ON(!clockevent_state_oneshot(evt));
212*4882a593Smuzhiyun 
213*4882a593Smuzhiyun 	if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
214*4882a593Smuzhiyun 		BUG();
215*4882a593Smuzhiyun 
216*4882a593Smuzhiyun 	/* We may have missed the deadline, but there's no real way of
217*4882a593Smuzhiyun 	   knowing for sure.  If the event was in the past, then we'll
218*4882a593Smuzhiyun 	   get an immediate interrupt. */
219*4882a593Smuzhiyun 
220*4882a593Smuzhiyun 	return 0;
221*4882a593Smuzhiyun }
222*4882a593Smuzhiyun 
/* Clockevent backed by the legacy set_timer_op hypercall. */
static struct clock_event_device xen_timerop_clockevent __ro_after_init = {
	.name			= "xen",
	.features		= CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns		= 0xffffffff,
	.max_delta_ticks	= 0xffffffff,
	.min_delta_ns		= TIMER_SLOP,
	.min_delta_ticks	= TIMER_SLOP,

	/* mult = 1, shift = 0: deltas are already in nanoseconds. */
	.mult			= 1,
	.shift			= 0,
	.rating			= 500,

	.set_state_shutdown	= xen_timerop_shutdown,
	.set_next_event		= xen_timerop_set_next_event,
};
239*4882a593Smuzhiyun 
xen_vcpuop_shutdown(struct clock_event_device * evt)240*4882a593Smuzhiyun static int xen_vcpuop_shutdown(struct clock_event_device *evt)
241*4882a593Smuzhiyun {
242*4882a593Smuzhiyun 	int cpu = smp_processor_id();
243*4882a593Smuzhiyun 
244*4882a593Smuzhiyun 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu),
245*4882a593Smuzhiyun 			       NULL) ||
246*4882a593Smuzhiyun 	    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
247*4882a593Smuzhiyun 			       NULL))
248*4882a593Smuzhiyun 		BUG();
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun 	return 0;
251*4882a593Smuzhiyun }
252*4882a593Smuzhiyun 
xen_vcpuop_set_oneshot(struct clock_event_device * evt)253*4882a593Smuzhiyun static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
254*4882a593Smuzhiyun {
255*4882a593Smuzhiyun 	int cpu = smp_processor_id();
256*4882a593Smuzhiyun 
257*4882a593Smuzhiyun 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
258*4882a593Smuzhiyun 			       NULL))
259*4882a593Smuzhiyun 		BUG();
260*4882a593Smuzhiyun 
261*4882a593Smuzhiyun 	return 0;
262*4882a593Smuzhiyun }
263*4882a593Smuzhiyun 
xen_vcpuop_set_next_event(unsigned long delta,struct clock_event_device * evt)264*4882a593Smuzhiyun static int xen_vcpuop_set_next_event(unsigned long delta,
265*4882a593Smuzhiyun 				     struct clock_event_device *evt)
266*4882a593Smuzhiyun {
267*4882a593Smuzhiyun 	int cpu = smp_processor_id();
268*4882a593Smuzhiyun 	struct vcpu_set_singleshot_timer single;
269*4882a593Smuzhiyun 	int ret;
270*4882a593Smuzhiyun 
271*4882a593Smuzhiyun 	WARN_ON(!clockevent_state_oneshot(evt));
272*4882a593Smuzhiyun 
273*4882a593Smuzhiyun 	single.timeout_abs_ns = get_abs_timeout(delta);
274*4882a593Smuzhiyun 	/* Get an event anyway, even if the timeout is already expired */
275*4882a593Smuzhiyun 	single.flags = 0;
276*4882a593Smuzhiyun 
277*4882a593Smuzhiyun 	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu),
278*4882a593Smuzhiyun 				 &single);
279*4882a593Smuzhiyun 	BUG_ON(ret != 0);
280*4882a593Smuzhiyun 
281*4882a593Smuzhiyun 	return ret;
282*4882a593Smuzhiyun }
283*4882a593Smuzhiyun 
/* Clockevent backed by the VCPUOP singleshot timer interface. */
static struct clock_event_device xen_vcpuop_clockevent __ro_after_init = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.max_delta_ticks = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,
	.min_delta_ticks = TIMER_SLOP,

	/* mult = 1, shift = 0: deltas are already in nanoseconds. */
	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_state_shutdown = xen_vcpuop_shutdown,
	.set_state_oneshot = xen_vcpuop_set_oneshot,
	.set_next_event = xen_vcpuop_set_next_event,
};

/* Selected implementation: timerop by default, vcpuop when available. */
static const struct clock_event_device *xen_clockevent =
	&xen_timerop_clockevent;
304*4882a593Smuzhiyun 
/* Per-cpu clockevent together with storage for its IRQ name ("timerN"). */
struct xen_clock_event_device {
	struct clock_event_device evt;
	char name[16];
};
/* .evt.irq == -1 means "no timer IRQ bound yet" for that cpu. */
static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
310*4882a593Smuzhiyun 
xen_timer_interrupt(int irq,void * dev_id)311*4882a593Smuzhiyun static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
312*4882a593Smuzhiyun {
313*4882a593Smuzhiyun 	struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);
314*4882a593Smuzhiyun 	irqreturn_t ret;
315*4882a593Smuzhiyun 
316*4882a593Smuzhiyun 	ret = IRQ_NONE;
317*4882a593Smuzhiyun 	if (evt->event_handler) {
318*4882a593Smuzhiyun 		evt->event_handler(evt);
319*4882a593Smuzhiyun 		ret = IRQ_HANDLED;
320*4882a593Smuzhiyun 	}
321*4882a593Smuzhiyun 
322*4882a593Smuzhiyun 	return ret;
323*4882a593Smuzhiyun }
324*4882a593Smuzhiyun 
xen_teardown_timer(int cpu)325*4882a593Smuzhiyun void xen_teardown_timer(int cpu)
326*4882a593Smuzhiyun {
327*4882a593Smuzhiyun 	struct clock_event_device *evt;
328*4882a593Smuzhiyun 	evt = &per_cpu(xen_clock_events, cpu).evt;
329*4882a593Smuzhiyun 
330*4882a593Smuzhiyun 	if (evt->irq >= 0) {
331*4882a593Smuzhiyun 		unbind_from_irqhandler(evt->irq, NULL);
332*4882a593Smuzhiyun 		evt->irq = -1;
333*4882a593Smuzhiyun 	}
334*4882a593Smuzhiyun }
335*4882a593Smuzhiyun 
xen_setup_timer(int cpu)336*4882a593Smuzhiyun void xen_setup_timer(int cpu)
337*4882a593Smuzhiyun {
338*4882a593Smuzhiyun 	struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
339*4882a593Smuzhiyun 	struct clock_event_device *evt = &xevt->evt;
340*4882a593Smuzhiyun 	int irq;
341*4882a593Smuzhiyun 
342*4882a593Smuzhiyun 	WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
343*4882a593Smuzhiyun 	if (evt->irq >= 0)
344*4882a593Smuzhiyun 		xen_teardown_timer(cpu);
345*4882a593Smuzhiyun 
346*4882a593Smuzhiyun 	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
347*4882a593Smuzhiyun 
348*4882a593Smuzhiyun 	snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);
349*4882a593Smuzhiyun 
350*4882a593Smuzhiyun 	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
351*4882a593Smuzhiyun 				      IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
352*4882a593Smuzhiyun 				      IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
353*4882a593Smuzhiyun 				      xevt->name, NULL);
354*4882a593Smuzhiyun 	(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
355*4882a593Smuzhiyun 
356*4882a593Smuzhiyun 	memcpy(evt, xen_clockevent, sizeof(*evt));
357*4882a593Smuzhiyun 
358*4882a593Smuzhiyun 	evt->cpumask = cpumask_of(cpu);
359*4882a593Smuzhiyun 	evt->irq = irq;
360*4882a593Smuzhiyun }
361*4882a593Smuzhiyun 
362*4882a593Smuzhiyun 
/* Register this CPU's Xen clock_event_device with the clockevents core. */
void xen_setup_cpu_clockevents(void)
{
	clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
}
367*4882a593Smuzhiyun 
/*
 * Resume hook: when using the vcpuop interface, stop the hypervisor's
 * periodic tick on every online vcpu again after suspend/migration.
 */
void xen_timer_resume(void)
{
	int cpu;

	/* Nothing to do for the legacy timer_op interface. */
	if (xen_clockevent != &xen_vcpuop_clockevent)
		return;

	for_each_online_cpu(cpu) {
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
				       xen_vcpu_nr(cpu), NULL))
			BUG();
	}
}
381*4882a593Smuzhiyun 
/* PV time operations installed into pv_ops.time at init. */
static const struct pv_time_ops xen_time_ops __initconst = {
	.sched_clock = xen_sched_clock,
	.steal_clock = xen_steal_clock,
};

/* Secondary (vsyscall) time info page, if registered with Xen. */
static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
/* sched_clock() value captured at suspend; used to re-base on resume. */
static u64 xen_clock_value_saved;
389*4882a593Smuzhiyun 
xen_save_time_memory_area(void)390*4882a593Smuzhiyun void xen_save_time_memory_area(void)
391*4882a593Smuzhiyun {
392*4882a593Smuzhiyun 	struct vcpu_register_time_memory_area t;
393*4882a593Smuzhiyun 	int ret;
394*4882a593Smuzhiyun 
395*4882a593Smuzhiyun 	xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset;
396*4882a593Smuzhiyun 
397*4882a593Smuzhiyun 	if (!xen_clock)
398*4882a593Smuzhiyun 		return;
399*4882a593Smuzhiyun 
400*4882a593Smuzhiyun 	t.addr.v = NULL;
401*4882a593Smuzhiyun 
402*4882a593Smuzhiyun 	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
403*4882a593Smuzhiyun 	if (ret != 0)
404*4882a593Smuzhiyun 		pr_notice("Cannot save secondary vcpu_time_info (err %d)",
405*4882a593Smuzhiyun 			  ret);
406*4882a593Smuzhiyun 	else
407*4882a593Smuzhiyun 		clear_page(xen_clock);
408*4882a593Smuzhiyun }
409*4882a593Smuzhiyun 
/*
 * Resume hook: re-register the secondary (vsyscall) time info page with
 * Xen and re-base sched_clock() to the value saved at suspend.
 */
void xen_restore_time_memory_area(void)
{
	struct vcpu_register_time_memory_area t;
	int ret;

	if (!xen_clock)
		goto out;

	t.addr.v = &xen_clock->pvti;

	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);

	/*
	 * We don't disable VDSO_CLOCKMODE_PVCLOCK entirely if it fails to
	 * register the secondary time info with Xen or if we migrated to a
	 * host without the necessary flags. On both of these cases what
	 * happens is either process seeing a zeroed out pvti or seeing no
	 * PVCLOCK_TSC_STABLE_BIT bit set. Userspace checks the latter and
	 * if 0, it discards the data in pvti and fallbacks to a system
	 * call for a reliable timestamp.
	 */
	if (ret != 0)
		pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
			  ret);

out:
	/* Need pvclock_resume() before using xen_clocksource_read(). */
	pvclock_resume();
	xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved;
}
440*4882a593Smuzhiyun 
/*
 * Register a secondary vcpu_time_info page with Xen so the vDSO can read
 * the pvclock directly (VDSO_CLOCKMODE_PVCLOCK).  On any failure the
 * page is unregistered/freed and the vDSO clock mode is left disabled.
 */
static void xen_setup_vsyscall_time_info(void)
{
	struct vcpu_register_time_memory_area t;
	struct pvclock_vsyscall_time_info *ti;
	int ret;

	ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL);
	if (!ti)
		return;

	t.addr.v = &ti->pvti;

	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
	if (ret) {
		pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (err %d)\n", ret);
		free_page((unsigned long)ti);
		return;
	}

	/*
	 * If primary time info had this bit set, secondary should too since
	 * it's the same data on both just different memory regions. But we
	 * still check it in case hypervisor is buggy.
	 */
	if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) {
		/* Unregister (NULL address) before freeing the page. */
		t.addr.v = NULL;
		ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
					 0, &t);
		/* Only free if Xen no longer references the page. */
		if (!ret)
			free_page((unsigned long)ti);

		pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (tsc unstable)\n");
		return;
	}

	xen_clock = ti;
	pvclock_set_pvti_cpu0_va(xen_clock);

	xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
}
481*4882a593Smuzhiyun 
/*
 * One-time timer and clocksource initialisation.  Registers the Xen
 * clocksource, picks the clockevent backend, sets the initial system
 * time from the Xen wallclock, and wires up this CPU's timer.
 */
static void __init xen_time_init(void)
{
	struct pvclock_vcpu_time_info *pvti;
	int cpu = smp_processor_id();
	struct timespec64 tp;

	/* As Dom0 is never moved, no penalty on using TSC there */
	if (xen_initial_domain())
		xen_clocksource.rating = 275;

	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
			       NULL) == 0) {
		/* Successfully turned off 100Hz tick, so we have the
		   vcpuop-based timer interface */
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;
	}

	/* Set initial system time with full resolution */
	xen_read_wallclock(&tp);
	do_settimeofday64(&tp);

	setup_force_cpu_cap(X86_FEATURE_TSC);

	/*
	 * We check ahead on the primary time info if this
	 * bit is supported hence speeding up Xen clocksource.
	 */
	pvti = &__this_cpu_read(xen_vcpu)->time;
	if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) {
		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
		xen_setup_vsyscall_time_info();
	}

	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();

	xen_time_setup_guest();

	/* Only the initial domain pushes wall time back to Xen. */
	if (xen_initial_domain())
		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
}
527*4882a593Smuzhiyun 
/* Install Xen time ops and platform clock hooks for a PV guest. */
void __init xen_init_time_ops(void)
{
	/* Anchor sched_clock() at the current Xen clock value. */
	xen_sched_clock_offset = xen_clocksource_read();
	pv_ops.time = xen_time_ops;

	x86_init.timers.timer_init = xen_time_init;
	x86_init.timers.setup_percpu_clockev = x86_init_noop;
	x86_cpuinit.setup_percpu_clockev = x86_init_noop;

	x86_platform.calibrate_tsc = xen_tsc_khz;
	x86_platform.get_wallclock = xen_get_wallclock;
	/* Dom0 uses the native method to set the hardware RTC. */
	if (!xen_initial_domain())
		x86_platform.set_wallclock = xen_set_wallclock;
}
543*4882a593Smuzhiyun 
544*4882a593Smuzhiyun #ifdef CONFIG_XEN_PVHVM
/* Per-cpu HVM clockevent setup, installed as setup_percpu_clockev. */
static void xen_hvm_setup_cpu_clockevents(void)
{
	xen_setup_runstate_info(smp_processor_id());
	/*
	 * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence
	 * doing it xen_hvm_cpu_notify (which gets called by smp_init during
	 * early bootup and also during CPU hotplug events).
	 */
	xen_setup_cpu_clockevents();
}
556*4882a593Smuzhiyun 
/*
 * Install Xen PV time ops for an HVM guest.  May be called more than
 * once (it is retried after the vcpu_info becomes available); only the
 * first successful invocation takes effect.
 */
void __init xen_hvm_init_time_ops(void)
{
	static bool hvm_time_initialized;

	if (hvm_time_initialized)
		return;

	/*
	 * vector callback is needed otherwise we cannot receive interrupts
	 * on cpu > 0 and at this point we don't know how many cpus are
	 * available.
	 */
	if (!xen_have_vector_callback)
		return;

	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
		pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer");
		return;
	}

	/*
	 * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'.
	 * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest
	 * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access
	 * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic.
	 *
	 * The xen_hvm_init_time_ops() should be called again later after
	 * __this_cpu_read(xen_vcpu) is available.
	 */
	if (!__this_cpu_read(xen_vcpu)) {
		pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n",
			xen_vcpu_nr(0));
		return;
	}

	/* Anchor sched_clock() at the current Xen clock value. */
	xen_sched_clock_offset = xen_clocksource_read();
	pv_ops.time = xen_time_ops;
	x86_init.timers.setup_percpu_clockev = xen_time_init;
	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;

	x86_platform.calibrate_tsc = xen_tsc_khz;
	x86_platform.get_wallclock = xen_get_wallclock;
	x86_platform.set_wallclock = xen_set_wallclock;

	hvm_time_initialized = true;
}
603*4882a593Smuzhiyun #endif
604*4882a593Smuzhiyun 
605*4882a593Smuzhiyun /* Kernel parameter to specify Xen timer slop */
parse_xen_timer_slop(char * ptr)606*4882a593Smuzhiyun static int __init parse_xen_timer_slop(char *ptr)
607*4882a593Smuzhiyun {
608*4882a593Smuzhiyun 	unsigned long slop = memparse(ptr, NULL);
609*4882a593Smuzhiyun 
610*4882a593Smuzhiyun 	xen_timerop_clockevent.min_delta_ns = slop;
611*4882a593Smuzhiyun 	xen_timerop_clockevent.min_delta_ticks = slop;
612*4882a593Smuzhiyun 	xen_vcpuop_clockevent.min_delta_ns = slop;
613*4882a593Smuzhiyun 	xen_vcpuop_clockevent.min_delta_ticks = slop;
614*4882a593Smuzhiyun 
615*4882a593Smuzhiyun 	return 0;
616*4882a593Smuzhiyun }
617*4882a593Smuzhiyun early_param("xen_timer_slop", parse_xen_timer_slop);
618