// SPDX-License-Identifier: GPL-2.0
/*
 * KVM dirty page logging test
 *
 * Copyright (C) 2018, Red Hat, Inc.
 */

#define _GNU_SOURCE /* for program_invocation_name */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <pthread.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>

#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#define VCPU_ID				1

/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX		1

/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM		0xc0000000

/* How many pages to dirty for each guest loop */
#define TEST_PAGES_PER_LOOP		1024

/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
#define TEST_HOST_LOOP_N		32UL

/* Interval for each host loop (ms) */
#define TEST_HOST_LOOP_INTERVAL		10UL
/* Dirty bitmaps are always little endian, so we need to swap on big endian */
#if defined(__s390x__)
# define BITOP_LE_SWIZZLE	((BITS_PER_LONG-1) & ~0x7)
# define test_bit_le(nr, addr) \
	test_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
# define set_bit_le(nr, addr) \
	set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
# define clear_bit_le(nr, addr) \
	clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
# define test_and_set_bit_le(nr, addr) \
	test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
# define test_and_clear_bit_le(nr, addr) \
	test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
#else
# define test_bit_le		test_bit
# define set_bit_le		set_bit
# define clear_bit_le		clear_bit
# define test_and_set_bit_le	test_and_set_bit
# define test_and_clear_bit_le	test_and_clear_bit
#endif
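
/*
 * A worked example of the swizzle (a sketch, assuming 64-bit longs on
 * s390x): BITOP_LE_SWIZZLE = (64 - 1) & ~0x7 = 56, so test_bit_le(0, addr)
 * becomes test_bit(0 ^ 56, addr) = test_bit(56, addr).  Bit 0 of a
 * little-endian bitmap lives in byte 0, which within a big-endian long
 * holds bits 56-63; XOR-ing with 56 flips the byte index inside the long
 * while keeping the bit position within the byte.
 */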

/*
 * Guest/Host shared variables. Ensure addr_gva2hva() and/or
 * sync_global_to/from_guest() are used when accessing from
 * the host. READ/WRITE_ONCE() should also be used with anything
 * that may change.
 */
static uint64_t host_page_size;
static uint64_t guest_page_size;
static uint64_t guest_num_pages;
static uint64_t random_array[TEST_PAGES_PER_LOOP];
static uint64_t iteration;

/*
 * Guest physical memory offset of the testing memory slot.
 * This will be set to the topmost valid physical address minus
 * the test memory size.
 */
static uint64_t guest_test_phys_mem;

/*
 * Guest virtual memory offset of the testing memory slot.
 * Must not conflict with identity mapped test code.
 */
static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
/*
 * Continuously write to the first 8 bytes of random pages within
 * the testing memory region.
 */
static void guest_code(void)
{
	uint64_t addr;
	int i;

	/*
	 * On s390x, all pages of a 1M segment are initially marked as dirty
	 * when a page of the segment is written to for the very first time.
	 * To compensate for this peculiarity, the test touches all pages
	 * during the first iteration.
	 */
	for (i = 0; i < guest_num_pages; i++) {
		addr = guest_test_virt_mem + i * guest_page_size;
		*(uint64_t *)addr = READ_ONCE(iteration);
	}

	while (true) {
		for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
			addr = guest_test_virt_mem;
			addr += (READ_ONCE(random_array[i]) % guest_num_pages)
				* guest_page_size;
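			/*
			 * Align the address down to a host page boundary:
			 * the host verifier reads the first 8 bytes of
			 * each host-sized page, so the write must land
			 * there for the value check to see it.
			 */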
			addr &= ~(host_page_size - 1);
			*(uint64_t *)addr = READ_ONCE(iteration);
		}

		/* Tell the host that we need more random numbers */
		GUEST_SYNC(1);
	}
}

/* Host variables */
static bool host_quit;

/* Points to the test VM memory region on which we track dirty logs */
static void *host_test_mem;
static uint64_t host_num_pages;

/* For statistics only */
static uint64_t host_dirty_count;
static uint64_t host_clear_count;
static uint64_t host_track_next_count;

enum log_mode_t {
	/* Only use KVM_GET_DIRTY_LOG for logging */
	LOG_MODE_DIRTY_LOG = 0,

	/* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
	LOG_MODE_CLEAR_LOG = 1,

	LOG_MODE_NUM,

	/* Run all supported modes */
	LOG_MODE_ALL = LOG_MODE_NUM,
};

/* Mode of logging to test. Default is to run all supported modes */
static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
/* Logging mode for current run */
static enum log_mode_t host_log_mode;

static bool clear_log_supported(void)
{
	return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
}

static void clear_log_create_vm_done(struct kvm_vm *vm)
{
	struct kvm_enable_cap cap = {};
	u64 manual_caps;

	manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
	TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!");
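	/*
	 * Keep only the two manual-protect features this test understands;
	 * the capability check may report additional bits on newer kernels.
	 */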
	manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
			KVM_DIRTY_LOG_INITIALLY_SET);
	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
	cap.args[0] = manual_caps;
	vm_enable_cap(vm, &cap);
}

static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
					  void *bitmap, uint32_t num_pages)
{
	kvm_vm_get_dirty_log(vm, slot, bitmap);
}

static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
					  void *bitmap, uint32_t num_pages)
{
	kvm_vm_get_dirty_log(vm, slot, bitmap);
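	/*
	 * Clearing what was just harvested re-arms dirty tracking for
	 * exactly those pages, so the next collection starts from a
	 * clean bitmap.
	 */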
	kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
}

struct log_mode {
	const char *name;
	/* Return true if this mode is supported, otherwise false */
	bool (*supported)(void);
	/* Hook when the vm creation is done (before vcpu creation) */
	void (*create_vm_done)(struct kvm_vm *vm);
	/* Hook to collect the dirty pages into the bitmap provided */
	void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
				     void *bitmap, uint32_t num_pages);
} log_modes[LOG_MODE_NUM] = {
	{
		.name = "dirty-log",
		.collect_dirty_pages = dirty_log_collect_dirty_pages,
	},
	{
		.name = "clear-log",
		.supported = clear_log_supported,
		.create_vm_done = clear_log_create_vm_done,
		.collect_dirty_pages = clear_log_collect_dirty_pages,
	},
};

/*
 * We use this bitmap to track pages that should have their dirty
 * bit set in the _next_ iteration. For example, if we detect that
 * a page's value changed to the current iteration number while its
 * bit is cleared in the latest bitmap, then the system must
 * report that write in the next get-dirty-log call.
 */
static unsigned long *host_bmap_track;

static void log_modes_dump(void)
{
	int i;

	printf("all");
	for (i = 0; i < LOG_MODE_NUM; i++)
		printf(", %s", log_modes[i].name);
	printf("\n");
}

static bool log_mode_supported(void)
{
	struct log_mode *mode = &log_modes[host_log_mode];

	if (mode->supported)
		return mode->supported();

	return true;
}

static void log_mode_create_vm_done(struct kvm_vm *vm)
{
	struct log_mode *mode = &log_modes[host_log_mode];

	if (mode->create_vm_done)
		mode->create_vm_done(vm);
}

static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
					 void *bitmap, uint32_t num_pages)
{
	struct log_mode *mode = &log_modes[host_log_mode];

	TEST_ASSERT(mode->collect_dirty_pages != NULL,
		    "collect_dirty_pages() is required for any log mode!");
	mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
}

static void generate_random_array(uint64_t *guest_array, uint64_t size)
{
	uint64_t i;

	for (i = 0; i < size; i++)
		guest_array[i] = random();
}

static void *vcpu_worker(void *data)
{
	int ret;
	struct kvm_vm *vm = data;
	uint64_t *guest_array;
	uint64_t pages_count = 0;
	struct kvm_run *run;

	run = vcpu_state(vm, VCPU_ID);

	guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
	generate_random_array(guest_array, TEST_PAGES_PER_LOOP);

	while (!READ_ONCE(host_quit)) {
		/* Let the guest dirty the random pages */
		ret = _vcpu_run(vm, VCPU_ID);
		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
		if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
			pages_count += TEST_PAGES_PER_LOOP;
			generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
		} else {
			TEST_FAIL("Invalid guest sync status: "
				  "exit_reason=%s\n",
				  exit_reason_str(run->exit_reason));
		}
	}

	pr_info("Dirtied %"PRIu64" pages\n", pages_count);

	return NULL;
}

static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
{
	uint64_t step = vm_num_host_pages(mode, 1);
	uint64_t page;
	uint64_t *value_ptr;

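	/*
	 * Walk the region one guest page at a time: "step" is the number
	 * of host pages backing a single guest page, so the loop below
	 * examines the first host page of each guest page.
	 */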
	for (page = 0; page < host_num_pages; page += step) {
		value_ptr = host_test_mem + page * host_page_size;

		/* If this is a special page that we were tracking... */
		if (test_and_clear_bit_le(page, host_bmap_track)) {
			host_track_next_count++;
			TEST_ASSERT(test_bit_le(page, bmap),
				    "Page %"PRIu64" should have its dirty bit "
				    "set in this iteration but it is missing",
				    page);
		}

		if (test_and_clear_bit_le(page, bmap)) {
			host_dirty_count++;
			/*
			 * If the bit is set, the value written to
			 * the corresponding page should be either the
			 * previous iteration number or the current one.
			 */
			TEST_ASSERT(*value_ptr == iteration ||
				    *value_ptr == iteration - 1,
				    "Set page %"PRIu64" value %"PRIu64
				    " incorrect (iteration=%"PRIu64")",
				    page, *value_ptr, iteration);
		} else {
			host_clear_count++;
			/*
			 * If cleared, the value written can be any
			 * value less than or equal to the iteration
			 * number. Note that the value can be exactly
			 * (iteration-1) if the write happens like
			 * this:
			 *
			 * (1) increase loop count to "iteration-1"
			 * (2) write to page P happens (with value
			 *     "iteration-1")
			 * (3) get dirty log for "iteration-1"; we'll
			 *     see that page P bit is set (dirtied),
			 *     and not set the bit in host_bmap_track
			 * (4) increase loop count to "iteration"
			 *     (which is current iteration)
			 * (5) get dirty log for current iteration,
			 *     we'll see that page P is cleared, with
			 *     value "iteration-1".
			 */
			TEST_ASSERT(*value_ptr <= iteration,
				    "Clear page %"PRIu64" value %"PRIu64
				    " incorrect (iteration=%"PRIu64")",
				    page, *value_ptr, iteration);
			if (*value_ptr == iteration) {
				/*
				 * This page is _just_ modified; it
				 * should report its dirtiness in the
				 * next run
				 */
				set_bit_le(page, host_bmap_track);
			}
		}
	}
}

static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
				uint64_t extra_mem_pages, void *guest_code)
{
	struct kvm_vm *vm;
	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;

	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));

	vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
#ifdef __x86_64__
	vm_create_irqchip(vm);
#endif
	log_mode_create_vm_done(vm);
	vm_vcpu_add_default(vm, vcpuid, guest_code);
	return vm;
}

#define DIRTY_MEM_BITS 30 /* 1G */
#define PAGE_SHIFT_4K  12

static void run_test(enum vm_guest_mode mode, unsigned long iterations,
		     unsigned long interval, uint64_t phys_offset)
{
	pthread_t vcpu_thread;
	struct kvm_vm *vm;
	unsigned long *bmap;

	if (!log_mode_supported()) {
		print_skip("Log mode '%s' not supported",
			   log_modes[host_log_mode].name);
		return;
	}

	/*
	 * We reserve page tables for twice the amount of extra dirty
	 * memory, which comfortably covers the original (1G+) test
	 * range. The calculation uses the 4K page size, the smallest
	 * available, so the resulting page count is sufficient for all
	 * architectures (e.g., a guest with 64K pages needs even less
	 * memory for page tables).
	 */
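	/*
	 * With DIRTY_MEM_BITS = 30 and PAGE_SHIFT_4K = 12 this requests
	 * 2ul << 18 = 524288 extra 4K pages (2G), and create_vm() in turn
	 * reserves 524288 / 512 * 2 = 2048 pages for page tables.
	 */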
	vm = create_vm(mode, VCPU_ID,
		       2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K),
		       guest_code);

	guest_page_size = vm_get_page_size(vm);
	/*
	 * A little more than 1G of guest-page-sized pages.  Cover the
	 * case where the size is not aligned to 64 pages.
	 */
	guest_num_pages = (1ul << (DIRTY_MEM_BITS -
				   vm_get_page_shift(vm))) + 3;
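	/*
	 * For example, with 4K guest pages this is (1ul << 18) + 3 =
	 * 262147 pages (1G + 12K); the "+ 3" makes the region size a
	 * non-multiple of 64 pages so partial bitmap words get exercised.
	 */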
	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);

	host_page_size = getpagesize();
	host_num_pages = vm_num_host_pages(mode, guest_num_pages);

	if (!phys_offset) {
		guest_test_phys_mem = (vm_get_max_gfn(vm) -
				       guest_num_pages) * guest_page_size;
		guest_test_phys_mem &= ~(host_page_size - 1);
	} else {
		guest_test_phys_mem = phys_offset;
	}

#ifdef __s390x__
	/* Align to 1M (segment size) */
	guest_test_phys_mem &= ~((1 << 20) - 1);
#endif

	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);

	bmap = bitmap_alloc(host_num_pages);
	host_bmap_track = bitmap_alloc(host_num_pages);

	/* Add an extra memory slot for testing dirty logging */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    guest_test_phys_mem,
				    TEST_MEM_SLOT_INDEX,
				    guest_num_pages,
				    KVM_MEM_LOG_DIRTY_PAGES);

	/* Do mapping for the dirty track memory slot */
	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);

	/* Cache the HVA pointer of the region */
	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);

#ifdef __x86_64__
	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
#endif
	ucall_init(vm, NULL);

	/* Export the shared variables to the guest */
	sync_global_to_guest(vm, host_page_size);
	sync_global_to_guest(vm, guest_page_size);
	sync_global_to_guest(vm, guest_test_virt_mem);
	sync_global_to_guest(vm, guest_num_pages);

	/* Start the iterations */
	iteration = 1;
	sync_global_to_guest(vm, iteration);
	host_quit = false;
	host_dirty_count = 0;
	host_clear_count = 0;
	host_track_next_count = 0;

	pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);

	while (iteration < iterations) {
		/* Give the vcpu thread some time to dirty some pages */
		usleep(interval * 1000);
		log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX,
					     bmap, host_num_pages);
		vm_dirty_log_verify(mode, bmap);
		iteration++;
		sync_global_to_guest(vm, iteration);
	}

	/* Tell the vcpu thread to quit */
	host_quit = true;
	pthread_join(vcpu_thread, NULL);

	pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
		"track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
		host_track_next_count);

	free(bmap);
	free(host_bmap_track);
	ucall_uninit(vm);
	kvm_vm_free(vm);
}

struct guest_mode {
	bool supported;
	bool enabled;
};
static struct guest_mode guest_modes[NUM_VM_MODES];

#define guest_mode_init(mode, supported, enabled) ({ \
	guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
})

static void help(char *name)
{
	int i;

	puts("");
	printf("usage: %s [-h] [-i iterations] [-I interval] "
	       "[-p offset] [-m mode]\n", name);
	puts("");
	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
	       TEST_HOST_LOOP_N);
	printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
	       TEST_HOST_LOOP_INTERVAL);
	printf(" -p: specify guest physical test memory offset\n"
	       "     Warning: a low offset can conflict with the loaded test code.\n");
	printf(" -M: specify the host logging mode "
	       "(default: run all log modes). Supported modes:\n\t");
	log_modes_dump();
	printf(" -m: specify the guest mode ID to test "
	       "(default: test all supported modes)\n"
	       "     This option may be used multiple times.\n"
	       "     Guest mode IDs:\n");
	for (i = 0; i < NUM_VM_MODES; ++i) {
		printf("  %d: %s%s\n", i, vm_guest_mode_string(i),
		       guest_modes[i].supported ? " (supported)" : "");
	}
	puts("");
	exit(0);
}

int main(int argc, char *argv[])
{
	unsigned long iterations = TEST_HOST_LOOP_N;
	unsigned long interval = TEST_HOST_LOOP_INTERVAL;
	bool mode_selected = false;
	uint64_t phys_offset = 0;
	unsigned int mode;
	int opt, i, j;

#ifdef __x86_64__
	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
#endif
#ifdef __aarch64__
	guest_mode_init(VM_MODE_P40V48_4K, true, true);
	guest_mode_init(VM_MODE_P40V48_64K, true, true);

	{
		unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);

		if (limit >= 52)
			guest_mode_init(VM_MODE_P52V48_64K, true, true);
		if (limit >= 48) {
			guest_mode_init(VM_MODE_P48V48_4K, true, true);
			guest_mode_init(VM_MODE_P48V48_64K, true, true);
		}
	}
#endif
#ifdef __s390x__
	guest_mode_init(VM_MODE_P40V48_4K, true, true);
#endif

	while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) {
		switch (opt) {
		case 'i':
			iterations = strtol(optarg, NULL, 10);
			break;
		case 'I':
			interval = strtol(optarg, NULL, 10);
			break;
		case 'p':
			phys_offset = strtoull(optarg, NULL, 0);
			break;
		case 'm':
			if (!mode_selected) {
				for (i = 0; i < NUM_VM_MODES; ++i)
					guest_modes[i].enabled = false;
				mode_selected = true;
			}
			mode = strtoul(optarg, NULL, 10);
			TEST_ASSERT(mode < NUM_VM_MODES,
				    "Guest mode ID %d too big", mode);
			guest_modes[mode].enabled = true;
			break;
		case 'M':
			if (!strcmp(optarg, "all")) {
				host_log_mode_option = LOG_MODE_ALL;
				break;
			}
			for (i = 0; i < LOG_MODE_NUM; i++) {
				if (!strcmp(optarg, log_modes[i].name)) {
					pr_info("Setting log mode to: '%s'\n",
						optarg);
					host_log_mode_option = i;
					break;
				}
			}
			if (i == LOG_MODE_NUM) {
				printf("Log mode '%s' invalid. Please choose "
				       "from: ", optarg);
				log_modes_dump();
				exit(1);
			}
			break;
		case 'h':
		default:
			help(argv[0]);
			break;
		}
	}

	TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
	TEST_ASSERT(interval > 0, "Interval must be greater than zero");

	pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
		iterations, interval);

	srandom(time(0));

	for (i = 0; i < NUM_VM_MODES; ++i) {
		if (!guest_modes[i].enabled)
			continue;
		TEST_ASSERT(guest_modes[i].supported,
			    "Guest mode ID %d (%s) not supported.",
			    i, vm_guest_mode_string(i));
		if (host_log_mode_option == LOG_MODE_ALL) {
			/* Run each log mode */
			for (j = 0; j < LOG_MODE_NUM; j++) {
				pr_info("Testing Log Mode '%s'\n",
					log_modes[j].name);
				host_log_mode = j;
				run_test(i, iterations, interval, phys_offset);
			}
		} else {
			host_log_mode = host_log_mode_option;
			run_test(i, iterations, interval, phys_offset);
		}
	}

	return 0;
}