// SPDX-License-Identifier: GPL-2.0-only
/*
 * An implementation of the host initiated guest snapshot for Hyper-V.
 *
 * Copyright (C) 2013, Microsoft, Inc.
 * Author : K. Y. Srinivasan <kys@microsoft.com>
 */
8*4882a593Smuzhiyun
9*4882a593Smuzhiyun
10*4882a593Smuzhiyun #include <sys/types.h>
11*4882a593Smuzhiyun #include <sys/poll.h>
12*4882a593Smuzhiyun #include <sys/ioctl.h>
13*4882a593Smuzhiyun #include <sys/stat.h>
14*4882a593Smuzhiyun #include <sys/sysmacros.h>
15*4882a593Smuzhiyun #include <fcntl.h>
16*4882a593Smuzhiyun #include <stdio.h>
17*4882a593Smuzhiyun #include <mntent.h>
18*4882a593Smuzhiyun #include <stdlib.h>
19*4882a593Smuzhiyun #include <unistd.h>
20*4882a593Smuzhiyun #include <string.h>
21*4882a593Smuzhiyun #include <ctype.h>
22*4882a593Smuzhiyun #include <errno.h>
23*4882a593Smuzhiyun #include <linux/fs.h>
24*4882a593Smuzhiyun #include <linux/major.h>
25*4882a593Smuzhiyun #include <linux/hyperv.h>
26*4882a593Smuzhiyun #include <syslog.h>
27*4882a593Smuzhiyun #include <getopt.h>
28*4882a593Smuzhiyun #include <stdbool.h>
29*4882a593Smuzhiyun #include <dirent.h>
30*4882a593Smuzhiyun
/*
 * Set once a FREEZE has succeeded on at least one filesystem and cleared
 * again after a THAW pass; main() consults it before re-opening the device
 * so that filesystems are not left frozen.
 */
static bool fs_frozen;
32*4882a593Smuzhiyun
/*
 * Issue one FIFREEZE or FITHAW ioctl on the filesystem mounted at @dir.
 *
 * @dir: mount point; it is opened read-only just to obtain a descriptor.
 * @cmd: ioctl request, FIFREEZE or FITHAW.
 *
 * Returns 0 on success, or when the ioctl failed only because the same
 * filesystem had already been frozen/thawed through another mount point.
 * Returns 1 on any other failure; in that case errno still holds the error
 * reported by open()/ioctl() so the caller can log it later.
 *
 * Don't use syslog() in the function since that can cause write to disk.
 */
static int vss_do_freeze(char *dir, unsigned int cmd)
{
	int ret, saved_errno;
	int fd = open(dir, O_RDONLY);

	if (fd < 0)
		return 1;

	ret = ioctl(fd, cmd, 0);
	saved_errno = errno;

	/*
	 * If a partition is mounted more than once, only the first
	 * FREEZE/THAW can succeed and the later ones will get
	 * EBUSY/EINVAL respectively: there could be 2 cases:
	 * 1) a user may mount the same partition to different directories
	 *    by mistake or on purpose;
	 * 2) The subvolume of btrfs appears to have the same partition
	 *    mounted more than once.
	 */
	if (ret && ((cmd == FIFREEZE && saved_errno == EBUSY) ||
		    (cmd == FITHAW && saved_errno == EINVAL)))
		ret = 0;

	close(fd);

	/* close() may have overwritten errno; hand the ioctl error back. */
	if (ret)
		errno = saved_errno;

	return !!ret;
}
63*4882a593Smuzhiyun
/*
 * Decide whether the device directory @blkname is backed by a loop device.
 *
 * @blkname: directory describing a block device (the caller in this file
 *           passes a "/sys/dev/block/<major>:<minor>" path).
 *
 * The device counts as a loop device when "<blkname>/loop" is accessible,
 * or when any entry below "<blkname>/slaves" does (checked recursively).
 * Unexpected errors (anything but ENOENT) are logged and treated as
 * "not a loop device".  Exits the process if memory cannot be allocated.
 */
static bool is_dev_loop(const char *blkname)
{
	bool found = false;
	struct dirent *de;
	DIR *slaves;
	char *path = malloc(PATH_MAX);

	if (path == NULL) {
		syslog(LOG_ERR, "Can't allocate memory!");
		exit(1);
	}

	/* Direct hit: the device itself exposes a "loop" entry. */
	snprintf(path, PATH_MAX, "%s/loop", blkname);
	if (access(path, R_OK | X_OK) == 0) {
		free(path);
		return true;
	}
	if (errno != ENOENT)
		syslog(LOG_ERR, "Can't access: %s; error:%d %s!",
		       path, errno, strerror(errno));

	/* Otherwise walk the underlying devices, if there are any. */
	snprintf(path, PATH_MAX, "%s/slaves", blkname);
	slaves = opendir(path);
	if (slaves == NULL) {
		if (errno != ENOENT)
			syslog(LOG_ERR, "Can't opendir: %s; error:%d %s!",
			       path, errno, strerror(errno));
		free(path);
		return false;
	}

	while (!found && (de = readdir(slaves)) != NULL) {
		if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
			continue;

		/* The recursive call works on its own buffer. */
		snprintf(path, PATH_MAX, "%s/slaves/%s", blkname, de->d_name);
		found = is_dev_loop(path);
	}

	closedir(slaves);
	free(path);
	return found;
}
112*4882a593Smuzhiyun
vss_operate(int operation)113*4882a593Smuzhiyun static int vss_operate(int operation)
114*4882a593Smuzhiyun {
115*4882a593Smuzhiyun char match[] = "/dev/";
116*4882a593Smuzhiyun FILE *mounts;
117*4882a593Smuzhiyun struct mntent *ent;
118*4882a593Smuzhiyun struct stat sb;
119*4882a593Smuzhiyun char errdir[1024] = {0};
120*4882a593Smuzhiyun char blkdir[23]; /* /sys/dev/block/XXX:XXX */
121*4882a593Smuzhiyun unsigned int cmd;
122*4882a593Smuzhiyun int error = 0, root_seen = 0, save_errno = 0;
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun switch (operation) {
125*4882a593Smuzhiyun case VSS_OP_FREEZE:
126*4882a593Smuzhiyun cmd = FIFREEZE;
127*4882a593Smuzhiyun break;
128*4882a593Smuzhiyun case VSS_OP_THAW:
129*4882a593Smuzhiyun cmd = FITHAW;
130*4882a593Smuzhiyun break;
131*4882a593Smuzhiyun default:
132*4882a593Smuzhiyun return -1;
133*4882a593Smuzhiyun }
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun mounts = setmntent("/proc/mounts", "r");
136*4882a593Smuzhiyun if (mounts == NULL)
137*4882a593Smuzhiyun return -1;
138*4882a593Smuzhiyun
139*4882a593Smuzhiyun while ((ent = getmntent(mounts))) {
140*4882a593Smuzhiyun if (strncmp(ent->mnt_fsname, match, strlen(match)))
141*4882a593Smuzhiyun continue;
142*4882a593Smuzhiyun if (stat(ent->mnt_fsname, &sb)) {
143*4882a593Smuzhiyun syslog(LOG_ERR, "Can't stat: %s; error:%d %s!",
144*4882a593Smuzhiyun ent->mnt_fsname, errno, strerror(errno));
145*4882a593Smuzhiyun } else {
146*4882a593Smuzhiyun sprintf(blkdir, "/sys/dev/block/%d:%d",
147*4882a593Smuzhiyun major(sb.st_rdev), minor(sb.st_rdev));
148*4882a593Smuzhiyun if (is_dev_loop(blkdir))
149*4882a593Smuzhiyun continue;
150*4882a593Smuzhiyun }
151*4882a593Smuzhiyun if (hasmntopt(ent, MNTOPT_RO) != NULL)
152*4882a593Smuzhiyun continue;
153*4882a593Smuzhiyun if (strcmp(ent->mnt_type, "vfat") == 0)
154*4882a593Smuzhiyun continue;
155*4882a593Smuzhiyun if (strcmp(ent->mnt_dir, "/") == 0) {
156*4882a593Smuzhiyun root_seen = 1;
157*4882a593Smuzhiyun continue;
158*4882a593Smuzhiyun }
159*4882a593Smuzhiyun error |= vss_do_freeze(ent->mnt_dir, cmd);
160*4882a593Smuzhiyun if (operation == VSS_OP_FREEZE) {
161*4882a593Smuzhiyun if (error)
162*4882a593Smuzhiyun goto err;
163*4882a593Smuzhiyun fs_frozen = true;
164*4882a593Smuzhiyun }
165*4882a593Smuzhiyun }
166*4882a593Smuzhiyun
167*4882a593Smuzhiyun endmntent(mounts);
168*4882a593Smuzhiyun
169*4882a593Smuzhiyun if (root_seen) {
170*4882a593Smuzhiyun error |= vss_do_freeze("/", cmd);
171*4882a593Smuzhiyun if (operation == VSS_OP_FREEZE) {
172*4882a593Smuzhiyun if (error)
173*4882a593Smuzhiyun goto err;
174*4882a593Smuzhiyun fs_frozen = true;
175*4882a593Smuzhiyun }
176*4882a593Smuzhiyun }
177*4882a593Smuzhiyun
178*4882a593Smuzhiyun if (operation == VSS_OP_THAW && !error)
179*4882a593Smuzhiyun fs_frozen = false;
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun goto out;
182*4882a593Smuzhiyun err:
183*4882a593Smuzhiyun save_errno = errno;
184*4882a593Smuzhiyun if (ent) {
185*4882a593Smuzhiyun strncpy(errdir, ent->mnt_dir, sizeof(errdir)-1);
186*4882a593Smuzhiyun endmntent(mounts);
187*4882a593Smuzhiyun }
188*4882a593Smuzhiyun vss_operate(VSS_OP_THAW);
189*4882a593Smuzhiyun fs_frozen = false;
190*4882a593Smuzhiyun /* Call syslog after we thaw all filesystems */
191*4882a593Smuzhiyun if (ent)
192*4882a593Smuzhiyun syslog(LOG_ERR, "FREEZE of %s failed; error:%d %s",
193*4882a593Smuzhiyun errdir, save_errno, strerror(save_errno));
194*4882a593Smuzhiyun else
195*4882a593Smuzhiyun syslog(LOG_ERR, "FREEZE of / failed; error:%d %s", save_errno,
196*4882a593Smuzhiyun strerror(save_errno));
197*4882a593Smuzhiyun out:
198*4882a593Smuzhiyun return error;
199*4882a593Smuzhiyun }
200*4882a593Smuzhiyun
/*
 * Write the command-line help text to stderr.
 *
 * @argv: program argument vector; only argv[0] is used, as the program
 *        name in the first line of the message.
 */
void print_usage(char *argv[])
{
	const char *prog = argv[0];

	fprintf(stderr, "Usage: %s [options]\n", prog);
	fputs("Options are:\n"
	      " -n, --no-daemon stay in foreground, don't daemonize\n"
	      " -h, --help print this help\n", stderr);
}
208*4882a593Smuzhiyun
main(int argc,char * argv[])209*4882a593Smuzhiyun int main(int argc, char *argv[])
210*4882a593Smuzhiyun {
211*4882a593Smuzhiyun int vss_fd = -1, len;
212*4882a593Smuzhiyun int error;
213*4882a593Smuzhiyun struct pollfd pfd;
214*4882a593Smuzhiyun int op;
215*4882a593Smuzhiyun struct hv_vss_msg vss_msg[1];
216*4882a593Smuzhiyun int daemonize = 1, long_index = 0, opt;
217*4882a593Smuzhiyun int in_handshake;
218*4882a593Smuzhiyun __u32 kernel_modver;
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun static struct option long_options[] = {
221*4882a593Smuzhiyun {"help", no_argument, 0, 'h' },
222*4882a593Smuzhiyun {"no-daemon", no_argument, 0, 'n' },
223*4882a593Smuzhiyun {0, 0, 0, 0 }
224*4882a593Smuzhiyun };
225*4882a593Smuzhiyun
226*4882a593Smuzhiyun while ((opt = getopt_long(argc, argv, "hn", long_options,
227*4882a593Smuzhiyun &long_index)) != -1) {
228*4882a593Smuzhiyun switch (opt) {
229*4882a593Smuzhiyun case 'n':
230*4882a593Smuzhiyun daemonize = 0;
231*4882a593Smuzhiyun break;
232*4882a593Smuzhiyun case 'h':
233*4882a593Smuzhiyun print_usage(argv);
234*4882a593Smuzhiyun exit(0);
235*4882a593Smuzhiyun default:
236*4882a593Smuzhiyun print_usage(argv);
237*4882a593Smuzhiyun exit(EXIT_FAILURE);
238*4882a593Smuzhiyun }
239*4882a593Smuzhiyun }
240*4882a593Smuzhiyun
241*4882a593Smuzhiyun if (daemonize && daemon(1, 0))
242*4882a593Smuzhiyun return 1;
243*4882a593Smuzhiyun
244*4882a593Smuzhiyun openlog("Hyper-V VSS", 0, LOG_USER);
245*4882a593Smuzhiyun syslog(LOG_INFO, "VSS starting; pid is:%d", getpid());
246*4882a593Smuzhiyun
247*4882a593Smuzhiyun reopen_vss_fd:
248*4882a593Smuzhiyun if (vss_fd != -1)
249*4882a593Smuzhiyun close(vss_fd);
250*4882a593Smuzhiyun if (fs_frozen) {
251*4882a593Smuzhiyun if (vss_operate(VSS_OP_THAW) || fs_frozen) {
252*4882a593Smuzhiyun syslog(LOG_ERR, "failed to thaw file system: err=%d",
253*4882a593Smuzhiyun errno);
254*4882a593Smuzhiyun exit(EXIT_FAILURE);
255*4882a593Smuzhiyun }
256*4882a593Smuzhiyun }
257*4882a593Smuzhiyun
258*4882a593Smuzhiyun in_handshake = 1;
259*4882a593Smuzhiyun vss_fd = open("/dev/vmbus/hv_vss", O_RDWR);
260*4882a593Smuzhiyun if (vss_fd < 0) {
261*4882a593Smuzhiyun syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s",
262*4882a593Smuzhiyun errno, strerror(errno));
263*4882a593Smuzhiyun exit(EXIT_FAILURE);
264*4882a593Smuzhiyun }
265*4882a593Smuzhiyun /*
266*4882a593Smuzhiyun * Register ourselves with the kernel.
267*4882a593Smuzhiyun */
268*4882a593Smuzhiyun vss_msg->vss_hdr.operation = VSS_OP_REGISTER1;
269*4882a593Smuzhiyun
270*4882a593Smuzhiyun len = write(vss_fd, vss_msg, sizeof(struct hv_vss_msg));
271*4882a593Smuzhiyun if (len < 0) {
272*4882a593Smuzhiyun syslog(LOG_ERR, "registration to kernel failed; error: %d %s",
273*4882a593Smuzhiyun errno, strerror(errno));
274*4882a593Smuzhiyun close(vss_fd);
275*4882a593Smuzhiyun exit(EXIT_FAILURE);
276*4882a593Smuzhiyun }
277*4882a593Smuzhiyun
278*4882a593Smuzhiyun pfd.fd = vss_fd;
279*4882a593Smuzhiyun
280*4882a593Smuzhiyun while (1) {
281*4882a593Smuzhiyun pfd.events = POLLIN;
282*4882a593Smuzhiyun pfd.revents = 0;
283*4882a593Smuzhiyun
284*4882a593Smuzhiyun if (poll(&pfd, 1, -1) < 0) {
285*4882a593Smuzhiyun syslog(LOG_ERR, "poll failed; error:%d %s", errno, strerror(errno));
286*4882a593Smuzhiyun if (errno == EINVAL) {
287*4882a593Smuzhiyun close(vss_fd);
288*4882a593Smuzhiyun exit(EXIT_FAILURE);
289*4882a593Smuzhiyun }
290*4882a593Smuzhiyun else
291*4882a593Smuzhiyun continue;
292*4882a593Smuzhiyun }
293*4882a593Smuzhiyun
294*4882a593Smuzhiyun len = read(vss_fd, vss_msg, sizeof(struct hv_vss_msg));
295*4882a593Smuzhiyun
296*4882a593Smuzhiyun if (in_handshake) {
297*4882a593Smuzhiyun if (len != sizeof(kernel_modver)) {
298*4882a593Smuzhiyun syslog(LOG_ERR, "invalid version negotiation");
299*4882a593Smuzhiyun exit(EXIT_FAILURE);
300*4882a593Smuzhiyun }
301*4882a593Smuzhiyun kernel_modver = *(__u32 *)vss_msg;
302*4882a593Smuzhiyun in_handshake = 0;
303*4882a593Smuzhiyun syslog(LOG_INFO, "VSS: kernel module version: %d",
304*4882a593Smuzhiyun kernel_modver);
305*4882a593Smuzhiyun continue;
306*4882a593Smuzhiyun }
307*4882a593Smuzhiyun
308*4882a593Smuzhiyun if (len != sizeof(struct hv_vss_msg)) {
309*4882a593Smuzhiyun syslog(LOG_ERR, "read failed; error:%d %s",
310*4882a593Smuzhiyun errno, strerror(errno));
311*4882a593Smuzhiyun goto reopen_vss_fd;
312*4882a593Smuzhiyun }
313*4882a593Smuzhiyun
314*4882a593Smuzhiyun op = vss_msg->vss_hdr.operation;
315*4882a593Smuzhiyun error = HV_S_OK;
316*4882a593Smuzhiyun
317*4882a593Smuzhiyun switch (op) {
318*4882a593Smuzhiyun case VSS_OP_FREEZE:
319*4882a593Smuzhiyun case VSS_OP_THAW:
320*4882a593Smuzhiyun error = vss_operate(op);
321*4882a593Smuzhiyun syslog(LOG_INFO, "VSS: op=%s: %s\n",
322*4882a593Smuzhiyun op == VSS_OP_FREEZE ? "FREEZE" : "THAW",
323*4882a593Smuzhiyun error ? "failed" : "succeeded");
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun if (error) {
326*4882a593Smuzhiyun error = HV_E_FAIL;
327*4882a593Smuzhiyun syslog(LOG_ERR, "op=%d failed!", op);
328*4882a593Smuzhiyun syslog(LOG_ERR, "report it with these files:");
329*4882a593Smuzhiyun syslog(LOG_ERR, "/etc/fstab and /proc/mounts");
330*4882a593Smuzhiyun }
331*4882a593Smuzhiyun break;
332*4882a593Smuzhiyun case VSS_OP_HOT_BACKUP:
333*4882a593Smuzhiyun syslog(LOG_INFO, "VSS: op=CHECK HOT BACKUP\n");
334*4882a593Smuzhiyun break;
335*4882a593Smuzhiyun default:
336*4882a593Smuzhiyun syslog(LOG_ERR, "Illegal op:%d\n", op);
337*4882a593Smuzhiyun }
338*4882a593Smuzhiyun
339*4882a593Smuzhiyun /*
340*4882a593Smuzhiyun * The write() may return an error due to the faked VSS_OP_THAW
341*4882a593Smuzhiyun * message upon hibernation. Ignore the error by resetting the
342*4882a593Smuzhiyun * dev file, i.e. closing and re-opening it.
343*4882a593Smuzhiyun */
344*4882a593Smuzhiyun vss_msg->error = error;
345*4882a593Smuzhiyun len = write(vss_fd, vss_msg, sizeof(struct hv_vss_msg));
346*4882a593Smuzhiyun if (len != sizeof(struct hv_vss_msg)) {
347*4882a593Smuzhiyun syslog(LOG_ERR, "write failed; error: %d %s", errno,
348*4882a593Smuzhiyun strerror(errno));
349*4882a593Smuzhiyun goto reopen_vss_fd;
350*4882a593Smuzhiyun }
351*4882a593Smuzhiyun }
352*4882a593Smuzhiyun
353*4882a593Smuzhiyun close(vss_fd);
354*4882a593Smuzhiyun exit(0);
355*4882a593Smuzhiyun }
356