1*4882a593Smuzhiyun #define _GNU_SOURCE
2*4882a593Smuzhiyun #include <fcntl.h>
3*4882a593Smuzhiyun #include <limits.h>
4*4882a593Smuzhiyun #include <signal.h>
5*4882a593Smuzhiyun #include <stdio.h>
6*4882a593Smuzhiyun #include <stdlib.h>
7*4882a593Smuzhiyun #include <stdbool.h>
8*4882a593Smuzhiyun #include <string.h>
9*4882a593Smuzhiyun #include <unistd.h>
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun #include <sys/mman.h>
12*4882a593Smuzhiyun #include <sys/wait.h>
13*4882a593Smuzhiyun
14*4882a593Smuzhiyun #ifndef MADV_PAGEOUT
15*4882a593Smuzhiyun #define MADV_PAGEOUT 21
16*4882a593Smuzhiyun #endif
17*4882a593Smuzhiyun
/* Fixed virtual address (1UL << 30) at which the test mapping is requested. */
#define BASE_ADDR ((void *)(1UL << 30))

/*
 * Geometry shared by the tests below. All three start out as zero here.
 * NOTE(review): presumably filled in during start-up, outside this section
 * of the file - confirm.
 */
static unsigned long hpage_pmd_size;	/* byte length of the test mapping */
static unsigned long page_size;		/* byte stride between touched pages */
static int hpage_pmd_nr;		/* number of pages per test mapping */

/* sysfs directory of the THP tunables; knob names are appended to it. */
#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
/* Per-process mapping report parsed by check_huge() and check_swap(). */
#define PID_SMAPS "/proc/self/smaps"
25*4882a593Smuzhiyun
/*
 * Values of the "enabled" THP knob. Each constant is the index of the
 * matching entry in thp_enabled_strings[], so the order must not change.
 */
enum thp_enabled {
	THP_ALWAYS = 0,
	THP_MADVISE = 1,
	THP_NEVER = 2,
};
31*4882a593Smuzhiyun
/*
 * Textual form of each enum thp_enabled value, indexed by the enum constant.
 * The trailing NULL terminates the table for read_string().
 */
static const char *thp_enabled_strings[] = {
	[0] = "always",
	[1] = "madvise",
	[2] = "never",
	[3] = NULL
};
38*4882a593Smuzhiyun
/*
 * Values of the "defrag" THP knob. Each constant is the index of the
 * matching entry in thp_defrag_strings[], so the order must not change.
 */
enum thp_defrag {
	THP_DEFRAG_ALWAYS = 0,
	THP_DEFRAG_DEFER = 1,
	THP_DEFRAG_DEFER_MADVISE = 2,
	THP_DEFRAG_MADVISE = 3,
	THP_DEFRAG_NEVER = 4,
};
46*4882a593Smuzhiyun
/*
 * Textual form of each enum thp_defrag value, indexed by the enum constant.
 * The trailing NULL terminates the table for read_string().
 */
static const char *thp_defrag_strings[] = {
	[0] = "always",
	[1] = "defer",
	[2] = "defer+madvise",
	[3] = "madvise",
	[4] = "never",
	[5] = NULL
};
55*4882a593Smuzhiyun
/*
 * Values of the "shmem_enabled" THP knob. Each constant is the index of the
 * matching entry in shmem_enabled_strings[], so the order must not change.
 */
enum shmem_enabled {
	SHMEM_ALWAYS = 0,
	SHMEM_WITHIN_SIZE = 1,
	SHMEM_ADVISE = 2,
	SHMEM_NEVER = 3,
	SHMEM_DENY = 4,
	SHMEM_FORCE = 5,
};
64*4882a593Smuzhiyun
/*
 * Textual form of each enum shmem_enabled value, indexed by the enum
 * constant. The trailing NULL terminates the table for read_string().
 */
static const char *shmem_enabled_strings[] = {
	[0] = "always",
	[1] = "within_size",
	[2] = "advise",
	[3] = "never",
	[4] = "deny",
	[5] = "force",
	[6] = NULL
};
74*4882a593Smuzhiyun
/*
 * Snapshot of the tunables under the "khugepaged/" sysfs sub-directory.
 * Every member is read from / written to the file of the same name.
 */
struct khugepaged_settings {
	bool defrag;				/* khugepaged/defrag */
	unsigned int alloc_sleep_millisecs;	/* khugepaged/alloc_sleep_millisecs */
	unsigned int scan_sleep_millisecs;	/* khugepaged/scan_sleep_millisecs */
	unsigned int max_ptes_none;		/* khugepaged/max_ptes_none */
	unsigned int max_ptes_swap;		/* khugepaged/max_ptes_swap */
	unsigned int max_ptes_shared;		/* khugepaged/max_ptes_shared */
	unsigned long pages_to_scan;		/* khugepaged/pages_to_scan */
};
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun struct settings {
86*4882a593Smuzhiyun enum thp_enabled thp_enabled;
87*4882a593Smuzhiyun enum thp_defrag thp_defrag;
88*4882a593Smuzhiyun enum shmem_enabled shmem_enabled;
89*4882a593Smuzhiyun bool debug_cow;
90*4882a593Smuzhiyun bool use_zero_page;
91*4882a593Smuzhiyun struct khugepaged_settings khugepaged;
92*4882a593Smuzhiyun };
93*4882a593Smuzhiyun
94*4882a593Smuzhiyun static struct settings default_settings = {
95*4882a593Smuzhiyun .thp_enabled = THP_MADVISE,
96*4882a593Smuzhiyun .thp_defrag = THP_DEFRAG_ALWAYS,
97*4882a593Smuzhiyun .shmem_enabled = SHMEM_NEVER,
98*4882a593Smuzhiyun .debug_cow = 0,
99*4882a593Smuzhiyun .use_zero_page = 0,
100*4882a593Smuzhiyun .khugepaged = {
101*4882a593Smuzhiyun .defrag = 1,
102*4882a593Smuzhiyun .alloc_sleep_millisecs = 10,
103*4882a593Smuzhiyun .scan_sleep_millisecs = 10,
104*4882a593Smuzhiyun },
105*4882a593Smuzhiyun };
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun static struct settings saved_settings;
108*4882a593Smuzhiyun static bool skip_settings_restore;
109*4882a593Smuzhiyun
110*4882a593Smuzhiyun static int exit_status;
111*4882a593Smuzhiyun
/*
 * success() - report a passed check.
 * @msg: text to print.
 *
 * Prints a space, then @msg wrapped in the ANSI "green" colour sequence,
 * then a newline, on stdout.
 */
static void success(const char *msg)
{
	fprintf(stdout, " \e[32m%s\e[0m\n", msg);
}
116*4882a593Smuzhiyun
fail(const char * msg)117*4882a593Smuzhiyun static void fail(const char *msg)
118*4882a593Smuzhiyun {
119*4882a593Smuzhiyun printf(" \e[31m%s\e[0m\n", msg);
120*4882a593Smuzhiyun exit_status++;
121*4882a593Smuzhiyun }
122*4882a593Smuzhiyun
/*
 * read_file() - read the start of a file into a NUL-terminated buffer.
 * @path:   file to open read-only.
 * @buf:    destination buffer.
 * @buflen: size of @buf; at most @buflen - 1 bytes are read so that the
 *          terminator always fits.
 *
 * A single read() is issued; the file is closed before returning.
 *
 * Return: number of bytes stored (excluding the terminator), or 0 when the
 * file cannot be opened or the read yields no data.
 */
static int read_file(const char *path, char *buf, size_t buflen)
{
	ssize_t nbytes;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return 0;

	nbytes = read(fd, buf, buflen - 1);
	close(fd);

	if (nbytes <= 0)
		return 0;

	buf[nbytes] = '\0';
	return (unsigned int) nbytes;
}
143*4882a593Smuzhiyun
/*
 * write_file() - write a string to an existing file.
 * @path:   file to open write-only (it is not created or truncated).
 * @buf:    data to write.
 * @buflen: length of @buf including its terminating NUL; only the first
 *          @buflen - 1 bytes are written.
 *
 * A single write() is issued; the file is closed before returning.
 *
 * Return: number of bytes written, or 0 when the file cannot be opened or
 * nothing was written.
 */
static int write_file(const char *path, const char *buf, size_t buflen)
{
	ssize_t nbytes;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return 0;

	nbytes = write(fd, buf, buflen - 1);
	close(fd);

	return nbytes > 0 ? (unsigned int) nbytes : 0;
}
160*4882a593Smuzhiyun
read_string(const char * name,const char * strings[])161*4882a593Smuzhiyun static int read_string(const char *name, const char *strings[])
162*4882a593Smuzhiyun {
163*4882a593Smuzhiyun char path[PATH_MAX];
164*4882a593Smuzhiyun char buf[256];
165*4882a593Smuzhiyun char *c;
166*4882a593Smuzhiyun int ret;
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
169*4882a593Smuzhiyun if (ret >= PATH_MAX) {
170*4882a593Smuzhiyun printf("%s: Pathname is too long\n", __func__);
171*4882a593Smuzhiyun exit(EXIT_FAILURE);
172*4882a593Smuzhiyun }
173*4882a593Smuzhiyun
174*4882a593Smuzhiyun if (!read_file(path, buf, sizeof(buf))) {
175*4882a593Smuzhiyun perror(path);
176*4882a593Smuzhiyun exit(EXIT_FAILURE);
177*4882a593Smuzhiyun }
178*4882a593Smuzhiyun
179*4882a593Smuzhiyun c = strchr(buf, '[');
180*4882a593Smuzhiyun if (!c) {
181*4882a593Smuzhiyun printf("%s: Parse failure\n", __func__);
182*4882a593Smuzhiyun exit(EXIT_FAILURE);
183*4882a593Smuzhiyun }
184*4882a593Smuzhiyun
185*4882a593Smuzhiyun c++;
186*4882a593Smuzhiyun memmove(buf, c, sizeof(buf) - (c - buf));
187*4882a593Smuzhiyun
188*4882a593Smuzhiyun c = strchr(buf, ']');
189*4882a593Smuzhiyun if (!c) {
190*4882a593Smuzhiyun printf("%s: Parse failure\n", __func__);
191*4882a593Smuzhiyun exit(EXIT_FAILURE);
192*4882a593Smuzhiyun }
193*4882a593Smuzhiyun *c = '\0';
194*4882a593Smuzhiyun
195*4882a593Smuzhiyun ret = 0;
196*4882a593Smuzhiyun while (strings[ret]) {
197*4882a593Smuzhiyun if (!strcmp(strings[ret], buf))
198*4882a593Smuzhiyun return ret;
199*4882a593Smuzhiyun ret++;
200*4882a593Smuzhiyun }
201*4882a593Smuzhiyun
202*4882a593Smuzhiyun printf("Failed to parse %s\n", name);
203*4882a593Smuzhiyun exit(EXIT_FAILURE);
204*4882a593Smuzhiyun }
205*4882a593Smuzhiyun
write_string(const char * name,const char * val)206*4882a593Smuzhiyun static void write_string(const char *name, const char *val)
207*4882a593Smuzhiyun {
208*4882a593Smuzhiyun char path[PATH_MAX];
209*4882a593Smuzhiyun int ret;
210*4882a593Smuzhiyun
211*4882a593Smuzhiyun ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
212*4882a593Smuzhiyun if (ret >= PATH_MAX) {
213*4882a593Smuzhiyun printf("%s: Pathname is too long\n", __func__);
214*4882a593Smuzhiyun exit(EXIT_FAILURE);
215*4882a593Smuzhiyun }
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun if (!write_file(path, val, strlen(val) + 1)) {
218*4882a593Smuzhiyun perror(path);
219*4882a593Smuzhiyun exit(EXIT_FAILURE);
220*4882a593Smuzhiyun }
221*4882a593Smuzhiyun }
222*4882a593Smuzhiyun
read_num(const char * name)223*4882a593Smuzhiyun static const unsigned long read_num(const char *name)
224*4882a593Smuzhiyun {
225*4882a593Smuzhiyun char path[PATH_MAX];
226*4882a593Smuzhiyun char buf[21];
227*4882a593Smuzhiyun int ret;
228*4882a593Smuzhiyun
229*4882a593Smuzhiyun ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
230*4882a593Smuzhiyun if (ret >= PATH_MAX) {
231*4882a593Smuzhiyun printf("%s: Pathname is too long\n", __func__);
232*4882a593Smuzhiyun exit(EXIT_FAILURE);
233*4882a593Smuzhiyun }
234*4882a593Smuzhiyun
235*4882a593Smuzhiyun ret = read_file(path, buf, sizeof(buf));
236*4882a593Smuzhiyun if (ret < 0) {
237*4882a593Smuzhiyun perror("read_file(read_num)");
238*4882a593Smuzhiyun exit(EXIT_FAILURE);
239*4882a593Smuzhiyun }
240*4882a593Smuzhiyun
241*4882a593Smuzhiyun return strtoul(buf, NULL, 10);
242*4882a593Smuzhiyun }
243*4882a593Smuzhiyun
write_num(const char * name,unsigned long num)244*4882a593Smuzhiyun static void write_num(const char *name, unsigned long num)
245*4882a593Smuzhiyun {
246*4882a593Smuzhiyun char path[PATH_MAX];
247*4882a593Smuzhiyun char buf[21];
248*4882a593Smuzhiyun int ret;
249*4882a593Smuzhiyun
250*4882a593Smuzhiyun ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
251*4882a593Smuzhiyun if (ret >= PATH_MAX) {
252*4882a593Smuzhiyun printf("%s: Pathname is too long\n", __func__);
253*4882a593Smuzhiyun exit(EXIT_FAILURE);
254*4882a593Smuzhiyun }
255*4882a593Smuzhiyun
256*4882a593Smuzhiyun sprintf(buf, "%ld", num);
257*4882a593Smuzhiyun if (!write_file(path, buf, strlen(buf) + 1)) {
258*4882a593Smuzhiyun perror(path);
259*4882a593Smuzhiyun exit(EXIT_FAILURE);
260*4882a593Smuzhiyun }
261*4882a593Smuzhiyun }
262*4882a593Smuzhiyun
write_settings(struct settings * settings)263*4882a593Smuzhiyun static void write_settings(struct settings *settings)
264*4882a593Smuzhiyun {
265*4882a593Smuzhiyun struct khugepaged_settings *khugepaged = &settings->khugepaged;
266*4882a593Smuzhiyun
267*4882a593Smuzhiyun write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
268*4882a593Smuzhiyun write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
269*4882a593Smuzhiyun write_string("shmem_enabled",
270*4882a593Smuzhiyun shmem_enabled_strings[settings->shmem_enabled]);
271*4882a593Smuzhiyun write_num("debug_cow", settings->debug_cow);
272*4882a593Smuzhiyun write_num("use_zero_page", settings->use_zero_page);
273*4882a593Smuzhiyun
274*4882a593Smuzhiyun write_num("khugepaged/defrag", khugepaged->defrag);
275*4882a593Smuzhiyun write_num("khugepaged/alloc_sleep_millisecs",
276*4882a593Smuzhiyun khugepaged->alloc_sleep_millisecs);
277*4882a593Smuzhiyun write_num("khugepaged/scan_sleep_millisecs",
278*4882a593Smuzhiyun khugepaged->scan_sleep_millisecs);
279*4882a593Smuzhiyun write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
280*4882a593Smuzhiyun write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
281*4882a593Smuzhiyun write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
282*4882a593Smuzhiyun write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
283*4882a593Smuzhiyun }
284*4882a593Smuzhiyun
restore_settings(int sig)285*4882a593Smuzhiyun static void restore_settings(int sig)
286*4882a593Smuzhiyun {
287*4882a593Smuzhiyun if (skip_settings_restore)
288*4882a593Smuzhiyun goto out;
289*4882a593Smuzhiyun
290*4882a593Smuzhiyun printf("Restore THP and khugepaged settings...");
291*4882a593Smuzhiyun write_settings(&saved_settings);
292*4882a593Smuzhiyun success("OK");
293*4882a593Smuzhiyun if (sig)
294*4882a593Smuzhiyun exit(EXIT_FAILURE);
295*4882a593Smuzhiyun out:
296*4882a593Smuzhiyun exit(exit_status);
297*4882a593Smuzhiyun }
298*4882a593Smuzhiyun
save_settings(void)299*4882a593Smuzhiyun static void save_settings(void)
300*4882a593Smuzhiyun {
301*4882a593Smuzhiyun printf("Save THP and khugepaged settings...");
302*4882a593Smuzhiyun saved_settings = (struct settings) {
303*4882a593Smuzhiyun .thp_enabled = read_string("enabled", thp_enabled_strings),
304*4882a593Smuzhiyun .thp_defrag = read_string("defrag", thp_defrag_strings),
305*4882a593Smuzhiyun .shmem_enabled =
306*4882a593Smuzhiyun read_string("shmem_enabled", shmem_enabled_strings),
307*4882a593Smuzhiyun .debug_cow = read_num("debug_cow"),
308*4882a593Smuzhiyun .use_zero_page = read_num("use_zero_page"),
309*4882a593Smuzhiyun };
310*4882a593Smuzhiyun saved_settings.khugepaged = (struct khugepaged_settings) {
311*4882a593Smuzhiyun .defrag = read_num("khugepaged/defrag"),
312*4882a593Smuzhiyun .alloc_sleep_millisecs =
313*4882a593Smuzhiyun read_num("khugepaged/alloc_sleep_millisecs"),
314*4882a593Smuzhiyun .scan_sleep_millisecs =
315*4882a593Smuzhiyun read_num("khugepaged/scan_sleep_millisecs"),
316*4882a593Smuzhiyun .max_ptes_none = read_num("khugepaged/max_ptes_none"),
317*4882a593Smuzhiyun .max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
318*4882a593Smuzhiyun .max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
319*4882a593Smuzhiyun .pages_to_scan = read_num("khugepaged/pages_to_scan"),
320*4882a593Smuzhiyun };
321*4882a593Smuzhiyun success("OK");
322*4882a593Smuzhiyun
323*4882a593Smuzhiyun signal(SIGTERM, restore_settings);
324*4882a593Smuzhiyun signal(SIGINT, restore_settings);
325*4882a593Smuzhiyun signal(SIGHUP, restore_settings);
326*4882a593Smuzhiyun signal(SIGQUIT, restore_settings);
327*4882a593Smuzhiyun }
328*4882a593Smuzhiyun
adjust_settings(void)329*4882a593Smuzhiyun static void adjust_settings(void)
330*4882a593Smuzhiyun {
331*4882a593Smuzhiyun
332*4882a593Smuzhiyun printf("Adjust settings...");
333*4882a593Smuzhiyun write_settings(&default_settings);
334*4882a593Smuzhiyun success("OK");
335*4882a593Smuzhiyun }
336*4882a593Smuzhiyun
/* Size of the line buffers used when scanning PID_SMAPS. */
#define MAX_LINE_LENGTH 500

/*
 * check_for_pattern() - advance a stream to the next line with a prefix.
 * @fp:      stream to read from, starting at its current position.
 * @pattern: prefix the line has to start with.
 * @buf:     line buffer of at least MAX_LINE_LENGTH bytes.
 *
 * Return: true with the matching line left in @buf, or false once the
 * stream is exhausted without a match.
 */
static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
{
	const size_t prefix_len = strlen(pattern);

	for (;;) {
		if (fgets(buf, MAX_LINE_LENGTH, fp) == NULL)
			return false;
		if (strncmp(buf, pattern, prefix_len) == 0)
			return true;
	}
}
347*4882a593Smuzhiyun
check_huge(void * addr)348*4882a593Smuzhiyun static bool check_huge(void *addr)
349*4882a593Smuzhiyun {
350*4882a593Smuzhiyun bool thp = false;
351*4882a593Smuzhiyun int ret;
352*4882a593Smuzhiyun FILE *fp;
353*4882a593Smuzhiyun char buffer[MAX_LINE_LENGTH];
354*4882a593Smuzhiyun char addr_pattern[MAX_LINE_LENGTH];
355*4882a593Smuzhiyun
356*4882a593Smuzhiyun ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
357*4882a593Smuzhiyun (unsigned long) addr);
358*4882a593Smuzhiyun if (ret >= MAX_LINE_LENGTH) {
359*4882a593Smuzhiyun printf("%s: Pattern is too long\n", __func__);
360*4882a593Smuzhiyun exit(EXIT_FAILURE);
361*4882a593Smuzhiyun }
362*4882a593Smuzhiyun
363*4882a593Smuzhiyun
364*4882a593Smuzhiyun fp = fopen(PID_SMAPS, "r");
365*4882a593Smuzhiyun if (!fp) {
366*4882a593Smuzhiyun printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
367*4882a593Smuzhiyun exit(EXIT_FAILURE);
368*4882a593Smuzhiyun }
369*4882a593Smuzhiyun if (!check_for_pattern(fp, addr_pattern, buffer))
370*4882a593Smuzhiyun goto err_out;
371*4882a593Smuzhiyun
372*4882a593Smuzhiyun ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
373*4882a593Smuzhiyun hpage_pmd_size >> 10);
374*4882a593Smuzhiyun if (ret >= MAX_LINE_LENGTH) {
375*4882a593Smuzhiyun printf("%s: Pattern is too long\n", __func__);
376*4882a593Smuzhiyun exit(EXIT_FAILURE);
377*4882a593Smuzhiyun }
378*4882a593Smuzhiyun /*
379*4882a593Smuzhiyun * Fetch the AnonHugePages: in the same block and check whether it got
380*4882a593Smuzhiyun * the expected number of hugeepages next.
381*4882a593Smuzhiyun */
382*4882a593Smuzhiyun if (!check_for_pattern(fp, "AnonHugePages:", buffer))
383*4882a593Smuzhiyun goto err_out;
384*4882a593Smuzhiyun
385*4882a593Smuzhiyun if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
386*4882a593Smuzhiyun goto err_out;
387*4882a593Smuzhiyun
388*4882a593Smuzhiyun thp = true;
389*4882a593Smuzhiyun err_out:
390*4882a593Smuzhiyun fclose(fp);
391*4882a593Smuzhiyun return thp;
392*4882a593Smuzhiyun }
393*4882a593Smuzhiyun
394*4882a593Smuzhiyun
check_swap(void * addr,unsigned long size)395*4882a593Smuzhiyun static bool check_swap(void *addr, unsigned long size)
396*4882a593Smuzhiyun {
397*4882a593Smuzhiyun bool swap = false;
398*4882a593Smuzhiyun int ret;
399*4882a593Smuzhiyun FILE *fp;
400*4882a593Smuzhiyun char buffer[MAX_LINE_LENGTH];
401*4882a593Smuzhiyun char addr_pattern[MAX_LINE_LENGTH];
402*4882a593Smuzhiyun
403*4882a593Smuzhiyun ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
404*4882a593Smuzhiyun (unsigned long) addr);
405*4882a593Smuzhiyun if (ret >= MAX_LINE_LENGTH) {
406*4882a593Smuzhiyun printf("%s: Pattern is too long\n", __func__);
407*4882a593Smuzhiyun exit(EXIT_FAILURE);
408*4882a593Smuzhiyun }
409*4882a593Smuzhiyun
410*4882a593Smuzhiyun
411*4882a593Smuzhiyun fp = fopen(PID_SMAPS, "r");
412*4882a593Smuzhiyun if (!fp) {
413*4882a593Smuzhiyun printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
414*4882a593Smuzhiyun exit(EXIT_FAILURE);
415*4882a593Smuzhiyun }
416*4882a593Smuzhiyun if (!check_for_pattern(fp, addr_pattern, buffer))
417*4882a593Smuzhiyun goto err_out;
418*4882a593Smuzhiyun
419*4882a593Smuzhiyun ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
420*4882a593Smuzhiyun size >> 10);
421*4882a593Smuzhiyun if (ret >= MAX_LINE_LENGTH) {
422*4882a593Smuzhiyun printf("%s: Pattern is too long\n", __func__);
423*4882a593Smuzhiyun exit(EXIT_FAILURE);
424*4882a593Smuzhiyun }
425*4882a593Smuzhiyun /*
426*4882a593Smuzhiyun * Fetch the Swap: in the same block and check whether it got
427*4882a593Smuzhiyun * the expected number of hugeepages next.
428*4882a593Smuzhiyun */
429*4882a593Smuzhiyun if (!check_for_pattern(fp, "Swap:", buffer))
430*4882a593Smuzhiyun goto err_out;
431*4882a593Smuzhiyun
432*4882a593Smuzhiyun if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
433*4882a593Smuzhiyun goto err_out;
434*4882a593Smuzhiyun
435*4882a593Smuzhiyun swap = true;
436*4882a593Smuzhiyun err_out:
437*4882a593Smuzhiyun fclose(fp);
438*4882a593Smuzhiyun return swap;
439*4882a593Smuzhiyun }
440*4882a593Smuzhiyun
alloc_mapping(void)441*4882a593Smuzhiyun static void *alloc_mapping(void)
442*4882a593Smuzhiyun {
443*4882a593Smuzhiyun void *p;
444*4882a593Smuzhiyun
445*4882a593Smuzhiyun p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
446*4882a593Smuzhiyun MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
447*4882a593Smuzhiyun if (p != BASE_ADDR) {
448*4882a593Smuzhiyun printf("Failed to allocate VMA at %p\n", BASE_ADDR);
449*4882a593Smuzhiyun exit(EXIT_FAILURE);
450*4882a593Smuzhiyun }
451*4882a593Smuzhiyun
452*4882a593Smuzhiyun return p;
453*4882a593Smuzhiyun }
454*4882a593Smuzhiyun
fill_memory(int * p,unsigned long start,unsigned long end)455*4882a593Smuzhiyun static void fill_memory(int *p, unsigned long start, unsigned long end)
456*4882a593Smuzhiyun {
457*4882a593Smuzhiyun int i;
458*4882a593Smuzhiyun
459*4882a593Smuzhiyun for (i = start / page_size; i < end / page_size; i++)
460*4882a593Smuzhiyun p[i * page_size / sizeof(*p)] = i + 0xdead0000;
461*4882a593Smuzhiyun }
462*4882a593Smuzhiyun
validate_memory(int * p,unsigned long start,unsigned long end)463*4882a593Smuzhiyun static void validate_memory(int *p, unsigned long start, unsigned long end)
464*4882a593Smuzhiyun {
465*4882a593Smuzhiyun int i;
466*4882a593Smuzhiyun
467*4882a593Smuzhiyun for (i = start / page_size; i < end / page_size; i++) {
468*4882a593Smuzhiyun if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
469*4882a593Smuzhiyun printf("Page %d is corrupted: %#x\n",
470*4882a593Smuzhiyun i, p[i * page_size / sizeof(*p)]);
471*4882a593Smuzhiyun exit(EXIT_FAILURE);
472*4882a593Smuzhiyun }
473*4882a593Smuzhiyun }
474*4882a593Smuzhiyun }
475*4882a593Smuzhiyun
476*4882a593Smuzhiyun #define TICK 500000
wait_for_scan(const char * msg,char * p)477*4882a593Smuzhiyun static bool wait_for_scan(const char *msg, char *p)
478*4882a593Smuzhiyun {
479*4882a593Smuzhiyun int full_scans;
480*4882a593Smuzhiyun int timeout = 6; /* 3 seconds */
481*4882a593Smuzhiyun
482*4882a593Smuzhiyun /* Sanity check */
483*4882a593Smuzhiyun if (check_huge(p)) {
484*4882a593Smuzhiyun printf("Unexpected huge page\n");
485*4882a593Smuzhiyun exit(EXIT_FAILURE);
486*4882a593Smuzhiyun }
487*4882a593Smuzhiyun
488*4882a593Smuzhiyun madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
489*4882a593Smuzhiyun
490*4882a593Smuzhiyun /* Wait until the second full_scan completed */
491*4882a593Smuzhiyun full_scans = read_num("khugepaged/full_scans") + 2;
492*4882a593Smuzhiyun
493*4882a593Smuzhiyun printf("%s...", msg);
494*4882a593Smuzhiyun while (timeout--) {
495*4882a593Smuzhiyun if (check_huge(p))
496*4882a593Smuzhiyun break;
497*4882a593Smuzhiyun if (read_num("khugepaged/full_scans") >= full_scans)
498*4882a593Smuzhiyun break;
499*4882a593Smuzhiyun printf(".");
500*4882a593Smuzhiyun usleep(TICK);
501*4882a593Smuzhiyun }
502*4882a593Smuzhiyun
503*4882a593Smuzhiyun madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
504*4882a593Smuzhiyun
505*4882a593Smuzhiyun return timeout == -1;
506*4882a593Smuzhiyun }
507*4882a593Smuzhiyun
alloc_at_fault(void)508*4882a593Smuzhiyun static void alloc_at_fault(void)
509*4882a593Smuzhiyun {
510*4882a593Smuzhiyun struct settings settings = default_settings;
511*4882a593Smuzhiyun char *p;
512*4882a593Smuzhiyun
513*4882a593Smuzhiyun settings.thp_enabled = THP_ALWAYS;
514*4882a593Smuzhiyun write_settings(&settings);
515*4882a593Smuzhiyun
516*4882a593Smuzhiyun p = alloc_mapping();
517*4882a593Smuzhiyun *p = 1;
518*4882a593Smuzhiyun printf("Allocate huge page on fault...");
519*4882a593Smuzhiyun if (check_huge(p))
520*4882a593Smuzhiyun success("OK");
521*4882a593Smuzhiyun else
522*4882a593Smuzhiyun fail("Fail");
523*4882a593Smuzhiyun
524*4882a593Smuzhiyun write_settings(&default_settings);
525*4882a593Smuzhiyun
526*4882a593Smuzhiyun madvise(p, page_size, MADV_DONTNEED);
527*4882a593Smuzhiyun printf("Split huge PMD on MADV_DONTNEED...");
528*4882a593Smuzhiyun if (!check_huge(p))
529*4882a593Smuzhiyun success("OK");
530*4882a593Smuzhiyun else
531*4882a593Smuzhiyun fail("Fail");
532*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
533*4882a593Smuzhiyun }
534*4882a593Smuzhiyun
collapse_full(void)535*4882a593Smuzhiyun static void collapse_full(void)
536*4882a593Smuzhiyun {
537*4882a593Smuzhiyun void *p;
538*4882a593Smuzhiyun
539*4882a593Smuzhiyun p = alloc_mapping();
540*4882a593Smuzhiyun fill_memory(p, 0, hpage_pmd_size);
541*4882a593Smuzhiyun if (wait_for_scan("Collapse fully populated PTE table", p))
542*4882a593Smuzhiyun fail("Timeout");
543*4882a593Smuzhiyun else if (check_huge(p))
544*4882a593Smuzhiyun success("OK");
545*4882a593Smuzhiyun else
546*4882a593Smuzhiyun fail("Fail");
547*4882a593Smuzhiyun validate_memory(p, 0, hpage_pmd_size);
548*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
549*4882a593Smuzhiyun }
550*4882a593Smuzhiyun
collapse_empty(void)551*4882a593Smuzhiyun static void collapse_empty(void)
552*4882a593Smuzhiyun {
553*4882a593Smuzhiyun void *p;
554*4882a593Smuzhiyun
555*4882a593Smuzhiyun p = alloc_mapping();
556*4882a593Smuzhiyun if (wait_for_scan("Do not collapse empty PTE table", p))
557*4882a593Smuzhiyun fail("Timeout");
558*4882a593Smuzhiyun else if (check_huge(p))
559*4882a593Smuzhiyun fail("Fail");
560*4882a593Smuzhiyun else
561*4882a593Smuzhiyun success("OK");
562*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
563*4882a593Smuzhiyun }
564*4882a593Smuzhiyun
collapse_single_pte_entry(void)565*4882a593Smuzhiyun static void collapse_single_pte_entry(void)
566*4882a593Smuzhiyun {
567*4882a593Smuzhiyun void *p;
568*4882a593Smuzhiyun
569*4882a593Smuzhiyun p = alloc_mapping();
570*4882a593Smuzhiyun fill_memory(p, 0, page_size);
571*4882a593Smuzhiyun if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
572*4882a593Smuzhiyun fail("Timeout");
573*4882a593Smuzhiyun else if (check_huge(p))
574*4882a593Smuzhiyun success("OK");
575*4882a593Smuzhiyun else
576*4882a593Smuzhiyun fail("Fail");
577*4882a593Smuzhiyun validate_memory(p, 0, page_size);
578*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
579*4882a593Smuzhiyun }
580*4882a593Smuzhiyun
collapse_max_ptes_none(void)581*4882a593Smuzhiyun static void collapse_max_ptes_none(void)
582*4882a593Smuzhiyun {
583*4882a593Smuzhiyun int max_ptes_none = hpage_pmd_nr / 2;
584*4882a593Smuzhiyun struct settings settings = default_settings;
585*4882a593Smuzhiyun void *p;
586*4882a593Smuzhiyun
587*4882a593Smuzhiyun settings.khugepaged.max_ptes_none = max_ptes_none;
588*4882a593Smuzhiyun write_settings(&settings);
589*4882a593Smuzhiyun
590*4882a593Smuzhiyun p = alloc_mapping();
591*4882a593Smuzhiyun
592*4882a593Smuzhiyun fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
593*4882a593Smuzhiyun if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
594*4882a593Smuzhiyun fail("Timeout");
595*4882a593Smuzhiyun else if (check_huge(p))
596*4882a593Smuzhiyun fail("Fail");
597*4882a593Smuzhiyun else
598*4882a593Smuzhiyun success("OK");
599*4882a593Smuzhiyun validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
600*4882a593Smuzhiyun
601*4882a593Smuzhiyun fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
602*4882a593Smuzhiyun if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
603*4882a593Smuzhiyun fail("Timeout");
604*4882a593Smuzhiyun else if (check_huge(p))
605*4882a593Smuzhiyun success("OK");
606*4882a593Smuzhiyun else
607*4882a593Smuzhiyun fail("Fail");
608*4882a593Smuzhiyun validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
609*4882a593Smuzhiyun
610*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
611*4882a593Smuzhiyun write_settings(&default_settings);
612*4882a593Smuzhiyun }
613*4882a593Smuzhiyun
collapse_swapin_single_pte(void)614*4882a593Smuzhiyun static void collapse_swapin_single_pte(void)
615*4882a593Smuzhiyun {
616*4882a593Smuzhiyun void *p;
617*4882a593Smuzhiyun p = alloc_mapping();
618*4882a593Smuzhiyun fill_memory(p, 0, hpage_pmd_size);
619*4882a593Smuzhiyun
620*4882a593Smuzhiyun printf("Swapout one page...");
621*4882a593Smuzhiyun if (madvise(p, page_size, MADV_PAGEOUT)) {
622*4882a593Smuzhiyun perror("madvise(MADV_PAGEOUT)");
623*4882a593Smuzhiyun exit(EXIT_FAILURE);
624*4882a593Smuzhiyun }
625*4882a593Smuzhiyun if (check_swap(p, page_size)) {
626*4882a593Smuzhiyun success("OK");
627*4882a593Smuzhiyun } else {
628*4882a593Smuzhiyun fail("Fail");
629*4882a593Smuzhiyun goto out;
630*4882a593Smuzhiyun }
631*4882a593Smuzhiyun
632*4882a593Smuzhiyun if (wait_for_scan("Collapse with swapping in single PTE entry", p))
633*4882a593Smuzhiyun fail("Timeout");
634*4882a593Smuzhiyun else if (check_huge(p))
635*4882a593Smuzhiyun success("OK");
636*4882a593Smuzhiyun else
637*4882a593Smuzhiyun fail("Fail");
638*4882a593Smuzhiyun validate_memory(p, 0, hpage_pmd_size);
639*4882a593Smuzhiyun out:
640*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
641*4882a593Smuzhiyun }
642*4882a593Smuzhiyun
collapse_max_ptes_swap(void)643*4882a593Smuzhiyun static void collapse_max_ptes_swap(void)
644*4882a593Smuzhiyun {
645*4882a593Smuzhiyun int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
646*4882a593Smuzhiyun void *p;
647*4882a593Smuzhiyun
648*4882a593Smuzhiyun p = alloc_mapping();
649*4882a593Smuzhiyun
650*4882a593Smuzhiyun fill_memory(p, 0, hpage_pmd_size);
651*4882a593Smuzhiyun printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
652*4882a593Smuzhiyun if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
653*4882a593Smuzhiyun perror("madvise(MADV_PAGEOUT)");
654*4882a593Smuzhiyun exit(EXIT_FAILURE);
655*4882a593Smuzhiyun }
656*4882a593Smuzhiyun if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
657*4882a593Smuzhiyun success("OK");
658*4882a593Smuzhiyun } else {
659*4882a593Smuzhiyun fail("Fail");
660*4882a593Smuzhiyun goto out;
661*4882a593Smuzhiyun }
662*4882a593Smuzhiyun
663*4882a593Smuzhiyun if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
664*4882a593Smuzhiyun fail("Timeout");
665*4882a593Smuzhiyun else if (check_huge(p))
666*4882a593Smuzhiyun fail("Fail");
667*4882a593Smuzhiyun else
668*4882a593Smuzhiyun success("OK");
669*4882a593Smuzhiyun validate_memory(p, 0, hpage_pmd_size);
670*4882a593Smuzhiyun
671*4882a593Smuzhiyun fill_memory(p, 0, hpage_pmd_size);
672*4882a593Smuzhiyun printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
673*4882a593Smuzhiyun if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
674*4882a593Smuzhiyun perror("madvise(MADV_PAGEOUT)");
675*4882a593Smuzhiyun exit(EXIT_FAILURE);
676*4882a593Smuzhiyun }
677*4882a593Smuzhiyun if (check_swap(p, max_ptes_swap * page_size)) {
678*4882a593Smuzhiyun success("OK");
679*4882a593Smuzhiyun } else {
680*4882a593Smuzhiyun fail("Fail");
681*4882a593Smuzhiyun goto out;
682*4882a593Smuzhiyun }
683*4882a593Smuzhiyun
684*4882a593Smuzhiyun if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
685*4882a593Smuzhiyun fail("Timeout");
686*4882a593Smuzhiyun else if (check_huge(p))
687*4882a593Smuzhiyun success("OK");
688*4882a593Smuzhiyun else
689*4882a593Smuzhiyun fail("Fail");
690*4882a593Smuzhiyun validate_memory(p, 0, hpage_pmd_size);
691*4882a593Smuzhiyun out:
692*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
693*4882a593Smuzhiyun }
694*4882a593Smuzhiyun
collapse_single_pte_entry_compound(void)695*4882a593Smuzhiyun static void collapse_single_pte_entry_compound(void)
696*4882a593Smuzhiyun {
697*4882a593Smuzhiyun void *p;
698*4882a593Smuzhiyun
699*4882a593Smuzhiyun p = alloc_mapping();
700*4882a593Smuzhiyun
701*4882a593Smuzhiyun printf("Allocate huge page...");
702*4882a593Smuzhiyun madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
703*4882a593Smuzhiyun fill_memory(p, 0, hpage_pmd_size);
704*4882a593Smuzhiyun if (check_huge(p))
705*4882a593Smuzhiyun success("OK");
706*4882a593Smuzhiyun else
707*4882a593Smuzhiyun fail("Fail");
708*4882a593Smuzhiyun madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
709*4882a593Smuzhiyun
710*4882a593Smuzhiyun printf("Split huge page leaving single PTE mapping compound page...");
711*4882a593Smuzhiyun madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
712*4882a593Smuzhiyun if (!check_huge(p))
713*4882a593Smuzhiyun success("OK");
714*4882a593Smuzhiyun else
715*4882a593Smuzhiyun fail("Fail");
716*4882a593Smuzhiyun
717*4882a593Smuzhiyun if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
718*4882a593Smuzhiyun fail("Timeout");
719*4882a593Smuzhiyun else if (check_huge(p))
720*4882a593Smuzhiyun success("OK");
721*4882a593Smuzhiyun else
722*4882a593Smuzhiyun fail("Fail");
723*4882a593Smuzhiyun validate_memory(p, 0, page_size);
724*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
725*4882a593Smuzhiyun }
726*4882a593Smuzhiyun
collapse_full_of_compound(void)727*4882a593Smuzhiyun static void collapse_full_of_compound(void)
728*4882a593Smuzhiyun {
729*4882a593Smuzhiyun void *p;
730*4882a593Smuzhiyun
731*4882a593Smuzhiyun p = alloc_mapping();
732*4882a593Smuzhiyun
733*4882a593Smuzhiyun printf("Allocate huge page...");
734*4882a593Smuzhiyun madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
735*4882a593Smuzhiyun fill_memory(p, 0, hpage_pmd_size);
736*4882a593Smuzhiyun if (check_huge(p))
737*4882a593Smuzhiyun success("OK");
738*4882a593Smuzhiyun else
739*4882a593Smuzhiyun fail("Fail");
740*4882a593Smuzhiyun
741*4882a593Smuzhiyun printf("Split huge page leaving single PTE page table full of compound pages...");
742*4882a593Smuzhiyun madvise(p, page_size, MADV_NOHUGEPAGE);
743*4882a593Smuzhiyun madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
744*4882a593Smuzhiyun if (!check_huge(p))
745*4882a593Smuzhiyun success("OK");
746*4882a593Smuzhiyun else
747*4882a593Smuzhiyun fail("Fail");
748*4882a593Smuzhiyun
749*4882a593Smuzhiyun if (wait_for_scan("Collapse PTE table full of compound pages", p))
750*4882a593Smuzhiyun fail("Timeout");
751*4882a593Smuzhiyun else if (check_huge(p))
752*4882a593Smuzhiyun success("OK");
753*4882a593Smuzhiyun else
754*4882a593Smuzhiyun fail("Fail");
755*4882a593Smuzhiyun validate_memory(p, 0, hpage_pmd_size);
756*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
757*4882a593Smuzhiyun }
758*4882a593Smuzhiyun
collapse_compound_extreme(void)759*4882a593Smuzhiyun static void collapse_compound_extreme(void)
760*4882a593Smuzhiyun {
761*4882a593Smuzhiyun void *p;
762*4882a593Smuzhiyun int i;
763*4882a593Smuzhiyun
764*4882a593Smuzhiyun p = alloc_mapping();
765*4882a593Smuzhiyun for (i = 0; i < hpage_pmd_nr; i++) {
766*4882a593Smuzhiyun printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
767*4882a593Smuzhiyun i + 1, hpage_pmd_nr);
768*4882a593Smuzhiyun
769*4882a593Smuzhiyun madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
770*4882a593Smuzhiyun fill_memory(BASE_ADDR, 0, hpage_pmd_size);
771*4882a593Smuzhiyun if (!check_huge(BASE_ADDR)) {
772*4882a593Smuzhiyun printf("Failed to allocate huge page\n");
773*4882a593Smuzhiyun exit(EXIT_FAILURE);
774*4882a593Smuzhiyun }
775*4882a593Smuzhiyun madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
776*4882a593Smuzhiyun
777*4882a593Smuzhiyun p = mremap(BASE_ADDR - i * page_size,
778*4882a593Smuzhiyun i * page_size + hpage_pmd_size,
779*4882a593Smuzhiyun (i + 1) * page_size,
780*4882a593Smuzhiyun MREMAP_MAYMOVE | MREMAP_FIXED,
781*4882a593Smuzhiyun BASE_ADDR + 2 * hpage_pmd_size);
782*4882a593Smuzhiyun if (p == MAP_FAILED) {
783*4882a593Smuzhiyun perror("mremap+unmap");
784*4882a593Smuzhiyun exit(EXIT_FAILURE);
785*4882a593Smuzhiyun }
786*4882a593Smuzhiyun
787*4882a593Smuzhiyun p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
788*4882a593Smuzhiyun (i + 1) * page_size,
789*4882a593Smuzhiyun (i + 1) * page_size + hpage_pmd_size,
790*4882a593Smuzhiyun MREMAP_MAYMOVE | MREMAP_FIXED,
791*4882a593Smuzhiyun BASE_ADDR - (i + 1) * page_size);
792*4882a593Smuzhiyun if (p == MAP_FAILED) {
793*4882a593Smuzhiyun perror("mremap+alloc");
794*4882a593Smuzhiyun exit(EXIT_FAILURE);
795*4882a593Smuzhiyun }
796*4882a593Smuzhiyun }
797*4882a593Smuzhiyun
798*4882a593Smuzhiyun munmap(BASE_ADDR, hpage_pmd_size);
799*4882a593Smuzhiyun fill_memory(p, 0, hpage_pmd_size);
800*4882a593Smuzhiyun if (!check_huge(p))
801*4882a593Smuzhiyun success("OK");
802*4882a593Smuzhiyun else
803*4882a593Smuzhiyun fail("Fail");
804*4882a593Smuzhiyun
805*4882a593Smuzhiyun if (wait_for_scan("Collapse PTE table full of different compound pages", p))
806*4882a593Smuzhiyun fail("Timeout");
807*4882a593Smuzhiyun else if (check_huge(p))
808*4882a593Smuzhiyun success("OK");
809*4882a593Smuzhiyun else
810*4882a593Smuzhiyun fail("Fail");
811*4882a593Smuzhiyun
812*4882a593Smuzhiyun validate_memory(p, 0, hpage_pmd_size);
813*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
814*4882a593Smuzhiyun }
815*4882a593Smuzhiyun
collapse_fork(void)816*4882a593Smuzhiyun static void collapse_fork(void)
817*4882a593Smuzhiyun {
818*4882a593Smuzhiyun int wstatus;
819*4882a593Smuzhiyun void *p;
820*4882a593Smuzhiyun
821*4882a593Smuzhiyun p = alloc_mapping();
822*4882a593Smuzhiyun
823*4882a593Smuzhiyun printf("Allocate small page...");
824*4882a593Smuzhiyun fill_memory(p, 0, page_size);
825*4882a593Smuzhiyun if (!check_huge(p))
826*4882a593Smuzhiyun success("OK");
827*4882a593Smuzhiyun else
828*4882a593Smuzhiyun fail("Fail");
829*4882a593Smuzhiyun
830*4882a593Smuzhiyun printf("Share small page over fork()...");
831*4882a593Smuzhiyun if (!fork()) {
832*4882a593Smuzhiyun /* Do not touch settings on child exit */
833*4882a593Smuzhiyun skip_settings_restore = true;
834*4882a593Smuzhiyun exit_status = 0;
835*4882a593Smuzhiyun
836*4882a593Smuzhiyun if (!check_huge(p))
837*4882a593Smuzhiyun success("OK");
838*4882a593Smuzhiyun else
839*4882a593Smuzhiyun fail("Fail");
840*4882a593Smuzhiyun
841*4882a593Smuzhiyun fill_memory(p, page_size, 2 * page_size);
842*4882a593Smuzhiyun
843*4882a593Smuzhiyun if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
844*4882a593Smuzhiyun fail("Timeout");
845*4882a593Smuzhiyun else if (check_huge(p))
846*4882a593Smuzhiyun success("OK");
847*4882a593Smuzhiyun else
848*4882a593Smuzhiyun fail("Fail");
849*4882a593Smuzhiyun
850*4882a593Smuzhiyun validate_memory(p, 0, page_size);
851*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
852*4882a593Smuzhiyun exit(exit_status);
853*4882a593Smuzhiyun }
854*4882a593Smuzhiyun
855*4882a593Smuzhiyun wait(&wstatus);
856*4882a593Smuzhiyun exit_status += WEXITSTATUS(wstatus);
857*4882a593Smuzhiyun
858*4882a593Smuzhiyun printf("Check if parent still has small page...");
859*4882a593Smuzhiyun if (!check_huge(p))
860*4882a593Smuzhiyun success("OK");
861*4882a593Smuzhiyun else
862*4882a593Smuzhiyun fail("Fail");
863*4882a593Smuzhiyun validate_memory(p, 0, page_size);
864*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
865*4882a593Smuzhiyun }
866*4882a593Smuzhiyun
collapse_fork_compound(void)867*4882a593Smuzhiyun static void collapse_fork_compound(void)
868*4882a593Smuzhiyun {
869*4882a593Smuzhiyun int wstatus;
870*4882a593Smuzhiyun void *p;
871*4882a593Smuzhiyun
872*4882a593Smuzhiyun p = alloc_mapping();
873*4882a593Smuzhiyun
874*4882a593Smuzhiyun printf("Allocate huge page...");
875*4882a593Smuzhiyun madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
876*4882a593Smuzhiyun fill_memory(p, 0, hpage_pmd_size);
877*4882a593Smuzhiyun if (check_huge(p))
878*4882a593Smuzhiyun success("OK");
879*4882a593Smuzhiyun else
880*4882a593Smuzhiyun fail("Fail");
881*4882a593Smuzhiyun
882*4882a593Smuzhiyun printf("Share huge page over fork()...");
883*4882a593Smuzhiyun if (!fork()) {
884*4882a593Smuzhiyun /* Do not touch settings on child exit */
885*4882a593Smuzhiyun skip_settings_restore = true;
886*4882a593Smuzhiyun exit_status = 0;
887*4882a593Smuzhiyun
888*4882a593Smuzhiyun if (check_huge(p))
889*4882a593Smuzhiyun success("OK");
890*4882a593Smuzhiyun else
891*4882a593Smuzhiyun fail("Fail");
892*4882a593Smuzhiyun
893*4882a593Smuzhiyun printf("Split huge page PMD in child process...");
894*4882a593Smuzhiyun madvise(p, page_size, MADV_NOHUGEPAGE);
895*4882a593Smuzhiyun madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
896*4882a593Smuzhiyun if (!check_huge(p))
897*4882a593Smuzhiyun success("OK");
898*4882a593Smuzhiyun else
899*4882a593Smuzhiyun fail("Fail");
900*4882a593Smuzhiyun fill_memory(p, 0, page_size);
901*4882a593Smuzhiyun
902*4882a593Smuzhiyun write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
903*4882a593Smuzhiyun if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
904*4882a593Smuzhiyun fail("Timeout");
905*4882a593Smuzhiyun else if (check_huge(p))
906*4882a593Smuzhiyun success("OK");
907*4882a593Smuzhiyun else
908*4882a593Smuzhiyun fail("Fail");
909*4882a593Smuzhiyun write_num("khugepaged/max_ptes_shared",
910*4882a593Smuzhiyun default_settings.khugepaged.max_ptes_shared);
911*4882a593Smuzhiyun
912*4882a593Smuzhiyun validate_memory(p, 0, hpage_pmd_size);
913*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
914*4882a593Smuzhiyun exit(exit_status);
915*4882a593Smuzhiyun }
916*4882a593Smuzhiyun
917*4882a593Smuzhiyun wait(&wstatus);
918*4882a593Smuzhiyun exit_status += WEXITSTATUS(wstatus);
919*4882a593Smuzhiyun
920*4882a593Smuzhiyun printf("Check if parent still has huge page...");
921*4882a593Smuzhiyun if (check_huge(p))
922*4882a593Smuzhiyun success("OK");
923*4882a593Smuzhiyun else
924*4882a593Smuzhiyun fail("Fail");
925*4882a593Smuzhiyun validate_memory(p, 0, hpage_pmd_size);
926*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
927*4882a593Smuzhiyun }
928*4882a593Smuzhiyun
collapse_max_ptes_shared()929*4882a593Smuzhiyun static void collapse_max_ptes_shared()
930*4882a593Smuzhiyun {
931*4882a593Smuzhiyun int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
932*4882a593Smuzhiyun int wstatus;
933*4882a593Smuzhiyun void *p;
934*4882a593Smuzhiyun
935*4882a593Smuzhiyun p = alloc_mapping();
936*4882a593Smuzhiyun
937*4882a593Smuzhiyun printf("Allocate huge page...");
938*4882a593Smuzhiyun madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
939*4882a593Smuzhiyun fill_memory(p, 0, hpage_pmd_size);
940*4882a593Smuzhiyun if (check_huge(p))
941*4882a593Smuzhiyun success("OK");
942*4882a593Smuzhiyun else
943*4882a593Smuzhiyun fail("Fail");
944*4882a593Smuzhiyun
945*4882a593Smuzhiyun printf("Share huge page over fork()...");
946*4882a593Smuzhiyun if (!fork()) {
947*4882a593Smuzhiyun /* Do not touch settings on child exit */
948*4882a593Smuzhiyun skip_settings_restore = true;
949*4882a593Smuzhiyun exit_status = 0;
950*4882a593Smuzhiyun
951*4882a593Smuzhiyun if (check_huge(p))
952*4882a593Smuzhiyun success("OK");
953*4882a593Smuzhiyun else
954*4882a593Smuzhiyun fail("Fail");
955*4882a593Smuzhiyun
956*4882a593Smuzhiyun printf("Trigger CoW on page %d of %d...",
957*4882a593Smuzhiyun hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
958*4882a593Smuzhiyun fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
959*4882a593Smuzhiyun if (!check_huge(p))
960*4882a593Smuzhiyun success("OK");
961*4882a593Smuzhiyun else
962*4882a593Smuzhiyun fail("Fail");
963*4882a593Smuzhiyun
964*4882a593Smuzhiyun if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
965*4882a593Smuzhiyun fail("Timeout");
966*4882a593Smuzhiyun else if (!check_huge(p))
967*4882a593Smuzhiyun success("OK");
968*4882a593Smuzhiyun else
969*4882a593Smuzhiyun fail("Fail");
970*4882a593Smuzhiyun
971*4882a593Smuzhiyun printf("Trigger CoW on page %d of %d...",
972*4882a593Smuzhiyun hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
973*4882a593Smuzhiyun fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
974*4882a593Smuzhiyun if (!check_huge(p))
975*4882a593Smuzhiyun success("OK");
976*4882a593Smuzhiyun else
977*4882a593Smuzhiyun fail("Fail");
978*4882a593Smuzhiyun
979*4882a593Smuzhiyun
980*4882a593Smuzhiyun if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
981*4882a593Smuzhiyun fail("Timeout");
982*4882a593Smuzhiyun else if (check_huge(p))
983*4882a593Smuzhiyun success("OK");
984*4882a593Smuzhiyun else
985*4882a593Smuzhiyun fail("Fail");
986*4882a593Smuzhiyun
987*4882a593Smuzhiyun validate_memory(p, 0, hpage_pmd_size);
988*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
989*4882a593Smuzhiyun exit(exit_status);
990*4882a593Smuzhiyun }
991*4882a593Smuzhiyun
992*4882a593Smuzhiyun wait(&wstatus);
993*4882a593Smuzhiyun exit_status += WEXITSTATUS(wstatus);
994*4882a593Smuzhiyun
995*4882a593Smuzhiyun printf("Check if parent still has huge page...");
996*4882a593Smuzhiyun if (check_huge(p))
997*4882a593Smuzhiyun success("OK");
998*4882a593Smuzhiyun else
999*4882a593Smuzhiyun fail("Fail");
1000*4882a593Smuzhiyun validate_memory(p, 0, hpage_pmd_size);
1001*4882a593Smuzhiyun munmap(p, hpage_pmd_size);
1002*4882a593Smuzhiyun }
1003*4882a593Smuzhiyun
main(void)1004*4882a593Smuzhiyun int main(void)
1005*4882a593Smuzhiyun {
1006*4882a593Smuzhiyun setbuf(stdout, NULL);
1007*4882a593Smuzhiyun
1008*4882a593Smuzhiyun page_size = getpagesize();
1009*4882a593Smuzhiyun hpage_pmd_size = read_num("hpage_pmd_size");
1010*4882a593Smuzhiyun hpage_pmd_nr = hpage_pmd_size / page_size;
1011*4882a593Smuzhiyun
1012*4882a593Smuzhiyun default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
1013*4882a593Smuzhiyun default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
1014*4882a593Smuzhiyun default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
1015*4882a593Smuzhiyun default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
1016*4882a593Smuzhiyun
1017*4882a593Smuzhiyun save_settings();
1018*4882a593Smuzhiyun adjust_settings();
1019*4882a593Smuzhiyun
1020*4882a593Smuzhiyun alloc_at_fault();
1021*4882a593Smuzhiyun collapse_full();
1022*4882a593Smuzhiyun collapse_empty();
1023*4882a593Smuzhiyun collapse_single_pte_entry();
1024*4882a593Smuzhiyun collapse_max_ptes_none();
1025*4882a593Smuzhiyun collapse_swapin_single_pte();
1026*4882a593Smuzhiyun collapse_max_ptes_swap();
1027*4882a593Smuzhiyun collapse_single_pte_entry_compound();
1028*4882a593Smuzhiyun collapse_full_of_compound();
1029*4882a593Smuzhiyun collapse_compound_extreme();
1030*4882a593Smuzhiyun collapse_fork();
1031*4882a593Smuzhiyun collapse_fork_compound();
1032*4882a593Smuzhiyun collapse_max_ptes_shared();
1033*4882a593Smuzhiyun
1034*4882a593Smuzhiyun restore_settings(0);
1035*4882a593Smuzhiyun }
1036