// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Collaborative memory management interface.
 *
 * Copyright (C) 2008 IBM Corporation
 * Author(s): Brian King (brking@linux.vnet.ibm.com)
 */

#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/oom.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/stringify.h>
#include <linux/swap.h>
#include <linux/device.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/magic.h>
#include <linux/balloon_compaction.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/mmu.h>
#include <linux/uaccess.h>
#include <linux/memory.h>
#include <asm/plpar_wrappers.h>

#include "pseries.h"

#define CMM_DRIVER_VERSION	"1.0.0"
#define CMM_DEFAULT_DELAY	1
#define CMM_HOTPLUG_DELAY	5
#define CMM_DEBUG		0
#define CMM_DISABLE		0
#define CMM_OOM_KB		1024
#define CMM_MIN_MEM_MB		256
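
/* Convert between kilobytes and kernel base pages (1 page == 1 << (PAGE_SHIFT - 10) KB). */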
#define KB2PAGES(_p)		((_p)>>(PAGE_SHIFT-10))
#define PAGES2KB(_p)		((_p)<<(PAGE_SHIFT-10))

#define CMM_MEM_HOTPLUG_PRI	1

static unsigned int delay = CMM_DEFAULT_DELAY;
static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
static unsigned int oom_kb = CMM_OOM_KB;
static unsigned int cmm_debug = CMM_DEBUG;
static unsigned int cmm_disabled = CMM_DISABLE;
static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
static bool __read_mostly simulate;
static unsigned long simulate_loan_target_kb;
static struct device cmm_dev;

MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
MODULE_LICENSE("GPL");
MODULE_VERSION(CMM_DRIVER_VERSION);

module_param_named(delay, delay, uint, 0644);
MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
		 "before loaning resumes. "
		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
module_param_named(oom_kb, oom_kb, uint, 0644);
MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
		 "[Default=" __stringify(CMM_OOM_KB) "]");
module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
module_param_named(debug, cmm_debug, uint, 0644);
MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
		 "[Default=" __stringify(CMM_DEBUG) "]");
module_param_named(simulate, simulate, bool, 0444);
MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");

#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }

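/*
 * loaned_pages counts pages currently handed to the hypervisor,
 * loaned_pages_target is the goal derived from the last MPP query or OOM
 * event, and oom_freed_pages tracks pages returned by the OOM notifier so
 * they are not immediately loaned out again.
 */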
static atomic_long_t loaned_pages;
static unsigned long loaned_pages_target;
static unsigned long oom_freed_pages;

static DEFINE_MUTEX(hotplug_mutex);
static int hotplug_occurred; /* protected by the hotplug mutex */

static struct task_struct *cmm_thread_ptr;
static struct balloon_dev_info b_dev_info;

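/*
 * Loan one kernel page to the hypervisor: mark each firmware (CMO) sized
 * chunk of the page as loaned via H_PAGE_INIT, undoing any already-loaned
 * chunks if a call fails.
 */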
static long plpar_page_set_loaned(struct page *page)
{
	const unsigned long vpa = page_to_phys(page);
	unsigned long cmo_page_sz = cmo_get_page_size();
	long rc = 0;
	int i;

	if (unlikely(simulate))
		return 0;

	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);

	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
				   vpa + i - cmo_page_sz, 0);

	return rc;
}

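/*
 * Return a previously loaned kernel page to active use, chunk by chunk,
 * rolling the page back to the loaned state if any H_PAGE_INIT call fails.
 */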
static long plpar_page_set_active(struct page *page)
{
	const unsigned long vpa = page_to_phys(page);
	unsigned long cmo_page_sz = cmo_get_page_size();
	long rc = 0;
	int i;

	if (unlikely(simulate))
		return 0;

	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);

	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
				   vpa + i - cmo_page_sz, 0);

	return rc;
}

/**
 * cmm_alloc_pages - Allocate pages and mark them as loaned
 * @nr:	number of pages to allocate
 *
 * Return value:
 *	number of pages requested to be allocated which were not
 **/
static long cmm_alloc_pages(long nr)
{
	struct page *page;
	long rc;

	cmm_dbg("Begin request for %ld pages\n", nr);

	while (nr) {
		/* Exit if a hotplug operation is in progress or occurred */
		if (mutex_trylock(&hotplug_mutex)) {
			if (hotplug_occurred) {
				mutex_unlock(&hotplug_mutex);
				break;
			}
			mutex_unlock(&hotplug_mutex);
		} else {
			break;
		}

		page = balloon_page_alloc();
		if (!page)
			break;
		rc = plpar_page_set_loaned(page);
		if (rc) {
			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
			__free_page(page);
			break;
		}

		balloon_page_enqueue(&b_dev_info, page);
		atomic_long_inc(&loaned_pages);
		adjust_managed_page_count(page, -1);
		nr--;
	}

	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
	return nr;
}

/**
 * cmm_free_pages - Free pages and mark them as active
 * @nr:	number of pages to free
 *
 * Return value:
 *	number of pages requested to be freed which were not
 **/
static long cmm_free_pages(long nr)
{
	struct page *page;

	cmm_dbg("Begin free of %ld pages.\n", nr);
	while (nr) {
		page = balloon_page_dequeue(&b_dev_info);
		if (!page)
			break;
		plpar_page_set_active(page);
		adjust_managed_page_count(page, 1);
		__free_page(page);
		atomic_long_dec(&loaned_pages);
		nr--;
	}
	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
	return nr;
}

/**
 * cmm_oom_notify - OOM notifier
 * @self:	notifier block struct
 * @dummy:	not used
 * @parm:	returned - number of pages freed
 *
 * Return value:
 *	NOTIFY_OK
 **/
static int cmm_oom_notify(struct notifier_block *self,
			  unsigned long dummy, void *parm)
{
	unsigned long *freed = parm;
	long nr = KB2PAGES(oom_kb);

	cmm_dbg("OOM processing started\n");
	nr = cmm_free_pages(nr);
	loaned_pages_target = atomic_long_read(&loaned_pages);
	*freed += KB2PAGES(oom_kb) - nr;
	oom_freed_pages += KB2PAGES(oom_kb) - nr;
	cmm_dbg("OOM processing complete\n");
	return NOTIFY_OK;
}

/**
 * cmm_get_mpp - Read memory performance parameters
 *
 * Makes hcall to query the current page loan request from the hypervisor.
 *
 * Return value:
 *	nothing
 **/
static void cmm_get_mpp(void)
{
	const long __loaned_pages = atomic_long_read(&loaned_pages);
	const long total_pages = totalram_pages() + __loaned_pages;
	int rc;
	struct hvcall_mpp_data mpp_data;
	signed long active_pages_target, page_loan_request, target;
	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;

	if (likely(!simulate)) {
		rc = h_get_mpp(&mpp_data);
		if (rc != H_SUCCESS)
			return;
		page_loan_request = div_s64((s64)mpp_data.loan_request,
					    PAGE_SIZE);
		target = page_loan_request + __loaned_pages;
	} else {
		target = KB2PAGES(simulate_loan_target_kb);
		page_loan_request = target - __loaned_pages;
	}

	if (target < 0 || total_pages < min_mem_pages)
		target = 0;

	if (target > oom_freed_pages)
		target -= oom_freed_pages;
	else
		target = 0;

	active_pages_target = total_pages - target;

	if (min_mem_pages > active_pages_target)
		target = total_pages - min_mem_pages;

	if (target < 0)
		target = 0;

	loaned_pages_target = target;

	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
		page_loan_request, __loaned_pages, loaned_pages_target,
		oom_freed_pages, totalram_pages());
}

static struct notifier_block cmm_oom_nb = {
	.notifier_call = cmm_oom_notify
};

/**
 * cmm_thread - CMM task thread
 * @dummy:	not used
 *
 * Return value:
 *	0
 **/
static int cmm_thread(void *dummy)
{
	unsigned long timeleft;
	long __loaned_pages;

	while (1) {
		timeleft = msleep_interruptible(delay * 1000);

		if (kthread_should_stop() || timeleft)
			break;

		if (mutex_trylock(&hotplug_mutex)) {
			if (hotplug_occurred) {
				hotplug_occurred = 0;
				mutex_unlock(&hotplug_mutex);
				cmm_dbg("Hotplug operation has occurred, "
					"loaning activity suspended "
					"for %d seconds.\n",
					hotplug_delay);
				timeleft = msleep_interruptible(hotplug_delay *
								1000);
				if (kthread_should_stop() || timeleft)
					break;
				continue;
			}
			mutex_unlock(&hotplug_mutex);
		} else {
			cmm_dbg("Hotplug operation in progress, activity "
				"suspended\n");
			continue;
		}

		cmm_get_mpp();

		__loaned_pages = atomic_long_read(&loaned_pages);
		if (loaned_pages_target > __loaned_pages) {
			if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
				loaned_pages_target = __loaned_pages;
		} else if (loaned_pages_target < __loaned_pages)
			cmm_free_pages(__loaned_pages - loaned_pages_target);
	}
	return 0;
}

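/*
 * CMM_SHOW generates a read-only sysfs show routine plus the matching
 * DEVICE_ATTR for the simple counter exports on the cmm sysfs device.
 */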
#define CMM_SHOW(name, format, args...)				\
	static ssize_t show_##name(struct device *dev,		\
				   struct device_attribute *attr,	\
				   char *buf)			\
	{							\
		return sprintf(buf, format, ##args);		\
	}							\
	static DEVICE_ATTR(name, 0444, show_##name, NULL)

CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));

static ssize_t show_oom_pages(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
}

static ssize_t store_oom_pages(struct device *dev,
			       struct device_attribute *attr,
			       const char *buf, size_t count)
{
	unsigned long val = simple_strtoul(buf, NULL, 10);

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (val != 0)
		return -EBADMSG;

	oom_freed_pages = 0;
	return count;
}

static DEVICE_ATTR(oom_freed_kb, 0644,
		   show_oom_pages, store_oom_pages);

static struct device_attribute *cmm_attrs[] = {
	&dev_attr_loaned_kb,
	&dev_attr_loaned_target_kb,
	&dev_attr_oom_freed_kb,
};

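/*
 * Only exposed in simulation mode; stands in for the hypervisor loan
 * request (in KB) consumed by cmm_get_mpp().
 */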
static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
			 simulate_loan_target_kb);

static struct bus_type cmm_subsys = {
	.name = "cmm",
	.dev_name = "cmm",
};

static void cmm_release_device(struct device *dev)
{
}

/**
 * cmm_sysfs_register - Register with sysfs
 *
 * Return value:
 *	0 on success / other on failure
 **/
static int cmm_sysfs_register(struct device *dev)
{
	int i, rc;

	if ((rc = subsys_system_register(&cmm_subsys, NULL)))
		return rc;

	dev->id = 0;
	dev->bus = &cmm_subsys;
	dev->release = cmm_release_device;

	if ((rc = device_register(dev)))
		goto subsys_unregister;

	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
		if ((rc = device_create_file(dev, cmm_attrs[i])))
			goto fail;
	}

	if (!simulate)
		return 0;
	rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
	if (rc)
		goto fail;
	return 0;

fail:
	while (--i >= 0)
		device_remove_file(dev, cmm_attrs[i]);
	device_unregister(dev);
subsys_unregister:
	bus_unregister(&cmm_subsys);
	return rc;
}

/**
 * cmm_unregister_sysfs - Unregister from sysfs
 *
 **/
static void cmm_unregister_sysfs(struct device *dev)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
		device_remove_file(dev, cmm_attrs[i]);
	device_unregister(dev);
	bus_unregister(&cmm_subsys);
}

/**
 * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
 *
 **/
static int cmm_reboot_notifier(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	if (action == SYS_RESTART) {
		if (cmm_thread_ptr)
			kthread_stop(cmm_thread_ptr);
		cmm_thread_ptr = NULL;
		cmm_free_pages(atomic_long_read(&loaned_pages));
	}
	return NOTIFY_DONE;
}

static struct notifier_block cmm_reboot_nb = {
	.notifier_call = cmm_reboot_notifier,
};

/**
 * cmm_memory_cb - Handle memory hotplug notifier calls
 * @self:	notifier block struct
 * @action:	action to take
 * @arg:	struct memory_notify data for handler
 *
 * Return value:
 *	NOTIFY_OK or notifier error based on subfunction return value
 *
 **/
static int cmm_memory_cb(struct notifier_block *self,
			 unsigned long action, void *arg)
{
	int ret = 0;

	switch (action) {
	case MEM_GOING_OFFLINE:
		mutex_lock(&hotplug_mutex);
		hotplug_occurred = 1;
		break;
	case MEM_OFFLINE:
	case MEM_CANCEL_OFFLINE:
		mutex_unlock(&hotplug_mutex);
		cmm_dbg("Memory offline operation complete.\n");
		break;
	case MEM_GOING_ONLINE:
	case MEM_ONLINE:
	case MEM_CANCEL_ONLINE:
		break;
	}

	return notifier_from_errno(ret);
}

static struct notifier_block cmm_mem_nb = {
	.notifier_call = cmm_memory_cb,
	.priority = CMM_MEM_HOTPLUG_PRI
};

#ifdef CONFIG_BALLOON_COMPACTION
static struct vfsmount *balloon_mnt;

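/*
 * Balloon page migration needs an address_space; provide it via an inode
 * on a minimal in-kernel "ppc-cmm" pseudo filesystem.
 */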
static int cmm_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type balloon_fs = {
	.name = "ppc-cmm",
	.init_fs_context = cmm_init_fs_context,
	.kill_sb = kill_anon_super,
};

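/*
 * Balloon compaction callback: loan the destination page, move it onto the
 * balloon list in place of the isolated source page, then activate and
 * release the source page.
 */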
static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
			   struct page *newpage, struct page *page,
			   enum migrate_mode mode)
{
	unsigned long flags;

	/*
	 * loan/"inflate" the newpage first.
	 *
	 * We might race against the cmm_thread who might discover after our
	 * loan request that another page is to be unloaned. However, once
	 * the cmm_thread runs again later, this error will automatically
	 * be corrected.
	 */
	if (plpar_page_set_loaned(newpage)) {
		/* Unlikely, but possible. Tell the caller not to retry now. */
		pr_err_ratelimited("%s: Cannot set page to loaned.", __func__);
		return -EBUSY;
	}

	/* balloon page list reference */
	get_page(newpage);

	/*
	 * When we migrate a page to a different zone, we have to fixup the
	 * count of both involved zones as we adjusted the managed page count
	 * when inflating.
	 */
	if (page_zone(page) != page_zone(newpage)) {
		adjust_managed_page_count(page, 1);
		adjust_managed_page_count(newpage, -1);
	}

	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
	balloon_page_insert(b_dev_info, newpage);
	balloon_page_delete(page);
	b_dev_info->isolated_pages--;
	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);

	/*
	 * activate/"deflate" the old page. We ignore any errors just like the
	 * other callers.
	 */
	plpar_page_set_active(page);

	/* balloon page list reference */
	put_page(page);

	return MIGRATEPAGE_SUCCESS;
}

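/* Set up balloon_dev_info and the backing inode used for page migration. */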
static int cmm_balloon_compaction_init(void)
{
	int rc;

	balloon_devinfo_init(&b_dev_info);
	b_dev_info.migratepage = cmm_migratepage;

	balloon_mnt = kern_mount(&balloon_fs);
	if (IS_ERR(balloon_mnt)) {
		rc = PTR_ERR(balloon_mnt);
		balloon_mnt = NULL;
		return rc;
	}

	b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
	if (IS_ERR(b_dev_info.inode)) {
		rc = PTR_ERR(b_dev_info.inode);
		b_dev_info.inode = NULL;
		kern_unmount(balloon_mnt);
		balloon_mnt = NULL;
		return rc;
	}

	b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
	return 0;
}

static void cmm_balloon_compaction_deinit(void)
{
	if (b_dev_info.inode)
		iput(b_dev_info.inode);
	b_dev_info.inode = NULL;
	kern_unmount(balloon_mnt);
	balloon_mnt = NULL;
}
#else /* CONFIG_BALLOON_COMPACTION */
static int cmm_balloon_compaction_init(void)
{
	return 0;
}

static void cmm_balloon_compaction_deinit(void)
{
}
#endif /* CONFIG_BALLOON_COMPACTION */

/**
 * cmm_init - Module initialization
 *
 * Return value:
 *	0 on success / other on failure
 **/
static int cmm_init(void)
{
	int rc;

	if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
		return -EOPNOTSUPP;

	rc = cmm_balloon_compaction_init();
	if (rc)
		return rc;

	rc = register_oom_notifier(&cmm_oom_nb);
	if (rc < 0)
		goto out_balloon_compaction;

	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
		goto out_oom_notifier;

	if ((rc = cmm_sysfs_register(&cmm_dev)))
		goto out_reboot_notifier;

	rc = register_memory_notifier(&cmm_mem_nb);
	if (rc)
		goto out_unregister_notifier;

	if (cmm_disabled)
		return 0;

	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
	if (IS_ERR(cmm_thread_ptr)) {
		rc = PTR_ERR(cmm_thread_ptr);
		goto out_unregister_notifier;
	}

	return 0;
out_unregister_notifier:
	unregister_memory_notifier(&cmm_mem_nb);
	cmm_unregister_sysfs(&cmm_dev);
out_reboot_notifier:
	unregister_reboot_notifier(&cmm_reboot_nb);
out_oom_notifier:
	unregister_oom_notifier(&cmm_oom_nb);
out_balloon_compaction:
	cmm_balloon_compaction_deinit();
	return rc;
}

/**
 * cmm_exit - Module exit
 *
 * Return value:
 *	nothing
 **/
static void cmm_exit(void)
{
	if (cmm_thread_ptr)
		kthread_stop(cmm_thread_ptr);
	unregister_oom_notifier(&cmm_oom_nb);
	unregister_reboot_notifier(&cmm_reboot_nb);
	unregister_memory_notifier(&cmm_mem_nb);
	cmm_free_pages(atomic_long_read(&loaned_pages));
	cmm_unregister_sysfs(&cmm_dev);
	cmm_balloon_compaction_deinit();
}

/**
 * cmm_set_disable - Disable/Enable CMM
 *
 * Return value:
 *	0 on success / other on failure
 **/
static int cmm_set_disable(const char *val, const struct kernel_param *kp)
{
	int disable = simple_strtoul(val, NULL, 10);

	if (disable != 0 && disable != 1)
		return -EINVAL;

	if (disable && !cmm_disabled) {
		if (cmm_thread_ptr)
			kthread_stop(cmm_thread_ptr);
		cmm_thread_ptr = NULL;
		cmm_free_pages(atomic_long_read(&loaned_pages));
	} else if (!disable && cmm_disabled) {
		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
		if (IS_ERR(cmm_thread_ptr))
			return PTR_ERR(cmm_thread_ptr);
	}

	cmm_disabled = disable;
	return 0;
}

module_param_call(disable, cmm_set_disable, param_get_uint,
		  &cmm_disabled, 0644);
MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
		 "[Default=" __stringify(CMM_DISABLE) "]");

module_init(cmm_init);
module_exit(cmm_exit);