xref: /OK3568_Linux_fs/kernel/arch/powerpc/platforms/pseries/cmm.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Collaborative memory management interface.
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright (C) 2008 IBM Corporation
6*4882a593Smuzhiyun  * Author(s): Brian King (brking@linux.vnet.ibm.com),
7*4882a593Smuzhiyun  */
8*4882a593Smuzhiyun 
9*4882a593Smuzhiyun #include <linux/ctype.h>
10*4882a593Smuzhiyun #include <linux/delay.h>
11*4882a593Smuzhiyun #include <linux/errno.h>
12*4882a593Smuzhiyun #include <linux/fs.h>
13*4882a593Smuzhiyun #include <linux/gfp.h>
14*4882a593Smuzhiyun #include <linux/kthread.h>
15*4882a593Smuzhiyun #include <linux/module.h>
16*4882a593Smuzhiyun #include <linux/oom.h>
17*4882a593Smuzhiyun #include <linux/reboot.h>
18*4882a593Smuzhiyun #include <linux/sched.h>
19*4882a593Smuzhiyun #include <linux/stringify.h>
20*4882a593Smuzhiyun #include <linux/swap.h>
21*4882a593Smuzhiyun #include <linux/device.h>
22*4882a593Smuzhiyun #include <linux/mount.h>
23*4882a593Smuzhiyun #include <linux/pseudo_fs.h>
24*4882a593Smuzhiyun #include <linux/magic.h>
25*4882a593Smuzhiyun #include <linux/balloon_compaction.h>
26*4882a593Smuzhiyun #include <asm/firmware.h>
27*4882a593Smuzhiyun #include <asm/hvcall.h>
28*4882a593Smuzhiyun #include <asm/mmu.h>
29*4882a593Smuzhiyun #include <linux/uaccess.h>
30*4882a593Smuzhiyun #include <linux/memory.h>
31*4882a593Smuzhiyun #include <asm/plpar_wrappers.h>
32*4882a593Smuzhiyun 
33*4882a593Smuzhiyun #include "pseries.h"
34*4882a593Smuzhiyun 
/* Module defaults and page<->KB conversion helpers. */
#define CMM_DRIVER_VERSION	"1.0.0"
#define CMM_DEFAULT_DELAY	1	/* seconds between hypervisor polls */
#define CMM_HOTPLUG_DELAY	5	/* seconds to pause after hotplug remove */
#define CMM_DEBUG			0
#define CMM_DISABLE		0
#define CMM_OOM_KB		1024	/* KB given back per OOM notification */
#define CMM_MIN_MEM_MB		256	/* floor below which we never balloon */
#define KB2PAGES(_p)		((_p)>>(PAGE_SHIFT-10))
#define PAGES2KB(_p)		((_p)<<(PAGE_SHIFT-10))

#define CMM_MEM_HOTPLUG_PRI	1	/* notifier priority for cmm_mem_nb */
46*4882a593Smuzhiyun 
/* Runtime-tunable state, exposed via the module parameters declared below. */
static unsigned int delay = CMM_DEFAULT_DELAY;		/* poll interval (s) */
static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;	/* post-hotplug pause (s) */
static unsigned int oom_kb = CMM_OOM_KB;		/* amount to free on OOM (KB) */
static unsigned int cmm_debug = CMM_DEBUG;		/* enables cmm_dbg() output */
static unsigned int cmm_disabled = CMM_DISABLE;		/* 1 = loaning thread stopped */
static unsigned long min_mem_mb = CMM_MIN_MEM_MB;	/* memory never ballooned (MB) */
static bool __read_mostly simulate;			/* skip hcalls; simulated target */
static unsigned long simulate_loan_target_kb;		/* target used when simulating */
static struct device cmm_dev;				/* sysfs device registered in cmm_init */
56*4882a593Smuzhiyun 
/* Module identity and the parameter plumbing for the knobs declared above. */
MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
MODULE_LICENSE("GPL");
MODULE_VERSION(CMM_DRIVER_VERSION);

module_param_named(delay, delay, uint, 0644);
MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
		 "before loaning resumes. "
		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
module_param_named(oom_kb, oom_kb, uint, 0644);
MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
		 "[Default=" __stringify(CMM_OOM_KB) "]");
module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
module_param_named(debug, cmm_debug, uint, 0644);
MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
		 "[Default=" __stringify(CMM_DEBUG) "]");
/* simulate is 0444: simulation mode can only be chosen at load time. */
module_param_named(simulate, simulate, bool, 0444);
MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
80*4882a593Smuzhiyun 
81*4882a593Smuzhiyun #define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
82*4882a593Smuzhiyun 
/* Pages currently handed ("loaned") to the hypervisor. */
static atomic_long_t loaned_pages;
/* Loan size we are converging toward; refreshed by cmm_get_mpp(). */
static unsigned long loaned_pages_target;
/* Pages returned to the kernel by the OOM notifier (sysfs: oom_freed_kb). */
static unsigned long oom_freed_pages;

/* Serializes ballooning against memory hotplug operations. */
static DEFINE_MUTEX(hotplug_mutex);
static int hotplug_occurred; /* protected by the hotplug mutex */

/* Polling thread started by cmm_init() unless cmm_disabled. */
static struct task_struct *cmm_thread_ptr;
/* Balloon core state: page list, lock and migration callback. */
static struct balloon_dev_info b_dev_info;
92*4882a593Smuzhiyun 
plpar_page_set_loaned(struct page * page)93*4882a593Smuzhiyun static long plpar_page_set_loaned(struct page *page)
94*4882a593Smuzhiyun {
95*4882a593Smuzhiyun 	const unsigned long vpa = page_to_phys(page);
96*4882a593Smuzhiyun 	unsigned long cmo_page_sz = cmo_get_page_size();
97*4882a593Smuzhiyun 	long rc = 0;
98*4882a593Smuzhiyun 	int i;
99*4882a593Smuzhiyun 
100*4882a593Smuzhiyun 	if (unlikely(simulate))
101*4882a593Smuzhiyun 		return 0;
102*4882a593Smuzhiyun 
103*4882a593Smuzhiyun 	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
104*4882a593Smuzhiyun 		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun 	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
107*4882a593Smuzhiyun 		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
108*4882a593Smuzhiyun 				   vpa + i - cmo_page_sz, 0);
109*4882a593Smuzhiyun 
110*4882a593Smuzhiyun 	return rc;
111*4882a593Smuzhiyun }
112*4882a593Smuzhiyun 
plpar_page_set_active(struct page * page)113*4882a593Smuzhiyun static long plpar_page_set_active(struct page *page)
114*4882a593Smuzhiyun {
115*4882a593Smuzhiyun 	const unsigned long vpa = page_to_phys(page);
116*4882a593Smuzhiyun 	unsigned long cmo_page_sz = cmo_get_page_size();
117*4882a593Smuzhiyun 	long rc = 0;
118*4882a593Smuzhiyun 	int i;
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun 	if (unlikely(simulate))
121*4882a593Smuzhiyun 		return 0;
122*4882a593Smuzhiyun 
123*4882a593Smuzhiyun 	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
124*4882a593Smuzhiyun 		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
125*4882a593Smuzhiyun 
126*4882a593Smuzhiyun 	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
127*4882a593Smuzhiyun 		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
128*4882a593Smuzhiyun 				   vpa + i - cmo_page_sz, 0);
129*4882a593Smuzhiyun 
130*4882a593Smuzhiyun 	return rc;
131*4882a593Smuzhiyun }
132*4882a593Smuzhiyun 
133*4882a593Smuzhiyun /**
134*4882a593Smuzhiyun  * cmm_alloc_pages - Allocate pages and mark them as loaned
135*4882a593Smuzhiyun  * @nr:	number of pages to allocate
136*4882a593Smuzhiyun  *
137*4882a593Smuzhiyun  * Return value:
138*4882a593Smuzhiyun  * 	number of pages requested to be allocated which were not
139*4882a593Smuzhiyun  **/
cmm_alloc_pages(long nr)140*4882a593Smuzhiyun static long cmm_alloc_pages(long nr)
141*4882a593Smuzhiyun {
142*4882a593Smuzhiyun 	struct page *page;
143*4882a593Smuzhiyun 	long rc;
144*4882a593Smuzhiyun 
145*4882a593Smuzhiyun 	cmm_dbg("Begin request for %ld pages\n", nr);
146*4882a593Smuzhiyun 
147*4882a593Smuzhiyun 	while (nr) {
148*4882a593Smuzhiyun 		/* Exit if a hotplug operation is in progress or occurred */
149*4882a593Smuzhiyun 		if (mutex_trylock(&hotplug_mutex)) {
150*4882a593Smuzhiyun 			if (hotplug_occurred) {
151*4882a593Smuzhiyun 				mutex_unlock(&hotplug_mutex);
152*4882a593Smuzhiyun 				break;
153*4882a593Smuzhiyun 			}
154*4882a593Smuzhiyun 			mutex_unlock(&hotplug_mutex);
155*4882a593Smuzhiyun 		} else {
156*4882a593Smuzhiyun 			break;
157*4882a593Smuzhiyun 		}
158*4882a593Smuzhiyun 
159*4882a593Smuzhiyun 		page = balloon_page_alloc();
160*4882a593Smuzhiyun 		if (!page)
161*4882a593Smuzhiyun 			break;
162*4882a593Smuzhiyun 		rc = plpar_page_set_loaned(page);
163*4882a593Smuzhiyun 		if (rc) {
164*4882a593Smuzhiyun 			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
165*4882a593Smuzhiyun 			__free_page(page);
166*4882a593Smuzhiyun 			break;
167*4882a593Smuzhiyun 		}
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun 		balloon_page_enqueue(&b_dev_info, page);
170*4882a593Smuzhiyun 		atomic_long_inc(&loaned_pages);
171*4882a593Smuzhiyun 		adjust_managed_page_count(page, -1);
172*4882a593Smuzhiyun 		nr--;
173*4882a593Smuzhiyun 	}
174*4882a593Smuzhiyun 
175*4882a593Smuzhiyun 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
176*4882a593Smuzhiyun 	return nr;
177*4882a593Smuzhiyun }
178*4882a593Smuzhiyun 
179*4882a593Smuzhiyun /**
180*4882a593Smuzhiyun  * cmm_free_pages - Free pages and mark them as active
181*4882a593Smuzhiyun  * @nr:	number of pages to free
182*4882a593Smuzhiyun  *
183*4882a593Smuzhiyun  * Return value:
184*4882a593Smuzhiyun  * 	number of pages requested to be freed which were not
185*4882a593Smuzhiyun  **/
cmm_free_pages(long nr)186*4882a593Smuzhiyun static long cmm_free_pages(long nr)
187*4882a593Smuzhiyun {
188*4882a593Smuzhiyun 	struct page *page;
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun 	cmm_dbg("Begin free of %ld pages.\n", nr);
191*4882a593Smuzhiyun 	while (nr) {
192*4882a593Smuzhiyun 		page = balloon_page_dequeue(&b_dev_info);
193*4882a593Smuzhiyun 		if (!page)
194*4882a593Smuzhiyun 			break;
195*4882a593Smuzhiyun 		plpar_page_set_active(page);
196*4882a593Smuzhiyun 		adjust_managed_page_count(page, 1);
197*4882a593Smuzhiyun 		__free_page(page);
198*4882a593Smuzhiyun 		atomic_long_dec(&loaned_pages);
199*4882a593Smuzhiyun 		nr--;
200*4882a593Smuzhiyun 	}
201*4882a593Smuzhiyun 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
202*4882a593Smuzhiyun 	return nr;
203*4882a593Smuzhiyun }
204*4882a593Smuzhiyun 
205*4882a593Smuzhiyun /**
206*4882a593Smuzhiyun  * cmm_oom_notify - OOM notifier
207*4882a593Smuzhiyun  * @self:	notifier block struct
208*4882a593Smuzhiyun  * @dummy:	not used
209*4882a593Smuzhiyun  * @parm:	returned - number of pages freed
210*4882a593Smuzhiyun  *
211*4882a593Smuzhiyun  * Return value:
212*4882a593Smuzhiyun  * 	NOTIFY_OK
213*4882a593Smuzhiyun  **/
cmm_oom_notify(struct notifier_block * self,unsigned long dummy,void * parm)214*4882a593Smuzhiyun static int cmm_oom_notify(struct notifier_block *self,
215*4882a593Smuzhiyun 			  unsigned long dummy, void *parm)
216*4882a593Smuzhiyun {
217*4882a593Smuzhiyun 	unsigned long *freed = parm;
218*4882a593Smuzhiyun 	long nr = KB2PAGES(oom_kb);
219*4882a593Smuzhiyun 
220*4882a593Smuzhiyun 	cmm_dbg("OOM processing started\n");
221*4882a593Smuzhiyun 	nr = cmm_free_pages(nr);
222*4882a593Smuzhiyun 	loaned_pages_target = atomic_long_read(&loaned_pages);
223*4882a593Smuzhiyun 	*freed += KB2PAGES(oom_kb) - nr;
224*4882a593Smuzhiyun 	oom_freed_pages += KB2PAGES(oom_kb) - nr;
225*4882a593Smuzhiyun 	cmm_dbg("OOM processing complete\n");
226*4882a593Smuzhiyun 	return NOTIFY_OK;
227*4882a593Smuzhiyun }
228*4882a593Smuzhiyun 
229*4882a593Smuzhiyun /**
230*4882a593Smuzhiyun  * cmm_get_mpp - Read memory performance parameters
231*4882a593Smuzhiyun  *
232*4882a593Smuzhiyun  * Makes hcall to query the current page loan request from the hypervisor.
233*4882a593Smuzhiyun  *
234*4882a593Smuzhiyun  * Return value:
235*4882a593Smuzhiyun  * 	nothing
236*4882a593Smuzhiyun  **/
cmm_get_mpp(void)237*4882a593Smuzhiyun static void cmm_get_mpp(void)
238*4882a593Smuzhiyun {
239*4882a593Smuzhiyun 	const long __loaned_pages = atomic_long_read(&loaned_pages);
240*4882a593Smuzhiyun 	const long total_pages = totalram_pages() + __loaned_pages;
241*4882a593Smuzhiyun 	int rc;
242*4882a593Smuzhiyun 	struct hvcall_mpp_data mpp_data;
243*4882a593Smuzhiyun 	signed long active_pages_target, page_loan_request, target;
244*4882a593Smuzhiyun 	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
245*4882a593Smuzhiyun 
246*4882a593Smuzhiyun 	if (likely(!simulate)) {
247*4882a593Smuzhiyun 		rc = h_get_mpp(&mpp_data);
248*4882a593Smuzhiyun 		if (rc != H_SUCCESS)
249*4882a593Smuzhiyun 			return;
250*4882a593Smuzhiyun 		page_loan_request = div_s64((s64)mpp_data.loan_request,
251*4882a593Smuzhiyun 					    PAGE_SIZE);
252*4882a593Smuzhiyun 		target = page_loan_request + __loaned_pages;
253*4882a593Smuzhiyun 	} else {
254*4882a593Smuzhiyun 		target = KB2PAGES(simulate_loan_target_kb);
255*4882a593Smuzhiyun 		page_loan_request = target - __loaned_pages;
256*4882a593Smuzhiyun 	}
257*4882a593Smuzhiyun 
258*4882a593Smuzhiyun 	if (target < 0 || total_pages < min_mem_pages)
259*4882a593Smuzhiyun 		target = 0;
260*4882a593Smuzhiyun 
261*4882a593Smuzhiyun 	if (target > oom_freed_pages)
262*4882a593Smuzhiyun 		target -= oom_freed_pages;
263*4882a593Smuzhiyun 	else
264*4882a593Smuzhiyun 		target = 0;
265*4882a593Smuzhiyun 
266*4882a593Smuzhiyun 	active_pages_target = total_pages - target;
267*4882a593Smuzhiyun 
268*4882a593Smuzhiyun 	if (min_mem_pages > active_pages_target)
269*4882a593Smuzhiyun 		target = total_pages - min_mem_pages;
270*4882a593Smuzhiyun 
271*4882a593Smuzhiyun 	if (target < 0)
272*4882a593Smuzhiyun 		target = 0;
273*4882a593Smuzhiyun 
274*4882a593Smuzhiyun 	loaned_pages_target = target;
275*4882a593Smuzhiyun 
276*4882a593Smuzhiyun 	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
277*4882a593Smuzhiyun 		page_loan_request, __loaned_pages, loaned_pages_target,
278*4882a593Smuzhiyun 		oom_freed_pages, totalram_pages());
279*4882a593Smuzhiyun }
280*4882a593Smuzhiyun 
/* Registered with the OOM notifier chain in cmm_init(). */
static struct notifier_block cmm_oom_nb = {
	.notifier_call = cmm_oom_notify
};
284*4882a593Smuzhiyun 
285*4882a593Smuzhiyun /**
286*4882a593Smuzhiyun  * cmm_thread - CMM task thread
287*4882a593Smuzhiyun  * @dummy:	not used
288*4882a593Smuzhiyun  *
289*4882a593Smuzhiyun  * Return value:
290*4882a593Smuzhiyun  * 	0
291*4882a593Smuzhiyun  **/
cmm_thread(void * dummy)292*4882a593Smuzhiyun static int cmm_thread(void *dummy)
293*4882a593Smuzhiyun {
294*4882a593Smuzhiyun 	unsigned long timeleft;
295*4882a593Smuzhiyun 	long __loaned_pages;
296*4882a593Smuzhiyun 
297*4882a593Smuzhiyun 	while (1) {
298*4882a593Smuzhiyun 		timeleft = msleep_interruptible(delay * 1000);
299*4882a593Smuzhiyun 
300*4882a593Smuzhiyun 		if (kthread_should_stop() || timeleft)
301*4882a593Smuzhiyun 			break;
302*4882a593Smuzhiyun 
303*4882a593Smuzhiyun 		if (mutex_trylock(&hotplug_mutex)) {
304*4882a593Smuzhiyun 			if (hotplug_occurred) {
305*4882a593Smuzhiyun 				hotplug_occurred = 0;
306*4882a593Smuzhiyun 				mutex_unlock(&hotplug_mutex);
307*4882a593Smuzhiyun 				cmm_dbg("Hotplug operation has occurred, "
308*4882a593Smuzhiyun 						"loaning activity suspended "
309*4882a593Smuzhiyun 						"for %d seconds.\n",
310*4882a593Smuzhiyun 						hotplug_delay);
311*4882a593Smuzhiyun 				timeleft = msleep_interruptible(hotplug_delay *
312*4882a593Smuzhiyun 						1000);
313*4882a593Smuzhiyun 				if (kthread_should_stop() || timeleft)
314*4882a593Smuzhiyun 					break;
315*4882a593Smuzhiyun 				continue;
316*4882a593Smuzhiyun 			}
317*4882a593Smuzhiyun 			mutex_unlock(&hotplug_mutex);
318*4882a593Smuzhiyun 		} else {
319*4882a593Smuzhiyun 			cmm_dbg("Hotplug operation in progress, activity "
320*4882a593Smuzhiyun 					"suspended\n");
321*4882a593Smuzhiyun 			continue;
322*4882a593Smuzhiyun 		}
323*4882a593Smuzhiyun 
324*4882a593Smuzhiyun 		cmm_get_mpp();
325*4882a593Smuzhiyun 
326*4882a593Smuzhiyun 		__loaned_pages = atomic_long_read(&loaned_pages);
327*4882a593Smuzhiyun 		if (loaned_pages_target > __loaned_pages) {
328*4882a593Smuzhiyun 			if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
329*4882a593Smuzhiyun 				loaned_pages_target = __loaned_pages;
330*4882a593Smuzhiyun 		} else if (loaned_pages_target < __loaned_pages)
331*4882a593Smuzhiyun 			cmm_free_pages(__loaned_pages - loaned_pages_target);
332*4882a593Smuzhiyun 	}
333*4882a593Smuzhiyun 	return 0;
334*4882a593Smuzhiyun }
335*4882a593Smuzhiyun 
/*
 * CMM_SHOW - generate a read-only sysfs show handler plus its
 * DEVICE_ATTR for a value printable with a simple format string.
 */
#define CMM_SHOW(name, format, args...)			\
	static ssize_t show_##name(struct device *dev,	\
				   struct device_attribute *attr,	\
				   char *buf)			\
	{							\
		return sprintf(buf, format, ##args);		\
	}							\
	static DEVICE_ATTR(name, 0444, show_##name, NULL)

/* Current and target loan sizes, exported to sysfs in KB. */
CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
347*4882a593Smuzhiyun 
show_oom_pages(struct device * dev,struct device_attribute * attr,char * buf)348*4882a593Smuzhiyun static ssize_t show_oom_pages(struct device *dev,
349*4882a593Smuzhiyun 			      struct device_attribute *attr, char *buf)
350*4882a593Smuzhiyun {
351*4882a593Smuzhiyun 	return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
352*4882a593Smuzhiyun }
353*4882a593Smuzhiyun 
store_oom_pages(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)354*4882a593Smuzhiyun static ssize_t store_oom_pages(struct device *dev,
355*4882a593Smuzhiyun 			       struct device_attribute *attr,
356*4882a593Smuzhiyun 			       const char *buf, size_t count)
357*4882a593Smuzhiyun {
358*4882a593Smuzhiyun 	unsigned long val = simple_strtoul (buf, NULL, 10);
359*4882a593Smuzhiyun 
360*4882a593Smuzhiyun 	if (!capable(CAP_SYS_ADMIN))
361*4882a593Smuzhiyun 		return -EPERM;
362*4882a593Smuzhiyun 	if (val != 0)
363*4882a593Smuzhiyun 		return -EBADMSG;
364*4882a593Smuzhiyun 
365*4882a593Smuzhiyun 	oom_freed_pages = 0;
366*4882a593Smuzhiyun 	return count;
367*4882a593Smuzhiyun }
368*4882a593Smuzhiyun 
/* oom_freed_kb is writable (reset-only) unlike the CMM_SHOW attributes. */
static DEVICE_ATTR(oom_freed_kb, 0644,
		   show_oom_pages, store_oom_pages);

/* Attributes created on cmm_dev by cmm_sysfs_register(). */
static struct device_attribute *cmm_attrs[] = {
	&dev_attr_loaned_kb,
	&dev_attr_loaned_target_kb,
	&dev_attr_oom_freed_kb,
};

/* Only registered when simulate=1; sets the simulated loan target (KB). */
static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
			 simulate_loan_target_kb);

/* Subsystem providing the /sys/devices/system/cmm hierarchy. */
static struct bus_type cmm_subsys = {
	.name = "cmm",
	.dev_name = "cmm",
};
385*4882a593Smuzhiyun 
/* Empty release callback: cmm_dev is static, nothing to free. */
static void cmm_release_device(struct device *dev)
{
}
389*4882a593Smuzhiyun 
390*4882a593Smuzhiyun /**
391*4882a593Smuzhiyun  * cmm_sysfs_register - Register with sysfs
392*4882a593Smuzhiyun  *
393*4882a593Smuzhiyun  * Return value:
394*4882a593Smuzhiyun  * 	0 on success / other on failure
395*4882a593Smuzhiyun  **/
cmm_sysfs_register(struct device * dev)396*4882a593Smuzhiyun static int cmm_sysfs_register(struct device *dev)
397*4882a593Smuzhiyun {
398*4882a593Smuzhiyun 	int i, rc;
399*4882a593Smuzhiyun 
400*4882a593Smuzhiyun 	if ((rc = subsys_system_register(&cmm_subsys, NULL)))
401*4882a593Smuzhiyun 		return rc;
402*4882a593Smuzhiyun 
403*4882a593Smuzhiyun 	dev->id = 0;
404*4882a593Smuzhiyun 	dev->bus = &cmm_subsys;
405*4882a593Smuzhiyun 	dev->release = cmm_release_device;
406*4882a593Smuzhiyun 
407*4882a593Smuzhiyun 	if ((rc = device_register(dev)))
408*4882a593Smuzhiyun 		goto subsys_unregister;
409*4882a593Smuzhiyun 
410*4882a593Smuzhiyun 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
411*4882a593Smuzhiyun 		if ((rc = device_create_file(dev, cmm_attrs[i])))
412*4882a593Smuzhiyun 			goto fail;
413*4882a593Smuzhiyun 	}
414*4882a593Smuzhiyun 
415*4882a593Smuzhiyun 	if (!simulate)
416*4882a593Smuzhiyun 		return 0;
417*4882a593Smuzhiyun 	rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
418*4882a593Smuzhiyun 	if (rc)
419*4882a593Smuzhiyun 		goto fail;
420*4882a593Smuzhiyun 	return 0;
421*4882a593Smuzhiyun 
422*4882a593Smuzhiyun fail:
423*4882a593Smuzhiyun 	while (--i >= 0)
424*4882a593Smuzhiyun 		device_remove_file(dev, cmm_attrs[i]);
425*4882a593Smuzhiyun 	device_unregister(dev);
426*4882a593Smuzhiyun subsys_unregister:
427*4882a593Smuzhiyun 	bus_unregister(&cmm_subsys);
428*4882a593Smuzhiyun 	return rc;
429*4882a593Smuzhiyun }
430*4882a593Smuzhiyun 
431*4882a593Smuzhiyun /**
432*4882a593Smuzhiyun  * cmm_unregister_sysfs - Unregister from sysfs
433*4882a593Smuzhiyun  *
434*4882a593Smuzhiyun  **/
cmm_unregister_sysfs(struct device * dev)435*4882a593Smuzhiyun static void cmm_unregister_sysfs(struct device *dev)
436*4882a593Smuzhiyun {
437*4882a593Smuzhiyun 	int i;
438*4882a593Smuzhiyun 
439*4882a593Smuzhiyun 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
440*4882a593Smuzhiyun 		device_remove_file(dev, cmm_attrs[i]);
441*4882a593Smuzhiyun 	device_unregister(dev);
442*4882a593Smuzhiyun 	bus_unregister(&cmm_subsys);
443*4882a593Smuzhiyun }
444*4882a593Smuzhiyun 
445*4882a593Smuzhiyun /**
446*4882a593Smuzhiyun  * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
447*4882a593Smuzhiyun  *
448*4882a593Smuzhiyun  **/
cmm_reboot_notifier(struct notifier_block * nb,unsigned long action,void * unused)449*4882a593Smuzhiyun static int cmm_reboot_notifier(struct notifier_block *nb,
450*4882a593Smuzhiyun 			       unsigned long action, void *unused)
451*4882a593Smuzhiyun {
452*4882a593Smuzhiyun 	if (action == SYS_RESTART) {
453*4882a593Smuzhiyun 		if (cmm_thread_ptr)
454*4882a593Smuzhiyun 			kthread_stop(cmm_thread_ptr);
455*4882a593Smuzhiyun 		cmm_thread_ptr = NULL;
456*4882a593Smuzhiyun 		cmm_free_pages(atomic_long_read(&loaned_pages));
457*4882a593Smuzhiyun 	}
458*4882a593Smuzhiyun 	return NOTIFY_DONE;
459*4882a593Smuzhiyun }
460*4882a593Smuzhiyun 
/* Registered with the reboot notifier chain in cmm_init(). */
static struct notifier_block cmm_reboot_nb = {
	.notifier_call = cmm_reboot_notifier,
};
464*4882a593Smuzhiyun 
465*4882a593Smuzhiyun /**
466*4882a593Smuzhiyun  * cmm_memory_cb - Handle memory hotplug notifier calls
467*4882a593Smuzhiyun  * @self:	notifier block struct
468*4882a593Smuzhiyun  * @action:	action to take
469*4882a593Smuzhiyun  * @arg:	struct memory_notify data for handler
470*4882a593Smuzhiyun  *
471*4882a593Smuzhiyun  * Return value:
472*4882a593Smuzhiyun  *	NOTIFY_OK or notifier error based on subfunction return value
473*4882a593Smuzhiyun  *
474*4882a593Smuzhiyun  **/
cmm_memory_cb(struct notifier_block * self,unsigned long action,void * arg)475*4882a593Smuzhiyun static int cmm_memory_cb(struct notifier_block *self,
476*4882a593Smuzhiyun 			unsigned long action, void *arg)
477*4882a593Smuzhiyun {
478*4882a593Smuzhiyun 	int ret = 0;
479*4882a593Smuzhiyun 
480*4882a593Smuzhiyun 	switch (action) {
481*4882a593Smuzhiyun 	case MEM_GOING_OFFLINE:
482*4882a593Smuzhiyun 		mutex_lock(&hotplug_mutex);
483*4882a593Smuzhiyun 		hotplug_occurred = 1;
484*4882a593Smuzhiyun 		break;
485*4882a593Smuzhiyun 	case MEM_OFFLINE:
486*4882a593Smuzhiyun 	case MEM_CANCEL_OFFLINE:
487*4882a593Smuzhiyun 		mutex_unlock(&hotplug_mutex);
488*4882a593Smuzhiyun 		cmm_dbg("Memory offline operation complete.\n");
489*4882a593Smuzhiyun 		break;
490*4882a593Smuzhiyun 	case MEM_GOING_ONLINE:
491*4882a593Smuzhiyun 	case MEM_ONLINE:
492*4882a593Smuzhiyun 	case MEM_CANCEL_ONLINE:
493*4882a593Smuzhiyun 		break;
494*4882a593Smuzhiyun 	}
495*4882a593Smuzhiyun 
496*4882a593Smuzhiyun 	return notifier_from_errno(ret);
497*4882a593Smuzhiyun }
498*4882a593Smuzhiyun 
/* Memory hotplug notifier, registered at priority CMM_MEM_HOTPLUG_PRI. */
static struct notifier_block cmm_mem_nb = {
	.notifier_call = cmm_memory_cb,
	.priority = CMM_MEM_HOTPLUG_PRI
};
503*4882a593Smuzhiyun 
504*4882a593Smuzhiyun #ifdef CONFIG_BALLOON_COMPACTION
/* Mount backing the balloon's anon inode (needed for page migration). */
static struct vfsmount *balloon_mnt;

/* Minimal pseudo-filesystem: only a magic number is required. */
static int cmm_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type balloon_fs = {
	.name = "ppc-cmm",
	.init_fs_context = cmm_init_fs_context,
	.kill_sb = kill_anon_super,
};
517*4882a593Smuzhiyun 
/*
 * cmm_migratepage - balloon_dev_info migration callback, used by the
 * balloon-compaction core to replace an isolated loaned page with a
 * freshly allocated one.
 *
 * Loans @newpage, swaps it for @page on the balloon list (fixing up
 * per-zone managed counts if the zones differ), then activates and
 * releases @page. Returns MIGRATEPAGE_SUCCESS, or -EBUSY if @newpage
 * could not be loaned.
 */
static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
			   struct page *newpage, struct page *page,
			   enum migrate_mode mode)
{
	unsigned long flags;

	/*
	 * loan/"inflate" the newpage first.
	 *
	 * We might race against the cmm_thread who might discover after our
	 * loan request that another page is to be unloaned. However, once
	 * the cmm_thread runs again later, this error will automatically
	 * be corrected.
	 */
	if (plpar_page_set_loaned(newpage)) {
		/* Unlikely, but possible. Tell the caller not to retry now. */
		pr_err_ratelimited("%s: Cannot set page to loaned.", __func__);
		return -EBUSY;
	}

	/* balloon page list reference */
	get_page(newpage);

	/*
	 * When we migrate a page to a different zone, we have to fixup the
	 * count of both involved zones as we adjusted the managed page count
	 * when inflating.
	 */
	if (page_zone(page) != page_zone(newpage)) {
		adjust_managed_page_count(page, 1);
		adjust_managed_page_count(newpage, -1);
	}

	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
	balloon_page_insert(b_dev_info, newpage);
	balloon_page_delete(page);
	b_dev_info->isolated_pages--;
	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);

	/*
	 * activate/"deflate" the old page. We ignore any errors just like the
	 * other callers.
	 */
	plpar_page_set_active(page);

	/* balloon page list reference */
	put_page(page);

	return MIGRATEPAGE_SUCCESS;
}
568*4882a593Smuzhiyun 
cmm_balloon_compaction_init(void)569*4882a593Smuzhiyun static int cmm_balloon_compaction_init(void)
570*4882a593Smuzhiyun {
571*4882a593Smuzhiyun 	int rc;
572*4882a593Smuzhiyun 
573*4882a593Smuzhiyun 	balloon_devinfo_init(&b_dev_info);
574*4882a593Smuzhiyun 	b_dev_info.migratepage = cmm_migratepage;
575*4882a593Smuzhiyun 
576*4882a593Smuzhiyun 	balloon_mnt = kern_mount(&balloon_fs);
577*4882a593Smuzhiyun 	if (IS_ERR(balloon_mnt)) {
578*4882a593Smuzhiyun 		rc = PTR_ERR(balloon_mnt);
579*4882a593Smuzhiyun 		balloon_mnt = NULL;
580*4882a593Smuzhiyun 		return rc;
581*4882a593Smuzhiyun 	}
582*4882a593Smuzhiyun 
583*4882a593Smuzhiyun 	b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
584*4882a593Smuzhiyun 	if (IS_ERR(b_dev_info.inode)) {
585*4882a593Smuzhiyun 		rc = PTR_ERR(b_dev_info.inode);
586*4882a593Smuzhiyun 		b_dev_info.inode = NULL;
587*4882a593Smuzhiyun 		kern_unmount(balloon_mnt);
588*4882a593Smuzhiyun 		balloon_mnt = NULL;
589*4882a593Smuzhiyun 		return rc;
590*4882a593Smuzhiyun 	}
591*4882a593Smuzhiyun 
592*4882a593Smuzhiyun 	b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
593*4882a593Smuzhiyun 	return 0;
594*4882a593Smuzhiyun }
cmm_balloon_compaction_deinit(void)595*4882a593Smuzhiyun static void cmm_balloon_compaction_deinit(void)
596*4882a593Smuzhiyun {
597*4882a593Smuzhiyun 	if (b_dev_info.inode)
598*4882a593Smuzhiyun 		iput(b_dev_info.inode);
599*4882a593Smuzhiyun 	b_dev_info.inode = NULL;
600*4882a593Smuzhiyun 	kern_unmount(balloon_mnt);
601*4882a593Smuzhiyun 	balloon_mnt = NULL;
602*4882a593Smuzhiyun }
603*4882a593Smuzhiyun #else /* CONFIG_BALLOON_COMPACTION */
/* Stubs when CONFIG_BALLOON_COMPACTION is off: nothing to set up. */
static int cmm_balloon_compaction_init(void)
{
	return 0;
}

static void cmm_balloon_compaction_deinit(void)
{
}
612*4882a593Smuzhiyun #endif /* CONFIG_BALLOON_COMPACTION */
613*4882a593Smuzhiyun 
614*4882a593Smuzhiyun /**
615*4882a593Smuzhiyun  * cmm_init - Module initialization
616*4882a593Smuzhiyun  *
617*4882a593Smuzhiyun  * Return value:
618*4882a593Smuzhiyun  * 	0 on success / other on failure
619*4882a593Smuzhiyun  **/
cmm_init(void)620*4882a593Smuzhiyun static int cmm_init(void)
621*4882a593Smuzhiyun {
622*4882a593Smuzhiyun 	int rc;
623*4882a593Smuzhiyun 
624*4882a593Smuzhiyun 	if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
625*4882a593Smuzhiyun 		return -EOPNOTSUPP;
626*4882a593Smuzhiyun 
627*4882a593Smuzhiyun 	rc = cmm_balloon_compaction_init();
628*4882a593Smuzhiyun 	if (rc)
629*4882a593Smuzhiyun 		return rc;
630*4882a593Smuzhiyun 
631*4882a593Smuzhiyun 	rc = register_oom_notifier(&cmm_oom_nb);
632*4882a593Smuzhiyun 	if (rc < 0)
633*4882a593Smuzhiyun 		goto out_balloon_compaction;
634*4882a593Smuzhiyun 
635*4882a593Smuzhiyun 	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
636*4882a593Smuzhiyun 		goto out_oom_notifier;
637*4882a593Smuzhiyun 
638*4882a593Smuzhiyun 	if ((rc = cmm_sysfs_register(&cmm_dev)))
639*4882a593Smuzhiyun 		goto out_reboot_notifier;
640*4882a593Smuzhiyun 
641*4882a593Smuzhiyun 	rc = register_memory_notifier(&cmm_mem_nb);
642*4882a593Smuzhiyun 	if (rc)
643*4882a593Smuzhiyun 		goto out_unregister_notifier;
644*4882a593Smuzhiyun 
645*4882a593Smuzhiyun 	if (cmm_disabled)
646*4882a593Smuzhiyun 		return 0;
647*4882a593Smuzhiyun 
648*4882a593Smuzhiyun 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
649*4882a593Smuzhiyun 	if (IS_ERR(cmm_thread_ptr)) {
650*4882a593Smuzhiyun 		rc = PTR_ERR(cmm_thread_ptr);
651*4882a593Smuzhiyun 		goto out_unregister_notifier;
652*4882a593Smuzhiyun 	}
653*4882a593Smuzhiyun 
654*4882a593Smuzhiyun 	return 0;
655*4882a593Smuzhiyun out_unregister_notifier:
656*4882a593Smuzhiyun 	unregister_memory_notifier(&cmm_mem_nb);
657*4882a593Smuzhiyun 	cmm_unregister_sysfs(&cmm_dev);
658*4882a593Smuzhiyun out_reboot_notifier:
659*4882a593Smuzhiyun 	unregister_reboot_notifier(&cmm_reboot_nb);
660*4882a593Smuzhiyun out_oom_notifier:
661*4882a593Smuzhiyun 	unregister_oom_notifier(&cmm_oom_nb);
662*4882a593Smuzhiyun out_balloon_compaction:
663*4882a593Smuzhiyun 	cmm_balloon_compaction_deinit();
664*4882a593Smuzhiyun 	return rc;
665*4882a593Smuzhiyun }
666*4882a593Smuzhiyun 
/**
 * cmm_exit - Module exit
 *
 * Stops the polling thread, detaches all notifiers, returns every
 * loaned page, and tears down sysfs and balloon-compaction state.
 *
 * Return value:
 * 	nothing
 **/
static void cmm_exit(void)
{
	if (cmm_thread_ptr)
		kthread_stop(cmm_thread_ptr);
	unregister_oom_notifier(&cmm_oom_nb);
	unregister_reboot_notifier(&cmm_reboot_nb);
	unregister_memory_notifier(&cmm_mem_nb);
	/* Deflate fully before the accounting goes away. */
	cmm_free_pages(atomic_long_read(&loaned_pages));
	cmm_unregister_sysfs(&cmm_dev);
	cmm_balloon_compaction_deinit();
}
684*4882a593Smuzhiyun 
685*4882a593Smuzhiyun /**
686*4882a593Smuzhiyun  * cmm_set_disable - Disable/Enable CMM
687*4882a593Smuzhiyun  *
688*4882a593Smuzhiyun  * Return value:
689*4882a593Smuzhiyun  * 	0 on success / other on failure
690*4882a593Smuzhiyun  **/
cmm_set_disable(const char * val,const struct kernel_param * kp)691*4882a593Smuzhiyun static int cmm_set_disable(const char *val, const struct kernel_param *kp)
692*4882a593Smuzhiyun {
693*4882a593Smuzhiyun 	int disable = simple_strtoul(val, NULL, 10);
694*4882a593Smuzhiyun 
695*4882a593Smuzhiyun 	if (disable != 0 && disable != 1)
696*4882a593Smuzhiyun 		return -EINVAL;
697*4882a593Smuzhiyun 
698*4882a593Smuzhiyun 	if (disable && !cmm_disabled) {
699*4882a593Smuzhiyun 		if (cmm_thread_ptr)
700*4882a593Smuzhiyun 			kthread_stop(cmm_thread_ptr);
701*4882a593Smuzhiyun 		cmm_thread_ptr = NULL;
702*4882a593Smuzhiyun 		cmm_free_pages(atomic_long_read(&loaned_pages));
703*4882a593Smuzhiyun 	} else if (!disable && cmm_disabled) {
704*4882a593Smuzhiyun 		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
705*4882a593Smuzhiyun 		if (IS_ERR(cmm_thread_ptr))
706*4882a593Smuzhiyun 			return PTR_ERR(cmm_thread_ptr);
707*4882a593Smuzhiyun 	}
708*4882a593Smuzhiyun 
709*4882a593Smuzhiyun 	cmm_disabled = disable;
710*4882a593Smuzhiyun 	return 0;
711*4882a593Smuzhiyun }
712*4882a593Smuzhiyun 
/*
 * "disable" uses a custom setter so that toggling it starts/stops the
 * loaning thread and drains the balloon, not just flips the flag.
 */
module_param_call(disable, cmm_set_disable, param_get_uint,
		  &cmm_disabled, 0644);
MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
		 "[Default=" __stringify(CMM_DISABLE) "]");

module_init(cmm_init);
module_exit(cmm_exit);
720