// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * arch/powerpc/platforms/powernv/memtrace.c
 *
 * Copyright (C) IBM Corporation, 2014, 2017
 * Anton Blanchard, Rashmica Gupta.
 */

#define pr_fmt(fmt) "memtrace: " fmt

#include <linux/bitops.h>
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/numa.h>
#include <asm/machdep.h>
#include <asm/debugfs.h>

/* This enables us to keep track of the memory removed from each node. */
struct memtrace_entry {
	void *mem;
	u64 start;
	u64 size;
	u32 nid;
	struct dentry *dir;
	char name[16];
};

static DEFINE_MUTEX(memtrace_mutex);
static u64 memtrace_size;

static struct memtrace_entry *memtrace_array;
static unsigned int memtrace_array_nr;

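/* debugfs read handler: copy out the contents of a node's trace buffer. */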
static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
			     size_t count, loff_t *ppos)
{
	struct memtrace_entry *ent = filp->private_data;

	return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
}

static const struct file_operations memtrace_fops = {
	.llseek = default_llseek,
	.read	= memtrace_read,
	.open	= simple_open,
};

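/*
 * walk_memory_blocks() callbacks: the first fails the walk if any block in
 * the range is not online, the second forces a block's state to the value
 * passed in @arg.
 */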
static int check_memblock_online(struct memory_block *mem, void *arg)
{
	if (mem->state != MEM_ONLINE)
		return -1;

	return 0;
}

static int change_memblock_state(struct memory_block *mem, void *arg)
{
	unsigned long state = (unsigned long)arg;

	mem->state = state;

	return 0;
}

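/*
 * Zero the range before handing it over, so stale kernel data can't later
 * be read back through the debugfs "trace" file. The range must still be
 * covered by the linear mapping at this point.
 */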
static void memtrace_clear_range(unsigned long start_pfn,
				 unsigned long nr_pages)
{
	unsigned long pfn;

	/*
	 * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM
	 * does not apply, avoid passing around "struct page" and use
	 * clear_page() instead directly.
	 */
	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
		if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
			cond_resched();
		clear_page(__va(PFN_PHYS(pfn)));
	}
}

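/*
 * Offline all memory blocks in the given range. Called with
 * device_hotplug_lock held. Every block must already be online; blocks are
 * flagged MEM_GOING_OFFLINE before offline_pages() runs, flipped back to
 * MEM_ONLINE if offlining fails, and left MEM_OFFLINE on success.
 */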
static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
{
	const unsigned long start = PFN_PHYS(start_pfn);
	const unsigned long size = PFN_PHYS(nr_pages);

	if (walk_memory_blocks(start, size, NULL, check_memblock_online))
		return false;

	walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE,
			   change_memblock_state);

	if (offline_pages(start_pfn, nr_pages)) {
		walk_memory_blocks(start, size, (void *)MEM_ONLINE,
				   change_memblock_state);
		return false;
	}

	walk_memory_blocks(start, size, (void *)MEM_OFFLINE,
			   change_memblock_state);

	return true;
}

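/*
 * Carve out @size bytes from node @nid: scan the node's memory from the top
 * down in size-aligned steps and take the first range that can be fully
 * offlined and removed. Returns the physical base address, or 0 on failure.
 */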
static u64 memtrace_alloc_node(u32 nid, u64 size)
{
	u64 start_pfn, end_pfn, nr_pages, pfn;
	u64 base_pfn;
	u64 bytes = memory_block_size_bytes();

	if (!node_spanned_pages(nid))
		return 0;

	start_pfn = node_start_pfn(nid);
	end_pfn = node_end_pfn(nid);
	nr_pages = size >> PAGE_SHIFT;

	/* Trace memory needs to be aligned to the size */
	end_pfn = round_down(end_pfn - nr_pages, nr_pages);

	lock_device_hotplug();
	for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
		if (memtrace_offline_pages(nid, base_pfn, nr_pages)) {
			/*
			 * Clear the range while we still have a linear
			 * mapping.
			 */
			memtrace_clear_range(base_pfn, nr_pages);
			/*
			 * Remove memory in memory block size chunks so that
			 * iomem resources are always split to the same size and
			 * we never try to remove memory that spans two iomem
			 * resources.
			 */
			end_pfn = base_pfn + nr_pages;
			for (pfn = base_pfn; pfn < end_pfn; pfn += bytes >> PAGE_SHIFT) {
				__remove_memory(nid, pfn << PAGE_SHIFT, bytes);
			}
			unlock_device_hotplug();
			return base_pfn << PAGE_SHIFT;
		}
	}
	unlock_device_hotplug();

	return 0;
}

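/*
 * Allocate a trace buffer of @size bytes on every online node, recording
 * each successful allocation in memtrace_array.
 */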
static int memtrace_init_regions_runtime(u64 size)
{
	u32 nid;
	u64 m;

	memtrace_array = kcalloc(num_online_nodes(),
				sizeof(struct memtrace_entry), GFP_KERNEL);
	if (!memtrace_array) {
		pr_err("Failed to allocate memtrace_array\n");
		return -EINVAL;
	}

	for_each_online_node(nid) {
		m = memtrace_alloc_node(nid, size);

		/*
		 * A node might not have any local memory, so warn but
		 * continue on.
		 */
		if (!m) {
			pr_err("Failed to allocate trace memory on node %d\n", nid);
			continue;
		}

		pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);

		memtrace_array[memtrace_array_nr].start = m;
		memtrace_array[memtrace_array_nr].size = size;
		memtrace_array[memtrace_array_nr].nid = nid;
		memtrace_array_nr++;
	}

	return 0;
}

static struct dentry *memtrace_debugfs_dir;

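/*
 * Expose each allocated region under debugfs as <nid>/{trace,start,size},
 * mapping the (now removed) physical range with ioremap() so the "trace"
 * file can read it.
 */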
static int memtrace_init_debugfs(void)
{
	int ret = 0;
	int i;

	for (i = 0; i < memtrace_array_nr; i++) {
		struct dentry *dir;
		struct memtrace_entry *ent = &memtrace_array[i];

		ent->mem = ioremap(ent->start, ent->size);
		/* Warn but continue on */
		if (!ent->mem) {
			pr_err("Failed to map trace memory at 0x%llx\n",
			       ent->start);
			ret = -1;
			continue;
		}

		snprintf(ent->name, 16, "%08x", ent->nid);
		dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);

		ent->dir = dir;
		debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
		debugfs_create_x64("start", 0400, dir, &ent->start);
		debugfs_create_x64("size", 0400, dir, &ent->size);
	}

	return ret;
}

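/* walk_memory_blocks() callback: bring a memory block back online. */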
static int online_mem_block(struct memory_block *mem, void *arg)
{
	return device_online(&mem->dev);
}

/*
 * Iterate through the chunks of memory we have removed from the kernel
 * and attempt to add them back to the kernel.
 */
static int memtrace_online(void)
{
	int i, ret = 0;
	struct memtrace_entry *ent;

	for (i = memtrace_array_nr - 1; i >= 0; i--) {
		ent = &memtrace_array[i];

		/* We have onlined this chunk previously */
		if (ent->nid == NUMA_NO_NODE)
			continue;

		/* Remove from io mappings */
		if (ent->mem) {
			iounmap(ent->mem);
			ent->mem = NULL;
		}

		if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) {
			pr_err("Failed to add trace memory to node %d\n",
				ent->nid);
			ret += 1;
			continue;
		}

		lock_device_hotplug();
		walk_memory_blocks(ent->start, ent->size, NULL,
				   online_mem_block);
		unlock_device_hotplug();

		/*
		 * Memory was added successfully so clean up references to it
		 * so on reentry we can tell that this chunk was added.
		 */
		debugfs_remove_recursive(ent->dir);
		pr_info("Added trace memory back to node %d\n", ent->nid);
		ent->size = ent->start = ent->nid = NUMA_NO_NODE;
	}
	if (ret)
		return ret;

	/* If all chunks of memory were added successfully, reset globals */
	kfree(memtrace_array);
	memtrace_array = NULL;
	memtrace_size = 0;
	memtrace_array_nr = 0;
	return 0;
}

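/*
 * Handle writes to the "enable" debugfs file: a non-zero, memory-block
 * aligned size first re-adds any previously removed memory and then carves
 * out fresh regions of the new size; writing 0 just gives the memory back.
 * For example (path assuming the standard powerpc debugfs root):
 *
 *   echo 0x10000000 > /sys/kernel/debug/powerpc/memtrace/enable
 */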
static int memtrace_enable_set(void *data, u64 val)
{
	int rc = -EAGAIN;
	u64 bytes;

	/*
	 * Don't attempt to do anything if size isn't aligned to a memory
	 * block or equal to zero.
	 */
	bytes = memory_block_size_bytes();
	if (val & (bytes - 1)) {
		pr_err("Value must be aligned with 0x%llx\n", bytes);
		return -EINVAL;
	}

	mutex_lock(&memtrace_mutex);

	/* Re-add/online previously removed/offlined memory */
	if (memtrace_size) {
		if (memtrace_online())
			goto out_unlock;
	}

	if (!val) {
		rc = 0;
		goto out_unlock;
	}

	/* Offline and remove memory */
	if (memtrace_init_regions_runtime(val))
		goto out_unlock;

	if (memtrace_init_debugfs())
		goto out_unlock;

	memtrace_size = val;
	rc = 0;
out_unlock:
	mutex_unlock(&memtrace_mutex);
	return rc;
}

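/* Report the currently enabled trace size back through "enable". */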
static int memtrace_enable_get(void *data, u64 *val)
{
	*val = memtrace_size;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
					memtrace_enable_set, "0x%016llx\n");

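/*
 * Create the "memtrace" directory and its "enable" control file under the
 * powerpc debugfs root at boot.
 */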
static int memtrace_init(void)
{
	memtrace_debugfs_dir = debugfs_create_dir("memtrace",
						  powerpc_debugfs_root);

	debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
			    NULL, &memtrace_init_fops);

	return 0;
}
machine_device_initcall(powernv, memtrace_init);