xref: /OK3568_Linux_fs/kernel/fs/proc/kcore.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0
/*
 *	fs/proc/kcore.c kernel ELF core dumper
 *
 *	Modelled on fs/exec.c:aout_core_dump()
 *	Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *	ELF version written by David Howells <David.Howells@nexor.co.uk>
 *	Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *	Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *	Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */
12*4882a593Smuzhiyun 
13*4882a593Smuzhiyun #include <linux/crash_core.h>
14*4882a593Smuzhiyun #include <linux/mm.h>
15*4882a593Smuzhiyun #include <linux/proc_fs.h>
16*4882a593Smuzhiyun #include <linux/kcore.h>
17*4882a593Smuzhiyun #include <linux/user.h>
18*4882a593Smuzhiyun #include <linux/capability.h>
19*4882a593Smuzhiyun #include <linux/elf.h>
20*4882a593Smuzhiyun #include <linux/elfcore.h>
21*4882a593Smuzhiyun #include <linux/notifier.h>
22*4882a593Smuzhiyun #include <linux/vmalloc.h>
23*4882a593Smuzhiyun #include <linux/highmem.h>
24*4882a593Smuzhiyun #include <linux/printk.h>
25*4882a593Smuzhiyun #include <linux/memblock.h>
26*4882a593Smuzhiyun #include <linux/init.h>
27*4882a593Smuzhiyun #include <linux/slab.h>
28*4882a593Smuzhiyun #include <linux/uaccess.h>
29*4882a593Smuzhiyun #include <asm/io.h>
30*4882a593Smuzhiyun #include <linux/list.h>
31*4882a593Smuzhiyun #include <linux/ioport.h>
32*4882a593Smuzhiyun #include <linux/memory.h>
33*4882a593Smuzhiyun #include <linux/sched/task.h>
34*4882a593Smuzhiyun #include <linux/security.h>
35*4882a593Smuzhiyun #include <asm/sections.h>
36*4882a593Smuzhiyun #include "internal.h"
37*4882a593Smuzhiyun 
/* Owner name used for the PRSTATUS, PRPSINFO and TASKSTRUCT notes. */
#define CORE_STR "CORE"

/* ELF header flags; defaults to 0 unless defined elsewhere. */
#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/*
 * Default translation between a kernel virtual address and its offset inside
 * the data area of /proc/kcore. Either macro may already be defined, in which
 * case that definition is used instead.
 */
#ifndef kc_vaddr_to_offset
#define	kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif
#ifndef	kc_offset_to_vaddr
#define	kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif

/* The /proc/kcore entry; its ->size is refreshed by kcore_update_ram(). */
static struct proc_dir_entry *proc_root_kcore;

/*
 * Regions exported through /proc/kcore. After init time the list is read
 * under kclist_lock held for read and modified with it held for write.
 */
static LIST_HEAD(kclist_head);
static DECLARE_RWSEM(kclist_lock);

/* Non-zero when the RAM entries on kclist_head have to be rebuilt. */
static int kcore_need_update = 1;

/*
 * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
 * Same as oldmem_pfn_is_ram in vmcore
 */
static int (*mem_pfn_is_ram)(unsigned long pfn);
63*4882a593Smuzhiyun 
register_mem_pfn_is_ram(int (* fn)(unsigned long pfn))64*4882a593Smuzhiyun int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
65*4882a593Smuzhiyun {
66*4882a593Smuzhiyun 	if (mem_pfn_is_ram)
67*4882a593Smuzhiyun 		return -EBUSY;
68*4882a593Smuzhiyun 	mem_pfn_is_ram = fn;
69*4882a593Smuzhiyun 	return 0;
70*4882a593Smuzhiyun }
71*4882a593Smuzhiyun 
pfn_is_ram(unsigned long pfn)72*4882a593Smuzhiyun static int pfn_is_ram(unsigned long pfn)
73*4882a593Smuzhiyun {
74*4882a593Smuzhiyun 	if (mem_pfn_is_ram)
75*4882a593Smuzhiyun 		return mem_pfn_is_ram(pfn);
76*4882a593Smuzhiyun 	else
77*4882a593Smuzhiyun 		return 1;
78*4882a593Smuzhiyun }
79*4882a593Smuzhiyun 
80*4882a593Smuzhiyun /* This doesn't grab kclist_lock, so it should only be used at init time. */
kclist_add(struct kcore_list * new,void * addr,size_t size,int type)81*4882a593Smuzhiyun void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
82*4882a593Smuzhiyun 		       int type)
83*4882a593Smuzhiyun {
84*4882a593Smuzhiyun 	new->addr = (unsigned long)addr;
85*4882a593Smuzhiyun 	new->size = size;
86*4882a593Smuzhiyun 	new->type = type;
87*4882a593Smuzhiyun 
88*4882a593Smuzhiyun 	list_add_tail(&new->list, &kclist_head);
89*4882a593Smuzhiyun }
90*4882a593Smuzhiyun 
/*
 * Compute the layout of /proc/kcore from the current contents of kclist_head.
 *
 * @nphdr:       out, number of program headers (one PT_NOTE plus one per
 *               list entry)
 * @phdrs_len:   out, size in bytes of the program header table
 * @notes_len:   out, size in bytes of the note segment
 * @data_offset: out, page-aligned file offset at which memory data begins
 *
 * Returns the total file size: @data_offset plus the largest end offset
 * (kc_vaddr_to_offset(addr + size)) of any entry on the list.
 */
static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
			     size_t *data_offset)
{
	struct kcore_list *ent;
	size_t max_off = 0;
	int count = 1; /* PT_NOTE */

	list_for_each_entry(ent, &kclist_head, list) {
		size_t end_off = kc_vaddr_to_offset((size_t)ent->addr + ent->size);

		if (end_off > max_off)
			max_off = end_off;
		count++;
	}

	*nphdr = count;
	*phdrs_len = count * sizeof(struct elf_phdr);

	/*
	 * Four notes: three owned by CORE_STR (prstatus, prpsinfo, task
	 * struct) and the vmcoreinfo note, each padded to 4 bytes.
	 */
	*notes_len = (4 * sizeof(struct elf_note) +
		      3 * ALIGN(sizeof(CORE_STR), 4) +
		      VMCOREINFO_NOTE_NAME_BYTES +
		      ALIGN(sizeof(struct elf_prstatus), 4) +
		      ALIGN(sizeof(struct elf_prpsinfo), 4) +
		      ALIGN(arch_task_struct_size, 4) +
		      ALIGN(vmcoreinfo_size, 4));

	*data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
				  *notes_len);

	return *data_offset + max_off;
}
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun #ifdef CONFIG_HIGHMEM
121*4882a593Smuzhiyun /*
122*4882a593Smuzhiyun  * If no highmem, we can assume [0...max_low_pfn) continuous range of memory
123*4882a593Smuzhiyun  * because memory hole is not as big as !HIGHMEM case.
124*4882a593Smuzhiyun  * (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
125*4882a593Smuzhiyun  */
kcore_ram_list(struct list_head * head)126*4882a593Smuzhiyun static int kcore_ram_list(struct list_head *head)
127*4882a593Smuzhiyun {
128*4882a593Smuzhiyun 	struct kcore_list *ent;
129*4882a593Smuzhiyun 
130*4882a593Smuzhiyun 	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
131*4882a593Smuzhiyun 	if (!ent)
132*4882a593Smuzhiyun 		return -ENOMEM;
133*4882a593Smuzhiyun 	ent->addr = (unsigned long)__va(0);
134*4882a593Smuzhiyun 	ent->size = max_low_pfn << PAGE_SHIFT;
135*4882a593Smuzhiyun 	ent->type = KCORE_RAM;
136*4882a593Smuzhiyun 	list_add(&ent->list, head);
137*4882a593Smuzhiyun 	return 0;
138*4882a593Smuzhiyun }
139*4882a593Smuzhiyun 
140*4882a593Smuzhiyun #else /* !CONFIG_HIGHMEM */
141*4882a593Smuzhiyun 
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Calculate the vmemmap range that holds the struct pages of the RAM region
 * @ent and register it on @head as a KCORE_VMEMMAP entry.
 *
 * The range is widened to page boundaries and then cut short where it would
 * run into a KCORE_VMEMMAP entry already on @head; nothing is added if that
 * leaves it empty.
 *
 * Returns 1 on success (including when nothing had to be added) and 0 when
 * the new entry could not be allocated.
 */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	unsigned long first_pfn = __pa(ent->addr) >> PAGE_SHIFT;
	unsigned long end_pfn = first_pfn + (ent->size >> PAGE_SHIFT);
	unsigned long start, end;
	struct kcore_list *vmm, *tmp;

	start = ((unsigned long)pfn_to_page(first_pfn)) & PAGE_MASK;
	end = ((unsigned long)pfn_to_page(end_pfn)) - 1;
	end = PAGE_ALIGN(end);

	/* overlap check (because we have to align to a page) */
	list_for_each_entry(tmp, head, list) {
		if (tmp->type == KCORE_VMEMMAP &&
		    start < tmp->addr + tmp->size &&
		    end > tmp->addr)
			end = tmp->addr;
	}

	if (start >= end)
		return 1;

	vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
	if (!vmm)
		return 0;

	vmm->type = KCORE_VMEMMAP;
	vmm->addr = start;
	vmm->size = end - start;
	list_add_tail(&vmm->list, head);

	return 1;
}
#else
/* Without SPARSEMEM_VMEMMAP there is nothing to register; report success. */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	return 1;
}

#endif
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun static int
kclist_add_private(unsigned long pfn,unsigned long nr_pages,void * arg)186*4882a593Smuzhiyun kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
187*4882a593Smuzhiyun {
188*4882a593Smuzhiyun 	struct list_head *head = (struct list_head *)arg;
189*4882a593Smuzhiyun 	struct kcore_list *ent;
190*4882a593Smuzhiyun 	struct page *p;
191*4882a593Smuzhiyun 
192*4882a593Smuzhiyun 	if (!pfn_valid(pfn))
193*4882a593Smuzhiyun 		return 1;
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun 	p = pfn_to_page(pfn);
196*4882a593Smuzhiyun 
197*4882a593Smuzhiyun 	ent = kmalloc(sizeof(*ent), GFP_KERNEL);
198*4882a593Smuzhiyun 	if (!ent)
199*4882a593Smuzhiyun 		return -ENOMEM;
200*4882a593Smuzhiyun 	ent->addr = (unsigned long)page_to_virt(p);
201*4882a593Smuzhiyun 	ent->size = nr_pages << PAGE_SHIFT;
202*4882a593Smuzhiyun 
203*4882a593Smuzhiyun 	if (!virt_addr_valid(ent->addr))
204*4882a593Smuzhiyun 		goto free_out;
205*4882a593Smuzhiyun 
206*4882a593Smuzhiyun 	/* cut not-mapped area. ....from ppc-32 code. */
207*4882a593Smuzhiyun 	if (ULONG_MAX - ent->addr < ent->size)
208*4882a593Smuzhiyun 		ent->size = ULONG_MAX - ent->addr;
209*4882a593Smuzhiyun 
210*4882a593Smuzhiyun 	/*
211*4882a593Smuzhiyun 	 * We've already checked virt_addr_valid so we know this address
212*4882a593Smuzhiyun 	 * is a valid pointer, therefore we can check against it to determine
213*4882a593Smuzhiyun 	 * if we need to trim
214*4882a593Smuzhiyun 	 */
215*4882a593Smuzhiyun 	if (VMALLOC_START > ent->addr) {
216*4882a593Smuzhiyun 		if (VMALLOC_START - ent->addr < ent->size)
217*4882a593Smuzhiyun 			ent->size = VMALLOC_START - ent->addr;
218*4882a593Smuzhiyun 	}
219*4882a593Smuzhiyun 
220*4882a593Smuzhiyun 	ent->type = KCORE_RAM;
221*4882a593Smuzhiyun 	list_add_tail(&ent->list, head);
222*4882a593Smuzhiyun 
223*4882a593Smuzhiyun 	if (!get_sparsemem_vmemmap_info(ent, head)) {
224*4882a593Smuzhiyun 		list_del(&ent->list);
225*4882a593Smuzhiyun 		goto free_out;
226*4882a593Smuzhiyun 	}
227*4882a593Smuzhiyun 
228*4882a593Smuzhiyun 	return 0;
229*4882a593Smuzhiyun free_out:
230*4882a593Smuzhiyun 	kfree(ent);
231*4882a593Smuzhiyun 	return 1;
232*4882a593Smuzhiyun }
233*4882a593Smuzhiyun 
kcore_ram_list(struct list_head * list)234*4882a593Smuzhiyun static int kcore_ram_list(struct list_head *list)
235*4882a593Smuzhiyun {
236*4882a593Smuzhiyun 	int nid, ret;
237*4882a593Smuzhiyun 	unsigned long end_pfn;
238*4882a593Smuzhiyun 
239*4882a593Smuzhiyun 	/* Not inialized....update now */
240*4882a593Smuzhiyun 	/* find out "max pfn" */
241*4882a593Smuzhiyun 	end_pfn = 0;
242*4882a593Smuzhiyun 	for_each_node_state(nid, N_MEMORY) {
243*4882a593Smuzhiyun 		unsigned long node_end;
244*4882a593Smuzhiyun 		node_end = node_end_pfn(nid);
245*4882a593Smuzhiyun 		if (end_pfn < node_end)
246*4882a593Smuzhiyun 			end_pfn = node_end;
247*4882a593Smuzhiyun 	}
248*4882a593Smuzhiyun 	/* scan 0 to max_pfn */
249*4882a593Smuzhiyun 	ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private);
250*4882a593Smuzhiyun 	if (ret)
251*4882a593Smuzhiyun 		return -ENOMEM;
252*4882a593Smuzhiyun 	return 0;
253*4882a593Smuzhiyun }
254*4882a593Smuzhiyun #endif /* CONFIG_HIGHMEM */
255*4882a593Smuzhiyun 
kcore_update_ram(void)256*4882a593Smuzhiyun static int kcore_update_ram(void)
257*4882a593Smuzhiyun {
258*4882a593Smuzhiyun 	LIST_HEAD(list);
259*4882a593Smuzhiyun 	LIST_HEAD(garbage);
260*4882a593Smuzhiyun 	int nphdr;
261*4882a593Smuzhiyun 	size_t phdrs_len, notes_len, data_offset;
262*4882a593Smuzhiyun 	struct kcore_list *tmp, *pos;
263*4882a593Smuzhiyun 	int ret = 0;
264*4882a593Smuzhiyun 
265*4882a593Smuzhiyun 	down_write(&kclist_lock);
266*4882a593Smuzhiyun 	if (!xchg(&kcore_need_update, 0))
267*4882a593Smuzhiyun 		goto out;
268*4882a593Smuzhiyun 
269*4882a593Smuzhiyun 	ret = kcore_ram_list(&list);
270*4882a593Smuzhiyun 	if (ret) {
271*4882a593Smuzhiyun 		/* Couldn't get the RAM list, try again next time. */
272*4882a593Smuzhiyun 		WRITE_ONCE(kcore_need_update, 1);
273*4882a593Smuzhiyun 		list_splice_tail(&list, &garbage);
274*4882a593Smuzhiyun 		goto out;
275*4882a593Smuzhiyun 	}
276*4882a593Smuzhiyun 
277*4882a593Smuzhiyun 	list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
278*4882a593Smuzhiyun 		if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
279*4882a593Smuzhiyun 			list_move(&pos->list, &garbage);
280*4882a593Smuzhiyun 	}
281*4882a593Smuzhiyun 	list_splice_tail(&list, &kclist_head);
282*4882a593Smuzhiyun 
283*4882a593Smuzhiyun 	proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, &notes_len,
284*4882a593Smuzhiyun 					       &data_offset);
285*4882a593Smuzhiyun 
286*4882a593Smuzhiyun out:
287*4882a593Smuzhiyun 	up_write(&kclist_lock);
288*4882a593Smuzhiyun 	list_for_each_entry_safe(pos, tmp, &garbage, list) {
289*4882a593Smuzhiyun 		list_del(&pos->list);
290*4882a593Smuzhiyun 		kfree(pos);
291*4882a593Smuzhiyun 	}
292*4882a593Smuzhiyun 	return ret;
293*4882a593Smuzhiyun }
294*4882a593Smuzhiyun 
/*
 * Write one ELF note into @notes at offset *@i and advance *@i past it.
 *
 * @notes:  buffer receiving the note
 * @i:      in/out, byte offset into @notes
 * @name:   NUL-terminated note name; the terminator is stored too
 * @type:   value for the note's n_type field
 * @desc:   descriptor payload
 * @descsz: size of @desc in bytes
 *
 * The note consists of the header, then the name, then the descriptor; the
 * offset is rounded up to a multiple of 4 after the name and after the
 * descriptor. The caller must provide a buffer that is large enough.
 */
static void append_kcore_note(char *notes, size_t *i, const char *name,
			      unsigned int type, const void *desc,
			      size_t descsz)
{
	size_t pos = *i;
	struct elf_note *hdr = (struct elf_note *)&notes[pos];

	hdr->n_namesz = strlen(name) + 1;
	hdr->n_descsz = descsz;
	hdr->n_type = type;
	pos += sizeof(*hdr);

	memcpy(&notes[pos], name, hdr->n_namesz);
	pos = ALIGN(pos + hdr->n_namesz, 4);

	memcpy(&notes[pos], desc, descsz);
	pos = ALIGN(pos + descsz, 4);

	*i = pos;
}
310*4882a593Smuzhiyun 
311*4882a593Smuzhiyun static ssize_t
read_kcore(struct file * file,char __user * buffer,size_t buflen,loff_t * fpos)312*4882a593Smuzhiyun read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
313*4882a593Smuzhiyun {
314*4882a593Smuzhiyun 	char *buf = file->private_data;
315*4882a593Smuzhiyun 	size_t phdrs_offset, notes_offset, data_offset;
316*4882a593Smuzhiyun 	size_t phdrs_len, notes_len;
317*4882a593Smuzhiyun 	struct kcore_list *m;
318*4882a593Smuzhiyun 	size_t tsz;
319*4882a593Smuzhiyun 	int nphdr;
320*4882a593Smuzhiyun 	unsigned long start;
321*4882a593Smuzhiyun 	size_t orig_buflen = buflen;
322*4882a593Smuzhiyun 	int ret = 0;
323*4882a593Smuzhiyun 
324*4882a593Smuzhiyun 	down_read(&kclist_lock);
325*4882a593Smuzhiyun 
326*4882a593Smuzhiyun 	get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
327*4882a593Smuzhiyun 	phdrs_offset = sizeof(struct elfhdr);
328*4882a593Smuzhiyun 	notes_offset = phdrs_offset + phdrs_len;
329*4882a593Smuzhiyun 
330*4882a593Smuzhiyun 	/* ELF file header. */
331*4882a593Smuzhiyun 	if (buflen && *fpos < sizeof(struct elfhdr)) {
332*4882a593Smuzhiyun 		struct elfhdr ehdr = {
333*4882a593Smuzhiyun 			.e_ident = {
334*4882a593Smuzhiyun 				[EI_MAG0] = ELFMAG0,
335*4882a593Smuzhiyun 				[EI_MAG1] = ELFMAG1,
336*4882a593Smuzhiyun 				[EI_MAG2] = ELFMAG2,
337*4882a593Smuzhiyun 				[EI_MAG3] = ELFMAG3,
338*4882a593Smuzhiyun 				[EI_CLASS] = ELF_CLASS,
339*4882a593Smuzhiyun 				[EI_DATA] = ELF_DATA,
340*4882a593Smuzhiyun 				[EI_VERSION] = EV_CURRENT,
341*4882a593Smuzhiyun 				[EI_OSABI] = ELF_OSABI,
342*4882a593Smuzhiyun 			},
343*4882a593Smuzhiyun 			.e_type = ET_CORE,
344*4882a593Smuzhiyun 			.e_machine = ELF_ARCH,
345*4882a593Smuzhiyun 			.e_version = EV_CURRENT,
346*4882a593Smuzhiyun 			.e_phoff = sizeof(struct elfhdr),
347*4882a593Smuzhiyun 			.e_flags = ELF_CORE_EFLAGS,
348*4882a593Smuzhiyun 			.e_ehsize = sizeof(struct elfhdr),
349*4882a593Smuzhiyun 			.e_phentsize = sizeof(struct elf_phdr),
350*4882a593Smuzhiyun 			.e_phnum = nphdr,
351*4882a593Smuzhiyun 		};
352*4882a593Smuzhiyun 
353*4882a593Smuzhiyun 		tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
354*4882a593Smuzhiyun 		if (copy_to_user(buffer, (char *)&ehdr + *fpos, tsz)) {
355*4882a593Smuzhiyun 			ret = -EFAULT;
356*4882a593Smuzhiyun 			goto out;
357*4882a593Smuzhiyun 		}
358*4882a593Smuzhiyun 
359*4882a593Smuzhiyun 		buffer += tsz;
360*4882a593Smuzhiyun 		buflen -= tsz;
361*4882a593Smuzhiyun 		*fpos += tsz;
362*4882a593Smuzhiyun 	}
363*4882a593Smuzhiyun 
364*4882a593Smuzhiyun 	/* ELF program headers. */
365*4882a593Smuzhiyun 	if (buflen && *fpos < phdrs_offset + phdrs_len) {
366*4882a593Smuzhiyun 		struct elf_phdr *phdrs, *phdr;
367*4882a593Smuzhiyun 
368*4882a593Smuzhiyun 		phdrs = kzalloc(phdrs_len, GFP_KERNEL);
369*4882a593Smuzhiyun 		if (!phdrs) {
370*4882a593Smuzhiyun 			ret = -ENOMEM;
371*4882a593Smuzhiyun 			goto out;
372*4882a593Smuzhiyun 		}
373*4882a593Smuzhiyun 
374*4882a593Smuzhiyun 		phdrs[0].p_type = PT_NOTE;
375*4882a593Smuzhiyun 		phdrs[0].p_offset = notes_offset;
376*4882a593Smuzhiyun 		phdrs[0].p_filesz = notes_len;
377*4882a593Smuzhiyun 
378*4882a593Smuzhiyun 		phdr = &phdrs[1];
379*4882a593Smuzhiyun 		list_for_each_entry(m, &kclist_head, list) {
380*4882a593Smuzhiyun 			phdr->p_type = PT_LOAD;
381*4882a593Smuzhiyun 			phdr->p_flags = PF_R | PF_W | PF_X;
382*4882a593Smuzhiyun 			phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
383*4882a593Smuzhiyun 			if (m->type == KCORE_REMAP)
384*4882a593Smuzhiyun 				phdr->p_vaddr = (size_t)m->vaddr;
385*4882a593Smuzhiyun 			else
386*4882a593Smuzhiyun 				phdr->p_vaddr = (size_t)m->addr;
387*4882a593Smuzhiyun 			if (m->type == KCORE_RAM || m->type == KCORE_REMAP)
388*4882a593Smuzhiyun 				phdr->p_paddr = __pa(m->addr);
389*4882a593Smuzhiyun 			else if (m->type == KCORE_TEXT)
390*4882a593Smuzhiyun 				phdr->p_paddr = __pa_symbol(m->addr);
391*4882a593Smuzhiyun 			else
392*4882a593Smuzhiyun 				phdr->p_paddr = (elf_addr_t)-1;
393*4882a593Smuzhiyun 			phdr->p_filesz = phdr->p_memsz = m->size;
394*4882a593Smuzhiyun 			phdr->p_align = PAGE_SIZE;
395*4882a593Smuzhiyun 			phdr++;
396*4882a593Smuzhiyun 		}
397*4882a593Smuzhiyun 
398*4882a593Smuzhiyun 		tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
399*4882a593Smuzhiyun 		if (copy_to_user(buffer, (char *)phdrs + *fpos - phdrs_offset,
400*4882a593Smuzhiyun 				 tsz)) {
401*4882a593Smuzhiyun 			kfree(phdrs);
402*4882a593Smuzhiyun 			ret = -EFAULT;
403*4882a593Smuzhiyun 			goto out;
404*4882a593Smuzhiyun 		}
405*4882a593Smuzhiyun 		kfree(phdrs);
406*4882a593Smuzhiyun 
407*4882a593Smuzhiyun 		buffer += tsz;
408*4882a593Smuzhiyun 		buflen -= tsz;
409*4882a593Smuzhiyun 		*fpos += tsz;
410*4882a593Smuzhiyun 	}
411*4882a593Smuzhiyun 
412*4882a593Smuzhiyun 	/* ELF note segment. */
413*4882a593Smuzhiyun 	if (buflen && *fpos < notes_offset + notes_len) {
414*4882a593Smuzhiyun 		struct elf_prstatus prstatus = {};
415*4882a593Smuzhiyun 		struct elf_prpsinfo prpsinfo = {
416*4882a593Smuzhiyun 			.pr_sname = 'R',
417*4882a593Smuzhiyun 			.pr_fname = "vmlinux",
418*4882a593Smuzhiyun 		};
419*4882a593Smuzhiyun 		char *notes;
420*4882a593Smuzhiyun 		size_t i = 0;
421*4882a593Smuzhiyun 
422*4882a593Smuzhiyun 		strlcpy(prpsinfo.pr_psargs, saved_command_line,
423*4882a593Smuzhiyun 			sizeof(prpsinfo.pr_psargs));
424*4882a593Smuzhiyun 
425*4882a593Smuzhiyun 		notes = kzalloc(notes_len, GFP_KERNEL);
426*4882a593Smuzhiyun 		if (!notes) {
427*4882a593Smuzhiyun 			ret = -ENOMEM;
428*4882a593Smuzhiyun 			goto out;
429*4882a593Smuzhiyun 		}
430*4882a593Smuzhiyun 
431*4882a593Smuzhiyun 		append_kcore_note(notes, &i, CORE_STR, NT_PRSTATUS, &prstatus,
432*4882a593Smuzhiyun 				  sizeof(prstatus));
433*4882a593Smuzhiyun 		append_kcore_note(notes, &i, CORE_STR, NT_PRPSINFO, &prpsinfo,
434*4882a593Smuzhiyun 				  sizeof(prpsinfo));
435*4882a593Smuzhiyun 		append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current,
436*4882a593Smuzhiyun 				  arch_task_struct_size);
437*4882a593Smuzhiyun 		/*
438*4882a593Smuzhiyun 		 * vmcoreinfo_size is mostly constant after init time, but it
439*4882a593Smuzhiyun 		 * can be changed by crash_save_vmcoreinfo(). Racing here with a
440*4882a593Smuzhiyun 		 * panic on another CPU before the machine goes down is insanely
441*4882a593Smuzhiyun 		 * unlikely, but it's better to not leave potential buffer
442*4882a593Smuzhiyun 		 * overflows lying around, regardless.
443*4882a593Smuzhiyun 		 */
444*4882a593Smuzhiyun 		append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
445*4882a593Smuzhiyun 				  vmcoreinfo_data,
446*4882a593Smuzhiyun 				  min(vmcoreinfo_size, notes_len - i));
447*4882a593Smuzhiyun 
448*4882a593Smuzhiyun 		tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
449*4882a593Smuzhiyun 		if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) {
450*4882a593Smuzhiyun 			kfree(notes);
451*4882a593Smuzhiyun 			ret = -EFAULT;
452*4882a593Smuzhiyun 			goto out;
453*4882a593Smuzhiyun 		}
454*4882a593Smuzhiyun 		kfree(notes);
455*4882a593Smuzhiyun 
456*4882a593Smuzhiyun 		buffer += tsz;
457*4882a593Smuzhiyun 		buflen -= tsz;
458*4882a593Smuzhiyun 		*fpos += tsz;
459*4882a593Smuzhiyun 	}
460*4882a593Smuzhiyun 
461*4882a593Smuzhiyun 	/*
462*4882a593Smuzhiyun 	 * Check to see if our file offset matches with any of
463*4882a593Smuzhiyun 	 * the addresses in the elf_phdr on our list.
464*4882a593Smuzhiyun 	 */
465*4882a593Smuzhiyun 	start = kc_offset_to_vaddr(*fpos - data_offset);
466*4882a593Smuzhiyun 	if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
467*4882a593Smuzhiyun 		tsz = buflen;
468*4882a593Smuzhiyun 
469*4882a593Smuzhiyun 	m = NULL;
470*4882a593Smuzhiyun 	while (buflen) {
471*4882a593Smuzhiyun 		/*
472*4882a593Smuzhiyun 		 * If this is the first iteration or the address is not within
473*4882a593Smuzhiyun 		 * the previous entry, search for a matching entry.
474*4882a593Smuzhiyun 		 */
475*4882a593Smuzhiyun 		if (!m || start < m->addr || start >= m->addr + m->size) {
476*4882a593Smuzhiyun 			list_for_each_entry(m, &kclist_head, list) {
477*4882a593Smuzhiyun 				if (start >= m->addr &&
478*4882a593Smuzhiyun 				    start < m->addr + m->size)
479*4882a593Smuzhiyun 					break;
480*4882a593Smuzhiyun 			}
481*4882a593Smuzhiyun 		}
482*4882a593Smuzhiyun 
483*4882a593Smuzhiyun 		if (&m->list == &kclist_head) {
484*4882a593Smuzhiyun 			if (clear_user(buffer, tsz)) {
485*4882a593Smuzhiyun 				ret = -EFAULT;
486*4882a593Smuzhiyun 				goto out;
487*4882a593Smuzhiyun 			}
488*4882a593Smuzhiyun 			m = NULL;	/* skip the list anchor */
489*4882a593Smuzhiyun 		} else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) {
490*4882a593Smuzhiyun 			if (clear_user(buffer, tsz)) {
491*4882a593Smuzhiyun 				ret = -EFAULT;
492*4882a593Smuzhiyun 				goto out;
493*4882a593Smuzhiyun 			}
494*4882a593Smuzhiyun 		} else if (m->type == KCORE_VMALLOC) {
495*4882a593Smuzhiyun 			vread(buf, (char *)start, tsz);
496*4882a593Smuzhiyun 			/* we have to zero-fill user buffer even if no read */
497*4882a593Smuzhiyun 			if (copy_to_user(buffer, buf, tsz)) {
498*4882a593Smuzhiyun 				ret = -EFAULT;
499*4882a593Smuzhiyun 				goto out;
500*4882a593Smuzhiyun 			}
501*4882a593Smuzhiyun 		} else if (m->type == KCORE_USER) {
502*4882a593Smuzhiyun 			/* User page is handled prior to normal kernel page: */
503*4882a593Smuzhiyun 			if (copy_to_user(buffer, (char *)start, tsz)) {
504*4882a593Smuzhiyun 				ret = -EFAULT;
505*4882a593Smuzhiyun 				goto out;
506*4882a593Smuzhiyun 			}
507*4882a593Smuzhiyun 		} else {
508*4882a593Smuzhiyun 			if (kern_addr_valid(start)) {
509*4882a593Smuzhiyun 				/*
510*4882a593Smuzhiyun 				 * Using bounce buffer to bypass the
511*4882a593Smuzhiyun 				 * hardened user copy kernel text checks.
512*4882a593Smuzhiyun 				 */
513*4882a593Smuzhiyun 				if (copy_from_kernel_nofault(buf, (void *)start,
514*4882a593Smuzhiyun 						tsz)) {
515*4882a593Smuzhiyun 					if (clear_user(buffer, tsz)) {
516*4882a593Smuzhiyun 						ret = -EFAULT;
517*4882a593Smuzhiyun 						goto out;
518*4882a593Smuzhiyun 					}
519*4882a593Smuzhiyun 				} else {
520*4882a593Smuzhiyun 					if (copy_to_user(buffer, buf, tsz)) {
521*4882a593Smuzhiyun 						ret = -EFAULT;
522*4882a593Smuzhiyun 						goto out;
523*4882a593Smuzhiyun 					}
524*4882a593Smuzhiyun 				}
525*4882a593Smuzhiyun 			} else {
526*4882a593Smuzhiyun 				if (clear_user(buffer, tsz)) {
527*4882a593Smuzhiyun 					ret = -EFAULT;
528*4882a593Smuzhiyun 					goto out;
529*4882a593Smuzhiyun 				}
530*4882a593Smuzhiyun 			}
531*4882a593Smuzhiyun 		}
532*4882a593Smuzhiyun 		buflen -= tsz;
533*4882a593Smuzhiyun 		*fpos += tsz;
534*4882a593Smuzhiyun 		buffer += tsz;
535*4882a593Smuzhiyun 		start += tsz;
536*4882a593Smuzhiyun 		tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
537*4882a593Smuzhiyun 	}
538*4882a593Smuzhiyun 
539*4882a593Smuzhiyun out:
540*4882a593Smuzhiyun 	up_read(&kclist_lock);
541*4882a593Smuzhiyun 	if (ret)
542*4882a593Smuzhiyun 		return ret;
543*4882a593Smuzhiyun 	return orig_buflen - buflen;
544*4882a593Smuzhiyun }
545*4882a593Smuzhiyun 
open_kcore(struct inode * inode,struct file * filp)546*4882a593Smuzhiyun static int open_kcore(struct inode *inode, struct file *filp)
547*4882a593Smuzhiyun {
548*4882a593Smuzhiyun 	int ret = security_locked_down(LOCKDOWN_KCORE);
549*4882a593Smuzhiyun 
550*4882a593Smuzhiyun 	if (!capable(CAP_SYS_RAWIO))
551*4882a593Smuzhiyun 		return -EPERM;
552*4882a593Smuzhiyun 
553*4882a593Smuzhiyun 	if (ret)
554*4882a593Smuzhiyun 		return ret;
555*4882a593Smuzhiyun 
556*4882a593Smuzhiyun 	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
557*4882a593Smuzhiyun 	if (!filp->private_data)
558*4882a593Smuzhiyun 		return -ENOMEM;
559*4882a593Smuzhiyun 
560*4882a593Smuzhiyun 	if (kcore_need_update)
561*4882a593Smuzhiyun 		kcore_update_ram();
562*4882a593Smuzhiyun 	if (i_size_read(inode) != proc_root_kcore->size) {
563*4882a593Smuzhiyun 		inode_lock(inode);
564*4882a593Smuzhiyun 		i_size_write(inode, proc_root_kcore->size);
565*4882a593Smuzhiyun 		inode_unlock(inode);
566*4882a593Smuzhiyun 	}
567*4882a593Smuzhiyun 	return 0;
568*4882a593Smuzhiyun }
569*4882a593Smuzhiyun 
release_kcore(struct inode * inode,struct file * file)570*4882a593Smuzhiyun static int release_kcore(struct inode *inode, struct file *file)
571*4882a593Smuzhiyun {
572*4882a593Smuzhiyun 	kfree(file->private_data);
573*4882a593Smuzhiyun 	return 0;
574*4882a593Smuzhiyun }
575*4882a593Smuzhiyun 
576*4882a593Smuzhiyun static const struct proc_ops kcore_proc_ops = {
577*4882a593Smuzhiyun 	.proc_read	= read_kcore,
578*4882a593Smuzhiyun 	.proc_open	= open_kcore,
579*4882a593Smuzhiyun 	.proc_release	= release_kcore,
580*4882a593Smuzhiyun 	.proc_lseek	= default_llseek,
581*4882a593Smuzhiyun };
582*4882a593Smuzhiyun 
583*4882a593Smuzhiyun /* just remember that we have to update kcore */
kcore_callback(struct notifier_block * self,unsigned long action,void * arg)584*4882a593Smuzhiyun static int __meminit kcore_callback(struct notifier_block *self,
585*4882a593Smuzhiyun 				    unsigned long action, void *arg)
586*4882a593Smuzhiyun {
587*4882a593Smuzhiyun 	switch (action) {
588*4882a593Smuzhiyun 	case MEM_ONLINE:
589*4882a593Smuzhiyun 	case MEM_OFFLINE:
590*4882a593Smuzhiyun 		kcore_need_update = 1;
591*4882a593Smuzhiyun 		break;
592*4882a593Smuzhiyun 	}
593*4882a593Smuzhiyun 	return NOTIFY_OK;
594*4882a593Smuzhiyun }
595*4882a593Smuzhiyun 
596*4882a593Smuzhiyun static struct notifier_block kcore_callback_nb __meminitdata = {
597*4882a593Smuzhiyun 	.notifier_call = kcore_callback,
598*4882a593Smuzhiyun 	.priority = 0,
599*4882a593Smuzhiyun };
600*4882a593Smuzhiyun 
601*4882a593Smuzhiyun static struct kcore_list kcore_vmalloc;
602*4882a593Smuzhiyun 
603*4882a593Smuzhiyun #ifdef CONFIG_ARCH_PROC_KCORE_TEXT
604*4882a593Smuzhiyun static struct kcore_list kcore_text;
605*4882a593Smuzhiyun /*
606*4882a593Smuzhiyun  * If defined, special segment is used for mapping kernel text instead of
607*4882a593Smuzhiyun  * direct-map area. We need to create special TEXT section.
608*4882a593Smuzhiyun  */
proc_kcore_text_init(void)609*4882a593Smuzhiyun static void __init proc_kcore_text_init(void)
610*4882a593Smuzhiyun {
611*4882a593Smuzhiyun 	kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
612*4882a593Smuzhiyun }
613*4882a593Smuzhiyun #else
proc_kcore_text_init(void)614*4882a593Smuzhiyun static void __init proc_kcore_text_init(void)
615*4882a593Smuzhiyun {
616*4882a593Smuzhiyun }
617*4882a593Smuzhiyun #endif
618*4882a593Smuzhiyun 
619*4882a593Smuzhiyun #if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
620*4882a593Smuzhiyun /*
621*4882a593Smuzhiyun  * MODULES_VADDR has no intersection with VMALLOC_ADDR.
622*4882a593Smuzhiyun  */
623*4882a593Smuzhiyun static struct kcore_list kcore_modules;
add_modules_range(void)624*4882a593Smuzhiyun static void __init add_modules_range(void)
625*4882a593Smuzhiyun {
626*4882a593Smuzhiyun 	if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) {
627*4882a593Smuzhiyun 		kclist_add(&kcore_modules, (void *)MODULES_VADDR,
628*4882a593Smuzhiyun 			MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
629*4882a593Smuzhiyun 	}
630*4882a593Smuzhiyun }
631*4882a593Smuzhiyun #else
add_modules_range(void)632*4882a593Smuzhiyun static void __init add_modules_range(void)
633*4882a593Smuzhiyun {
634*4882a593Smuzhiyun }
635*4882a593Smuzhiyun #endif
636*4882a593Smuzhiyun 
proc_kcore_init(void)637*4882a593Smuzhiyun static int __init proc_kcore_init(void)
638*4882a593Smuzhiyun {
639*4882a593Smuzhiyun 	proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &kcore_proc_ops);
640*4882a593Smuzhiyun 	if (!proc_root_kcore) {
641*4882a593Smuzhiyun 		pr_err("couldn't create /proc/kcore\n");
642*4882a593Smuzhiyun 		return 0; /* Always returns 0. */
643*4882a593Smuzhiyun 	}
644*4882a593Smuzhiyun 	/* Store text area if it's special */
645*4882a593Smuzhiyun 	proc_kcore_text_init();
646*4882a593Smuzhiyun 	/* Store vmalloc area */
647*4882a593Smuzhiyun 	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
648*4882a593Smuzhiyun 		VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
649*4882a593Smuzhiyun 	add_modules_range();
650*4882a593Smuzhiyun 	/* Store direct-map area from physical memory map */
651*4882a593Smuzhiyun 	kcore_update_ram();
652*4882a593Smuzhiyun 	register_hotmemory_notifier(&kcore_callback_nb);
653*4882a593Smuzhiyun 
654*4882a593Smuzhiyun 	return 0;
655*4882a593Smuzhiyun }
656*4882a593Smuzhiyun fs_initcall(proc_kcore_init);
657