// SPDX-License-Identifier: GPL-2.0
/*
 *	fs/proc/kcore.c kernel ELF core dumper
 *
 *	Modelled on fs/exec.c:aout_core_dump()
 *	Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *	ELF version written by David Howells <David.Howells@nexor.co.uk>
 *	Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *	Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *	Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */
12*4882a593Smuzhiyun
13*4882a593Smuzhiyun #include <linux/crash_core.h>
14*4882a593Smuzhiyun #include <linux/mm.h>
15*4882a593Smuzhiyun #include <linux/proc_fs.h>
16*4882a593Smuzhiyun #include <linux/kcore.h>
17*4882a593Smuzhiyun #include <linux/user.h>
18*4882a593Smuzhiyun #include <linux/capability.h>
19*4882a593Smuzhiyun #include <linux/elf.h>
20*4882a593Smuzhiyun #include <linux/elfcore.h>
21*4882a593Smuzhiyun #include <linux/notifier.h>
22*4882a593Smuzhiyun #include <linux/vmalloc.h>
23*4882a593Smuzhiyun #include <linux/highmem.h>
24*4882a593Smuzhiyun #include <linux/printk.h>
25*4882a593Smuzhiyun #include <linux/memblock.h>
26*4882a593Smuzhiyun #include <linux/init.h>
27*4882a593Smuzhiyun #include <linux/slab.h>
28*4882a593Smuzhiyun #include <linux/uaccess.h>
29*4882a593Smuzhiyun #include <asm/io.h>
30*4882a593Smuzhiyun #include <linux/list.h>
31*4882a593Smuzhiyun #include <linux/ioport.h>
32*4882a593Smuzhiyun #include <linux/memory.h>
33*4882a593Smuzhiyun #include <linux/sched/task.h>
34*4882a593Smuzhiyun #include <linux/security.h>
35*4882a593Smuzhiyun #include <asm/sections.h>
36*4882a593Smuzhiyun #include "internal.h"
37*4882a593Smuzhiyun
/* Owner name used for the generic ELF notes built in read_kcore(). */
#define CORE_STR "CORE"

/* Value stored in the ELF header's e_flags unless already defined. */
#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/* The /proc/kcore entry; its ->size is refreshed by kcore_update_ram(). */
static struct proc_dir_entry *proc_root_kcore;


/*
 * Default translation between a kernel virtual address and an offset into
 * the data part of the file: offsets are relative to PAGE_OFFSET unless
 * these macros have already been defined.
 */
#ifndef kc_vaddr_to_offset
#define	kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif
#ifndef	kc_offset_to_vaddr
#define	kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif

/* List of struct kcore_list regions exported through /proc/kcore. */
static LIST_HEAD(kclist_head);
/*
 * Guards kclist_head: taken for writing by kcore_update_ram() and for
 * reading by read_kcore(). kclist_add() does not take it.
 */
static DECLARE_RWSEM(kclist_lock);
/* Non-zero when the RAM/vmemmap entries must be rebuilt before use. */
static int kcore_need_update = 1;

/*
 * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
 * Same as oldmem_pfn_is_ram in vmcore
 */
static int (*mem_pfn_is_ram)(unsigned long pfn);
63*4882a593Smuzhiyun
register_mem_pfn_is_ram(int (* fn)(unsigned long pfn))64*4882a593Smuzhiyun int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
65*4882a593Smuzhiyun {
66*4882a593Smuzhiyun if (mem_pfn_is_ram)
67*4882a593Smuzhiyun return -EBUSY;
68*4882a593Smuzhiyun mem_pfn_is_ram = fn;
69*4882a593Smuzhiyun return 0;
70*4882a593Smuzhiyun }
71*4882a593Smuzhiyun
pfn_is_ram(unsigned long pfn)72*4882a593Smuzhiyun static int pfn_is_ram(unsigned long pfn)
73*4882a593Smuzhiyun {
74*4882a593Smuzhiyun if (mem_pfn_is_ram)
75*4882a593Smuzhiyun return mem_pfn_is_ram(pfn);
76*4882a593Smuzhiyun else
77*4882a593Smuzhiyun return 1;
78*4882a593Smuzhiyun }
79*4882a593Smuzhiyun
80*4882a593Smuzhiyun /* This doesn't grab kclist_lock, so it should only be used at init time. */
kclist_add(struct kcore_list * new,void * addr,size_t size,int type)81*4882a593Smuzhiyun void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
82*4882a593Smuzhiyun int type)
83*4882a593Smuzhiyun {
84*4882a593Smuzhiyun new->addr = (unsigned long)addr;
85*4882a593Smuzhiyun new->size = size;
86*4882a593Smuzhiyun new->type = type;
87*4882a593Smuzhiyun
88*4882a593Smuzhiyun list_add_tail(&new->list, &kclist_head);
89*4882a593Smuzhiyun }
90*4882a593Smuzhiyun
get_kcore_size(int * nphdr,size_t * phdrs_len,size_t * notes_len,size_t * data_offset)91*4882a593Smuzhiyun static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
92*4882a593Smuzhiyun size_t *data_offset)
93*4882a593Smuzhiyun {
94*4882a593Smuzhiyun size_t try, size;
95*4882a593Smuzhiyun struct kcore_list *m;
96*4882a593Smuzhiyun
97*4882a593Smuzhiyun *nphdr = 1; /* PT_NOTE */
98*4882a593Smuzhiyun size = 0;
99*4882a593Smuzhiyun
100*4882a593Smuzhiyun list_for_each_entry(m, &kclist_head, list) {
101*4882a593Smuzhiyun try = kc_vaddr_to_offset((size_t)m->addr + m->size);
102*4882a593Smuzhiyun if (try > size)
103*4882a593Smuzhiyun size = try;
104*4882a593Smuzhiyun *nphdr = *nphdr + 1;
105*4882a593Smuzhiyun }
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun *phdrs_len = *nphdr * sizeof(struct elf_phdr);
108*4882a593Smuzhiyun *notes_len = (4 * sizeof(struct elf_note) +
109*4882a593Smuzhiyun 3 * ALIGN(sizeof(CORE_STR), 4) +
110*4882a593Smuzhiyun VMCOREINFO_NOTE_NAME_BYTES +
111*4882a593Smuzhiyun ALIGN(sizeof(struct elf_prstatus), 4) +
112*4882a593Smuzhiyun ALIGN(sizeof(struct elf_prpsinfo), 4) +
113*4882a593Smuzhiyun ALIGN(arch_task_struct_size, 4) +
114*4882a593Smuzhiyun ALIGN(vmcoreinfo_size, 4));
115*4882a593Smuzhiyun *data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
116*4882a593Smuzhiyun *notes_len);
117*4882a593Smuzhiyun return *data_offset + size;
118*4882a593Smuzhiyun }
119*4882a593Smuzhiyun
120*4882a593Smuzhiyun #ifdef CONFIG_HIGHMEM
121*4882a593Smuzhiyun /*
122*4882a593Smuzhiyun * If no highmem, we can assume [0...max_low_pfn) continuous range of memory
123*4882a593Smuzhiyun * because memory hole is not as big as !HIGHMEM case.
124*4882a593Smuzhiyun * (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
125*4882a593Smuzhiyun */
kcore_ram_list(struct list_head * head)126*4882a593Smuzhiyun static int kcore_ram_list(struct list_head *head)
127*4882a593Smuzhiyun {
128*4882a593Smuzhiyun struct kcore_list *ent;
129*4882a593Smuzhiyun
130*4882a593Smuzhiyun ent = kmalloc(sizeof(*ent), GFP_KERNEL);
131*4882a593Smuzhiyun if (!ent)
132*4882a593Smuzhiyun return -ENOMEM;
133*4882a593Smuzhiyun ent->addr = (unsigned long)__va(0);
134*4882a593Smuzhiyun ent->size = max_low_pfn << PAGE_SHIFT;
135*4882a593Smuzhiyun ent->type = KCORE_RAM;
136*4882a593Smuzhiyun list_add(&ent->list, head);
137*4882a593Smuzhiyun return 0;
138*4882a593Smuzhiyun }
139*4882a593Smuzhiyun
140*4882a593Smuzhiyun #else /* !CONFIG_HIGHMEM */
141*4882a593Smuzhiyun
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Calculate the vmemmap address range backing the system RAM region @ent
 * and register it on @head as a KCORE_VMEMMAP entry.
 *
 * The range is widened to page boundaries, then its end is pulled back so
 * that it does not run into a KCORE_VMEMMAP entry already on @head. Nothing
 * is added when the resulting range is empty.
 *
 * Returns 1 on success (including "nothing to add") and 0 when the new
 * entry cannot be allocated.
 */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
	unsigned long nr_pages = ent->size >> PAGE_SHIFT;
	unsigned long start, end;
	struct kcore_list *vmm, *tmp;


	start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
	end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
	end = PAGE_ALIGN(end);
	/* overlap check (because we have to align to page boundaries) */
	list_for_each_entry(tmp, head, list) {
		if (tmp->type != KCORE_VMEMMAP)
			continue;
		if (start < tmp->addr + tmp->size)
			if (end > tmp->addr)
				end = tmp->addr;
	}
	if (start < end) {
		vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
		if (!vmm)
			return 0;
		vmm->addr = start;
		vmm->size = end - start;
		vmm->type = KCORE_VMEMMAP;
		list_add_tail(&vmm->list, head);
	}
	return 1;

}
#else
/* Without CONFIG_SPARSEMEM_VMEMMAP there is nothing to register. */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
	return 1;
}

#endif
184*4882a593Smuzhiyun
185*4882a593Smuzhiyun static int
kclist_add_private(unsigned long pfn,unsigned long nr_pages,void * arg)186*4882a593Smuzhiyun kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
187*4882a593Smuzhiyun {
188*4882a593Smuzhiyun struct list_head *head = (struct list_head *)arg;
189*4882a593Smuzhiyun struct kcore_list *ent;
190*4882a593Smuzhiyun struct page *p;
191*4882a593Smuzhiyun
192*4882a593Smuzhiyun if (!pfn_valid(pfn))
193*4882a593Smuzhiyun return 1;
194*4882a593Smuzhiyun
195*4882a593Smuzhiyun p = pfn_to_page(pfn);
196*4882a593Smuzhiyun
197*4882a593Smuzhiyun ent = kmalloc(sizeof(*ent), GFP_KERNEL);
198*4882a593Smuzhiyun if (!ent)
199*4882a593Smuzhiyun return -ENOMEM;
200*4882a593Smuzhiyun ent->addr = (unsigned long)page_to_virt(p);
201*4882a593Smuzhiyun ent->size = nr_pages << PAGE_SHIFT;
202*4882a593Smuzhiyun
203*4882a593Smuzhiyun if (!virt_addr_valid(ent->addr))
204*4882a593Smuzhiyun goto free_out;
205*4882a593Smuzhiyun
206*4882a593Smuzhiyun /* cut not-mapped area. ....from ppc-32 code. */
207*4882a593Smuzhiyun if (ULONG_MAX - ent->addr < ent->size)
208*4882a593Smuzhiyun ent->size = ULONG_MAX - ent->addr;
209*4882a593Smuzhiyun
210*4882a593Smuzhiyun /*
211*4882a593Smuzhiyun * We've already checked virt_addr_valid so we know this address
212*4882a593Smuzhiyun * is a valid pointer, therefore we can check against it to determine
213*4882a593Smuzhiyun * if we need to trim
214*4882a593Smuzhiyun */
215*4882a593Smuzhiyun if (VMALLOC_START > ent->addr) {
216*4882a593Smuzhiyun if (VMALLOC_START - ent->addr < ent->size)
217*4882a593Smuzhiyun ent->size = VMALLOC_START - ent->addr;
218*4882a593Smuzhiyun }
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun ent->type = KCORE_RAM;
221*4882a593Smuzhiyun list_add_tail(&ent->list, head);
222*4882a593Smuzhiyun
223*4882a593Smuzhiyun if (!get_sparsemem_vmemmap_info(ent, head)) {
224*4882a593Smuzhiyun list_del(&ent->list);
225*4882a593Smuzhiyun goto free_out;
226*4882a593Smuzhiyun }
227*4882a593Smuzhiyun
228*4882a593Smuzhiyun return 0;
229*4882a593Smuzhiyun free_out:
230*4882a593Smuzhiyun kfree(ent);
231*4882a593Smuzhiyun return 1;
232*4882a593Smuzhiyun }
233*4882a593Smuzhiyun
kcore_ram_list(struct list_head * list)234*4882a593Smuzhiyun static int kcore_ram_list(struct list_head *list)
235*4882a593Smuzhiyun {
236*4882a593Smuzhiyun int nid, ret;
237*4882a593Smuzhiyun unsigned long end_pfn;
238*4882a593Smuzhiyun
239*4882a593Smuzhiyun /* Not inialized....update now */
240*4882a593Smuzhiyun /* find out "max pfn" */
241*4882a593Smuzhiyun end_pfn = 0;
242*4882a593Smuzhiyun for_each_node_state(nid, N_MEMORY) {
243*4882a593Smuzhiyun unsigned long node_end;
244*4882a593Smuzhiyun node_end = node_end_pfn(nid);
245*4882a593Smuzhiyun if (end_pfn < node_end)
246*4882a593Smuzhiyun end_pfn = node_end;
247*4882a593Smuzhiyun }
248*4882a593Smuzhiyun /* scan 0 to max_pfn */
249*4882a593Smuzhiyun ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private);
250*4882a593Smuzhiyun if (ret)
251*4882a593Smuzhiyun return -ENOMEM;
252*4882a593Smuzhiyun return 0;
253*4882a593Smuzhiyun }
254*4882a593Smuzhiyun #endif /* CONFIG_HIGHMEM */
255*4882a593Smuzhiyun
kcore_update_ram(void)256*4882a593Smuzhiyun static int kcore_update_ram(void)
257*4882a593Smuzhiyun {
258*4882a593Smuzhiyun LIST_HEAD(list);
259*4882a593Smuzhiyun LIST_HEAD(garbage);
260*4882a593Smuzhiyun int nphdr;
261*4882a593Smuzhiyun size_t phdrs_len, notes_len, data_offset;
262*4882a593Smuzhiyun struct kcore_list *tmp, *pos;
263*4882a593Smuzhiyun int ret = 0;
264*4882a593Smuzhiyun
265*4882a593Smuzhiyun down_write(&kclist_lock);
266*4882a593Smuzhiyun if (!xchg(&kcore_need_update, 0))
267*4882a593Smuzhiyun goto out;
268*4882a593Smuzhiyun
269*4882a593Smuzhiyun ret = kcore_ram_list(&list);
270*4882a593Smuzhiyun if (ret) {
271*4882a593Smuzhiyun /* Couldn't get the RAM list, try again next time. */
272*4882a593Smuzhiyun WRITE_ONCE(kcore_need_update, 1);
273*4882a593Smuzhiyun list_splice_tail(&list, &garbage);
274*4882a593Smuzhiyun goto out;
275*4882a593Smuzhiyun }
276*4882a593Smuzhiyun
277*4882a593Smuzhiyun list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
278*4882a593Smuzhiyun if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
279*4882a593Smuzhiyun list_move(&pos->list, &garbage);
280*4882a593Smuzhiyun }
281*4882a593Smuzhiyun list_splice_tail(&list, &kclist_head);
282*4882a593Smuzhiyun
283*4882a593Smuzhiyun proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, ¬es_len,
284*4882a593Smuzhiyun &data_offset);
285*4882a593Smuzhiyun
286*4882a593Smuzhiyun out:
287*4882a593Smuzhiyun up_write(&kclist_lock);
288*4882a593Smuzhiyun list_for_each_entry_safe(pos, tmp, &garbage, list) {
289*4882a593Smuzhiyun list_del(&pos->list);
290*4882a593Smuzhiyun kfree(pos);
291*4882a593Smuzhiyun }
292*4882a593Smuzhiyun return ret;
293*4882a593Smuzhiyun }
294*4882a593Smuzhiyun
append_kcore_note(char * notes,size_t * i,const char * name,unsigned int type,const void * desc,size_t descsz)295*4882a593Smuzhiyun static void append_kcore_note(char *notes, size_t *i, const char *name,
296*4882a593Smuzhiyun unsigned int type, const void *desc,
297*4882a593Smuzhiyun size_t descsz)
298*4882a593Smuzhiyun {
299*4882a593Smuzhiyun struct elf_note *note = (struct elf_note *)¬es[*i];
300*4882a593Smuzhiyun
301*4882a593Smuzhiyun note->n_namesz = strlen(name) + 1;
302*4882a593Smuzhiyun note->n_descsz = descsz;
303*4882a593Smuzhiyun note->n_type = type;
304*4882a593Smuzhiyun *i += sizeof(*note);
305*4882a593Smuzhiyun memcpy(¬es[*i], name, note->n_namesz);
306*4882a593Smuzhiyun *i = ALIGN(*i + note->n_namesz, 4);
307*4882a593Smuzhiyun memcpy(¬es[*i], desc, descsz);
308*4882a593Smuzhiyun *i = ALIGN(*i + descsz, 4);
309*4882a593Smuzhiyun }
310*4882a593Smuzhiyun
311*4882a593Smuzhiyun static ssize_t
read_kcore(struct file * file,char __user * buffer,size_t buflen,loff_t * fpos)312*4882a593Smuzhiyun read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
313*4882a593Smuzhiyun {
314*4882a593Smuzhiyun char *buf = file->private_data;
315*4882a593Smuzhiyun size_t phdrs_offset, notes_offset, data_offset;
316*4882a593Smuzhiyun size_t phdrs_len, notes_len;
317*4882a593Smuzhiyun struct kcore_list *m;
318*4882a593Smuzhiyun size_t tsz;
319*4882a593Smuzhiyun int nphdr;
320*4882a593Smuzhiyun unsigned long start;
321*4882a593Smuzhiyun size_t orig_buflen = buflen;
322*4882a593Smuzhiyun int ret = 0;
323*4882a593Smuzhiyun
324*4882a593Smuzhiyun down_read(&kclist_lock);
325*4882a593Smuzhiyun
326*4882a593Smuzhiyun get_kcore_size(&nphdr, &phdrs_len, ¬es_len, &data_offset);
327*4882a593Smuzhiyun phdrs_offset = sizeof(struct elfhdr);
328*4882a593Smuzhiyun notes_offset = phdrs_offset + phdrs_len;
329*4882a593Smuzhiyun
330*4882a593Smuzhiyun /* ELF file header. */
331*4882a593Smuzhiyun if (buflen && *fpos < sizeof(struct elfhdr)) {
332*4882a593Smuzhiyun struct elfhdr ehdr = {
333*4882a593Smuzhiyun .e_ident = {
334*4882a593Smuzhiyun [EI_MAG0] = ELFMAG0,
335*4882a593Smuzhiyun [EI_MAG1] = ELFMAG1,
336*4882a593Smuzhiyun [EI_MAG2] = ELFMAG2,
337*4882a593Smuzhiyun [EI_MAG3] = ELFMAG3,
338*4882a593Smuzhiyun [EI_CLASS] = ELF_CLASS,
339*4882a593Smuzhiyun [EI_DATA] = ELF_DATA,
340*4882a593Smuzhiyun [EI_VERSION] = EV_CURRENT,
341*4882a593Smuzhiyun [EI_OSABI] = ELF_OSABI,
342*4882a593Smuzhiyun },
343*4882a593Smuzhiyun .e_type = ET_CORE,
344*4882a593Smuzhiyun .e_machine = ELF_ARCH,
345*4882a593Smuzhiyun .e_version = EV_CURRENT,
346*4882a593Smuzhiyun .e_phoff = sizeof(struct elfhdr),
347*4882a593Smuzhiyun .e_flags = ELF_CORE_EFLAGS,
348*4882a593Smuzhiyun .e_ehsize = sizeof(struct elfhdr),
349*4882a593Smuzhiyun .e_phentsize = sizeof(struct elf_phdr),
350*4882a593Smuzhiyun .e_phnum = nphdr,
351*4882a593Smuzhiyun };
352*4882a593Smuzhiyun
353*4882a593Smuzhiyun tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
354*4882a593Smuzhiyun if (copy_to_user(buffer, (char *)&ehdr + *fpos, tsz)) {
355*4882a593Smuzhiyun ret = -EFAULT;
356*4882a593Smuzhiyun goto out;
357*4882a593Smuzhiyun }
358*4882a593Smuzhiyun
359*4882a593Smuzhiyun buffer += tsz;
360*4882a593Smuzhiyun buflen -= tsz;
361*4882a593Smuzhiyun *fpos += tsz;
362*4882a593Smuzhiyun }
363*4882a593Smuzhiyun
364*4882a593Smuzhiyun /* ELF program headers. */
365*4882a593Smuzhiyun if (buflen && *fpos < phdrs_offset + phdrs_len) {
366*4882a593Smuzhiyun struct elf_phdr *phdrs, *phdr;
367*4882a593Smuzhiyun
368*4882a593Smuzhiyun phdrs = kzalloc(phdrs_len, GFP_KERNEL);
369*4882a593Smuzhiyun if (!phdrs) {
370*4882a593Smuzhiyun ret = -ENOMEM;
371*4882a593Smuzhiyun goto out;
372*4882a593Smuzhiyun }
373*4882a593Smuzhiyun
374*4882a593Smuzhiyun phdrs[0].p_type = PT_NOTE;
375*4882a593Smuzhiyun phdrs[0].p_offset = notes_offset;
376*4882a593Smuzhiyun phdrs[0].p_filesz = notes_len;
377*4882a593Smuzhiyun
378*4882a593Smuzhiyun phdr = &phdrs[1];
379*4882a593Smuzhiyun list_for_each_entry(m, &kclist_head, list) {
380*4882a593Smuzhiyun phdr->p_type = PT_LOAD;
381*4882a593Smuzhiyun phdr->p_flags = PF_R | PF_W | PF_X;
382*4882a593Smuzhiyun phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
383*4882a593Smuzhiyun if (m->type == KCORE_REMAP)
384*4882a593Smuzhiyun phdr->p_vaddr = (size_t)m->vaddr;
385*4882a593Smuzhiyun else
386*4882a593Smuzhiyun phdr->p_vaddr = (size_t)m->addr;
387*4882a593Smuzhiyun if (m->type == KCORE_RAM || m->type == KCORE_REMAP)
388*4882a593Smuzhiyun phdr->p_paddr = __pa(m->addr);
389*4882a593Smuzhiyun else if (m->type == KCORE_TEXT)
390*4882a593Smuzhiyun phdr->p_paddr = __pa_symbol(m->addr);
391*4882a593Smuzhiyun else
392*4882a593Smuzhiyun phdr->p_paddr = (elf_addr_t)-1;
393*4882a593Smuzhiyun phdr->p_filesz = phdr->p_memsz = m->size;
394*4882a593Smuzhiyun phdr->p_align = PAGE_SIZE;
395*4882a593Smuzhiyun phdr++;
396*4882a593Smuzhiyun }
397*4882a593Smuzhiyun
398*4882a593Smuzhiyun tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
399*4882a593Smuzhiyun if (copy_to_user(buffer, (char *)phdrs + *fpos - phdrs_offset,
400*4882a593Smuzhiyun tsz)) {
401*4882a593Smuzhiyun kfree(phdrs);
402*4882a593Smuzhiyun ret = -EFAULT;
403*4882a593Smuzhiyun goto out;
404*4882a593Smuzhiyun }
405*4882a593Smuzhiyun kfree(phdrs);
406*4882a593Smuzhiyun
407*4882a593Smuzhiyun buffer += tsz;
408*4882a593Smuzhiyun buflen -= tsz;
409*4882a593Smuzhiyun *fpos += tsz;
410*4882a593Smuzhiyun }
411*4882a593Smuzhiyun
412*4882a593Smuzhiyun /* ELF note segment. */
413*4882a593Smuzhiyun if (buflen && *fpos < notes_offset + notes_len) {
414*4882a593Smuzhiyun struct elf_prstatus prstatus = {};
415*4882a593Smuzhiyun struct elf_prpsinfo prpsinfo = {
416*4882a593Smuzhiyun .pr_sname = 'R',
417*4882a593Smuzhiyun .pr_fname = "vmlinux",
418*4882a593Smuzhiyun };
419*4882a593Smuzhiyun char *notes;
420*4882a593Smuzhiyun size_t i = 0;
421*4882a593Smuzhiyun
422*4882a593Smuzhiyun strlcpy(prpsinfo.pr_psargs, saved_command_line,
423*4882a593Smuzhiyun sizeof(prpsinfo.pr_psargs));
424*4882a593Smuzhiyun
425*4882a593Smuzhiyun notes = kzalloc(notes_len, GFP_KERNEL);
426*4882a593Smuzhiyun if (!notes) {
427*4882a593Smuzhiyun ret = -ENOMEM;
428*4882a593Smuzhiyun goto out;
429*4882a593Smuzhiyun }
430*4882a593Smuzhiyun
431*4882a593Smuzhiyun append_kcore_note(notes, &i, CORE_STR, NT_PRSTATUS, &prstatus,
432*4882a593Smuzhiyun sizeof(prstatus));
433*4882a593Smuzhiyun append_kcore_note(notes, &i, CORE_STR, NT_PRPSINFO, &prpsinfo,
434*4882a593Smuzhiyun sizeof(prpsinfo));
435*4882a593Smuzhiyun append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current,
436*4882a593Smuzhiyun arch_task_struct_size);
437*4882a593Smuzhiyun /*
438*4882a593Smuzhiyun * vmcoreinfo_size is mostly constant after init time, but it
439*4882a593Smuzhiyun * can be changed by crash_save_vmcoreinfo(). Racing here with a
440*4882a593Smuzhiyun * panic on another CPU before the machine goes down is insanely
441*4882a593Smuzhiyun * unlikely, but it's better to not leave potential buffer
442*4882a593Smuzhiyun * overflows lying around, regardless.
443*4882a593Smuzhiyun */
444*4882a593Smuzhiyun append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
445*4882a593Smuzhiyun vmcoreinfo_data,
446*4882a593Smuzhiyun min(vmcoreinfo_size, notes_len - i));
447*4882a593Smuzhiyun
448*4882a593Smuzhiyun tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
449*4882a593Smuzhiyun if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) {
450*4882a593Smuzhiyun kfree(notes);
451*4882a593Smuzhiyun ret = -EFAULT;
452*4882a593Smuzhiyun goto out;
453*4882a593Smuzhiyun }
454*4882a593Smuzhiyun kfree(notes);
455*4882a593Smuzhiyun
456*4882a593Smuzhiyun buffer += tsz;
457*4882a593Smuzhiyun buflen -= tsz;
458*4882a593Smuzhiyun *fpos += tsz;
459*4882a593Smuzhiyun }
460*4882a593Smuzhiyun
461*4882a593Smuzhiyun /*
462*4882a593Smuzhiyun * Check to see if our file offset matches with any of
463*4882a593Smuzhiyun * the addresses in the elf_phdr on our list.
464*4882a593Smuzhiyun */
465*4882a593Smuzhiyun start = kc_offset_to_vaddr(*fpos - data_offset);
466*4882a593Smuzhiyun if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
467*4882a593Smuzhiyun tsz = buflen;
468*4882a593Smuzhiyun
469*4882a593Smuzhiyun m = NULL;
470*4882a593Smuzhiyun while (buflen) {
471*4882a593Smuzhiyun /*
472*4882a593Smuzhiyun * If this is the first iteration or the address is not within
473*4882a593Smuzhiyun * the previous entry, search for a matching entry.
474*4882a593Smuzhiyun */
475*4882a593Smuzhiyun if (!m || start < m->addr || start >= m->addr + m->size) {
476*4882a593Smuzhiyun list_for_each_entry(m, &kclist_head, list) {
477*4882a593Smuzhiyun if (start >= m->addr &&
478*4882a593Smuzhiyun start < m->addr + m->size)
479*4882a593Smuzhiyun break;
480*4882a593Smuzhiyun }
481*4882a593Smuzhiyun }
482*4882a593Smuzhiyun
483*4882a593Smuzhiyun if (&m->list == &kclist_head) {
484*4882a593Smuzhiyun if (clear_user(buffer, tsz)) {
485*4882a593Smuzhiyun ret = -EFAULT;
486*4882a593Smuzhiyun goto out;
487*4882a593Smuzhiyun }
488*4882a593Smuzhiyun m = NULL; /* skip the list anchor */
489*4882a593Smuzhiyun } else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) {
490*4882a593Smuzhiyun if (clear_user(buffer, tsz)) {
491*4882a593Smuzhiyun ret = -EFAULT;
492*4882a593Smuzhiyun goto out;
493*4882a593Smuzhiyun }
494*4882a593Smuzhiyun } else if (m->type == KCORE_VMALLOC) {
495*4882a593Smuzhiyun vread(buf, (char *)start, tsz);
496*4882a593Smuzhiyun /* we have to zero-fill user buffer even if no read */
497*4882a593Smuzhiyun if (copy_to_user(buffer, buf, tsz)) {
498*4882a593Smuzhiyun ret = -EFAULT;
499*4882a593Smuzhiyun goto out;
500*4882a593Smuzhiyun }
501*4882a593Smuzhiyun } else if (m->type == KCORE_USER) {
502*4882a593Smuzhiyun /* User page is handled prior to normal kernel page: */
503*4882a593Smuzhiyun if (copy_to_user(buffer, (char *)start, tsz)) {
504*4882a593Smuzhiyun ret = -EFAULT;
505*4882a593Smuzhiyun goto out;
506*4882a593Smuzhiyun }
507*4882a593Smuzhiyun } else {
508*4882a593Smuzhiyun if (kern_addr_valid(start)) {
509*4882a593Smuzhiyun /*
510*4882a593Smuzhiyun * Using bounce buffer to bypass the
511*4882a593Smuzhiyun * hardened user copy kernel text checks.
512*4882a593Smuzhiyun */
513*4882a593Smuzhiyun if (copy_from_kernel_nofault(buf, (void *)start,
514*4882a593Smuzhiyun tsz)) {
515*4882a593Smuzhiyun if (clear_user(buffer, tsz)) {
516*4882a593Smuzhiyun ret = -EFAULT;
517*4882a593Smuzhiyun goto out;
518*4882a593Smuzhiyun }
519*4882a593Smuzhiyun } else {
520*4882a593Smuzhiyun if (copy_to_user(buffer, buf, tsz)) {
521*4882a593Smuzhiyun ret = -EFAULT;
522*4882a593Smuzhiyun goto out;
523*4882a593Smuzhiyun }
524*4882a593Smuzhiyun }
525*4882a593Smuzhiyun } else {
526*4882a593Smuzhiyun if (clear_user(buffer, tsz)) {
527*4882a593Smuzhiyun ret = -EFAULT;
528*4882a593Smuzhiyun goto out;
529*4882a593Smuzhiyun }
530*4882a593Smuzhiyun }
531*4882a593Smuzhiyun }
532*4882a593Smuzhiyun buflen -= tsz;
533*4882a593Smuzhiyun *fpos += tsz;
534*4882a593Smuzhiyun buffer += tsz;
535*4882a593Smuzhiyun start += tsz;
536*4882a593Smuzhiyun tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
537*4882a593Smuzhiyun }
538*4882a593Smuzhiyun
539*4882a593Smuzhiyun out:
540*4882a593Smuzhiyun up_read(&kclist_lock);
541*4882a593Smuzhiyun if (ret)
542*4882a593Smuzhiyun return ret;
543*4882a593Smuzhiyun return orig_buflen - buflen;
544*4882a593Smuzhiyun }
545*4882a593Smuzhiyun
open_kcore(struct inode * inode,struct file * filp)546*4882a593Smuzhiyun static int open_kcore(struct inode *inode, struct file *filp)
547*4882a593Smuzhiyun {
548*4882a593Smuzhiyun int ret = security_locked_down(LOCKDOWN_KCORE);
549*4882a593Smuzhiyun
550*4882a593Smuzhiyun if (!capable(CAP_SYS_RAWIO))
551*4882a593Smuzhiyun return -EPERM;
552*4882a593Smuzhiyun
553*4882a593Smuzhiyun if (ret)
554*4882a593Smuzhiyun return ret;
555*4882a593Smuzhiyun
556*4882a593Smuzhiyun filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
557*4882a593Smuzhiyun if (!filp->private_data)
558*4882a593Smuzhiyun return -ENOMEM;
559*4882a593Smuzhiyun
560*4882a593Smuzhiyun if (kcore_need_update)
561*4882a593Smuzhiyun kcore_update_ram();
562*4882a593Smuzhiyun if (i_size_read(inode) != proc_root_kcore->size) {
563*4882a593Smuzhiyun inode_lock(inode);
564*4882a593Smuzhiyun i_size_write(inode, proc_root_kcore->size);
565*4882a593Smuzhiyun inode_unlock(inode);
566*4882a593Smuzhiyun }
567*4882a593Smuzhiyun return 0;
568*4882a593Smuzhiyun }
569*4882a593Smuzhiyun
release_kcore(struct inode * inode,struct file * file)570*4882a593Smuzhiyun static int release_kcore(struct inode *inode, struct file *file)
571*4882a593Smuzhiyun {
572*4882a593Smuzhiyun kfree(file->private_data);
573*4882a593Smuzhiyun return 0;
574*4882a593Smuzhiyun }
575*4882a593Smuzhiyun
576*4882a593Smuzhiyun static const struct proc_ops kcore_proc_ops = {
577*4882a593Smuzhiyun .proc_read = read_kcore,
578*4882a593Smuzhiyun .proc_open = open_kcore,
579*4882a593Smuzhiyun .proc_release = release_kcore,
580*4882a593Smuzhiyun .proc_lseek = default_llseek,
581*4882a593Smuzhiyun };
582*4882a593Smuzhiyun
583*4882a593Smuzhiyun /* just remember that we have to update kcore */
kcore_callback(struct notifier_block * self,unsigned long action,void * arg)584*4882a593Smuzhiyun static int __meminit kcore_callback(struct notifier_block *self,
585*4882a593Smuzhiyun unsigned long action, void *arg)
586*4882a593Smuzhiyun {
587*4882a593Smuzhiyun switch (action) {
588*4882a593Smuzhiyun case MEM_ONLINE:
589*4882a593Smuzhiyun case MEM_OFFLINE:
590*4882a593Smuzhiyun kcore_need_update = 1;
591*4882a593Smuzhiyun break;
592*4882a593Smuzhiyun }
593*4882a593Smuzhiyun return NOTIFY_OK;
594*4882a593Smuzhiyun }
595*4882a593Smuzhiyun
596*4882a593Smuzhiyun static struct notifier_block kcore_callback_nb __meminitdata = {
597*4882a593Smuzhiyun .notifier_call = kcore_callback,
598*4882a593Smuzhiyun .priority = 0,
599*4882a593Smuzhiyun };
600*4882a593Smuzhiyun
601*4882a593Smuzhiyun static struct kcore_list kcore_vmalloc;
602*4882a593Smuzhiyun
603*4882a593Smuzhiyun #ifdef CONFIG_ARCH_PROC_KCORE_TEXT
604*4882a593Smuzhiyun static struct kcore_list kcore_text;
605*4882a593Smuzhiyun /*
606*4882a593Smuzhiyun * If defined, special segment is used for mapping kernel text instead of
607*4882a593Smuzhiyun * direct-map area. We need to create special TEXT section.
608*4882a593Smuzhiyun */
proc_kcore_text_init(void)609*4882a593Smuzhiyun static void __init proc_kcore_text_init(void)
610*4882a593Smuzhiyun {
611*4882a593Smuzhiyun kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
612*4882a593Smuzhiyun }
613*4882a593Smuzhiyun #else
proc_kcore_text_init(void)614*4882a593Smuzhiyun static void __init proc_kcore_text_init(void)
615*4882a593Smuzhiyun {
616*4882a593Smuzhiyun }
617*4882a593Smuzhiyun #endif
618*4882a593Smuzhiyun
619*4882a593Smuzhiyun #if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
620*4882a593Smuzhiyun /*
621*4882a593Smuzhiyun * MODULES_VADDR has no intersection with VMALLOC_ADDR.
622*4882a593Smuzhiyun */
623*4882a593Smuzhiyun static struct kcore_list kcore_modules;
add_modules_range(void)624*4882a593Smuzhiyun static void __init add_modules_range(void)
625*4882a593Smuzhiyun {
626*4882a593Smuzhiyun if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) {
627*4882a593Smuzhiyun kclist_add(&kcore_modules, (void *)MODULES_VADDR,
628*4882a593Smuzhiyun MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
629*4882a593Smuzhiyun }
630*4882a593Smuzhiyun }
631*4882a593Smuzhiyun #else
add_modules_range(void)632*4882a593Smuzhiyun static void __init add_modules_range(void)
633*4882a593Smuzhiyun {
634*4882a593Smuzhiyun }
635*4882a593Smuzhiyun #endif
636*4882a593Smuzhiyun
proc_kcore_init(void)637*4882a593Smuzhiyun static int __init proc_kcore_init(void)
638*4882a593Smuzhiyun {
639*4882a593Smuzhiyun proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &kcore_proc_ops);
640*4882a593Smuzhiyun if (!proc_root_kcore) {
641*4882a593Smuzhiyun pr_err("couldn't create /proc/kcore\n");
642*4882a593Smuzhiyun return 0; /* Always returns 0. */
643*4882a593Smuzhiyun }
644*4882a593Smuzhiyun /* Store text area if it's special */
645*4882a593Smuzhiyun proc_kcore_text_init();
646*4882a593Smuzhiyun /* Store vmalloc area */
647*4882a593Smuzhiyun kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
648*4882a593Smuzhiyun VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
649*4882a593Smuzhiyun add_modules_range();
650*4882a593Smuzhiyun /* Store direct-map area from physical memory map */
651*4882a593Smuzhiyun kcore_update_ram();
652*4882a593Smuzhiyun register_hotmemory_notifier(&kcore_callback_nb);
653*4882a593Smuzhiyun
654*4882a593Smuzhiyun return 0;
655*4882a593Smuzhiyun }
656*4882a593Smuzhiyun fs_initcall(proc_kcore_init);
657