// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999
 *    Author(s): Hartmut Penner (hp@de.ibm.com)
 *               Ulrich Weigand (uweigand@de.ibm.com)
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1995 Linus Torvalds
 */

#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/compat.h>
#include <linux/smp.h>
#include <linux/kdebug.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/extable.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <asm/asm-offsets.h>
#include <asm/diag.h>
#include <asm/gmap.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/facility.h>
#include <asm/uv.h>
#include "../kernel/entry.h"

#define __FAIL_ADDR_MASK -4096L
#define __SUBCODE_MASK 0x0600
#define __PF_RES_FIELD 0x8000000000000000ULL

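/*
 * Architecture-private fault codes, placed above the generic VM_FAULT_*
 * bits. They never leave this file and only select the error handling
 * path taken in do_fault_error().
 */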
#define VM_FAULT_BADCONTEXT	((__force vm_fault_t) 0x010000)
#define VM_FAULT_BADMAP		((__force vm_fault_t) 0x020000)
#define VM_FAULT_BADACCESS	((__force vm_fault_t) 0x040000)
#define VM_FAULT_SIGNAL		((__force vm_fault_t) 0x080000)
#define VM_FAULT_PFAULT		((__force vm_fault_t) 0x100000)

enum fault_type {
	KERNEL_FAULT,
	USER_FAULT,
	VDSO_FAULT,
	GMAP_FAULT,
};

static unsigned long store_indication __read_mostly;

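/*
 * Facility 75 is the store-indication facility. When it is installed,
 * the 0xc00 bits of the translation-exception identification encode
 * whether the faulting access was a fetch or a store; do_exception()
 * treats the value 0x400 in these bits as a store and upgrades the
 * requested access to VM_WRITE.
 */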
static int __init fault_init(void)
{
	if (test_facility(75))
		store_indication = 0xc00;
	return 0;
}
early_initcall(fault_init);

/*
 * Find out which address space caused the exception.
 */
static enum fault_type get_fault_type(struct pt_regs *regs)
{
	unsigned long trans_exc_code;

	trans_exc_code = regs->int_parm_long & 3;
	if (likely(trans_exc_code == 0)) {
		/* primary space exception */
		if (IS_ENABLED(CONFIG_PGSTE) &&
		    test_pt_regs_flag(regs, PIF_GUEST_FAULT))
			return GMAP_FAULT;
		if (current->thread.mm_segment == USER_DS)
			return USER_FAULT;
		return KERNEL_FAULT;
	}
	if (trans_exc_code == 2) {
		/* secondary space exception */
		if (current->thread.mm_segment & 1) {
			if (current->thread.mm_segment == USER_DS_SACF)
				return USER_FAULT;
			return KERNEL_FAULT;
		}
		return VDSO_FAULT;
	}
	if (trans_exc_code == 1) {
		/* access register mode, not used in the kernel */
		return USER_FAULT;
	}
	/* home space exception -> access via kernel ASCE */
	return KERNEL_FAULT;
}

static int bad_address(void *p)
{
	unsigned long dummy;

	return get_kernel_nofault(dummy, (unsigned long *)p);
}

static void dump_pagetable(unsigned long asce, unsigned long address)
{
	unsigned long *table = __va(asce & _ASCE_ORIGIN);

	pr_alert("AS:%016lx ", asce);
	switch (asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_REGION1:
		table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
		if (bad_address(table))
			goto bad;
		pr_cont("R1:%016lx ", *table);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION2:
		table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
		if (bad_address(table))
			goto bad;
		pr_cont("R2:%016lx ", *table);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_REGION3:
		table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
		if (bad_address(table))
			goto bad;
		pr_cont("R3:%016lx ", *table);
		if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		fallthrough;
	case _ASCE_TYPE_SEGMENT:
		table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
		if (bad_address(table))
			goto bad;
		pr_cont("S:%016lx ", *table);
		if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
			goto out;
		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
	}
	table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
	if (bad_address(table))
		goto bad;
	pr_cont("P:%016lx ", *table);
out:
	pr_cont("\n");
	return;
bad:
	pr_cont("BAD\n");
}

static void dump_fault_info(struct pt_regs *regs)
{
	unsigned long asce;

	pr_alert("Failing address: %016lx TEID: %016lx\n",
		 regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
	pr_alert("Fault in ");
	switch (regs->int_parm_long & 3) {
	case 3:
		pr_cont("home space ");
		break;
	case 2:
		pr_cont("secondary space ");
		break;
	case 1:
		pr_cont("access register ");
		break;
	case 0:
		pr_cont("primary space ");
		break;
	}
	pr_cont("mode while using ");
	switch (get_fault_type(regs)) {
	case USER_FAULT:
		asce = S390_lowcore.user_asce;
		pr_cont("user ");
		break;
	case VDSO_FAULT:
		asce = S390_lowcore.vdso_asce;
		pr_cont("vdso ");
		break;
	case GMAP_FAULT:
		asce = ((struct gmap *) S390_lowcore.gmap)->asce;
		pr_cont("gmap ");
		break;
	case KERNEL_FAULT:
		asce = S390_lowcore.kernel_asce;
		pr_cont("kernel ");
		break;
	default:
		unreachable();
	}
	pr_cont("ASCE.\n");
	dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
}

int show_unhandled_signals = 1;

void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
{
	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
		return;
	if (!unhandled_signal(current, signr))
		return;
	if (!printk_ratelimit())
		return;
	printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
	       regs->int_code & 0xffff, regs->int_code >> 17);
	print_vma_addr(KERN_CONT "in ", regs->psw.addr);
	printk(KERN_CONT "\n");
	if (is_mm_fault)
		dump_fault_info(regs);
	show_regs(regs);
}

/*
 * Send SIGSEGV to task. This is an external routine
 * to keep the stack usage of do_page_fault small.
 */
static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
{
	report_user_fault(regs, SIGSEGV, 1);
	force_sig_fault(SIGSEGV, si_code,
			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}

const struct exception_table_entry *s390_search_extables(unsigned long addr)
{
	const struct exception_table_entry *fixup;

	fixup = search_extable(__start_dma_ex_table,
			       __stop_dma_ex_table - __start_dma_ex_table,
			       addr);
	if (!fixup)
		fixup = search_exception_tables(addr);
	return fixup;
}

static noinline void do_no_context(struct pt_regs *regs)
{
	const struct exception_table_entry *fixup;

	/* Are we prepared to handle this kernel fault? */
	fixup = s390_search_extables(regs->psw.addr);
	if (fixup && ex_handle(fixup, regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	if (get_fault_type(regs) == KERNEL_FAULT)
		printk(KERN_ALERT "Unable to handle kernel pointer dereference"
		       " in virtual kernel address space\n");
	else
		printk(KERN_ALERT "Unable to handle kernel paging request"
		       " in virtual user address space\n");
	dump_fault_info(regs);
	die(regs, "Oops");
	do_exit(SIGKILL);
}

static noinline void do_low_address(struct pt_regs *regs)
{
	/* Low-address protection hit in kernel mode means
	   NULL pointer write access in kernel mode.  */
	if (regs->psw.mask & PSW_MASK_PSTATE) {
		/* Low-address protection hit in user mode 'cannot happen'. */
		die (regs, "Low-address protection");
		do_exit(SIGKILL);
	}

	do_no_context(regs);
}

static noinline void do_sigbus(struct pt_regs *regs)
{
	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	force_sig_fault(SIGBUS, BUS_ADRERR,
			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
}

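/*
 * 0x0a is the svc opcode; svc 119 (0x77) is sigreturn and svc 173 (0xad)
 * is rt_sigreturn. An execute fault on one of these two instructions is
 * taken to be the signal-return trampoline faulting on a non-executable
 * stack, so the fault is converted back into the corresponding system
 * call instead of delivering SIGSEGV (see the VM_FAULT_BADACCESS case
 * in do_fault_error()).
 */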
static noinline int signal_return(struct pt_regs *regs)
{
	u16 instruction;
	int rc;

	rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
	if (rc)
		return rc;
	if (instruction == 0x0a77) {
		set_pt_regs_flag(regs, PIF_SYSCALL);
		regs->int_code = 0x00040077;
		return 0;
	} else if (instruction == 0x0aad) {
		set_pt_regs_flag(regs, PIF_SYSCALL);
		regs->int_code = 0x000400ad;
		return 0;
	}
	return -EACCES;
}

static noinline void do_fault_error(struct pt_regs *regs, int access,
				    vm_fault_t fault)
{
	int si_code;

	switch (fault) {
	case VM_FAULT_BADACCESS:
		if (access == VM_EXEC && signal_return(regs) == 0)
			break;
		fallthrough;
	case VM_FAULT_BADMAP:
		/* Bad memory access. Check if it is kernel or user space. */
		if (user_mode(regs)) {
			/* User mode accesses just cause a SIGSEGV */
			si_code = (fault == VM_FAULT_BADMAP) ?
				SEGV_MAPERR : SEGV_ACCERR;
			do_sigsegv(regs, si_code);
			break;
		}
		fallthrough;
	case VM_FAULT_BADCONTEXT:
	case VM_FAULT_PFAULT:
		do_no_context(regs);
		break;
	case VM_FAULT_SIGNAL:
		if (!user_mode(regs))
			do_no_context(regs);
		break;
	default: /* fault & VM_FAULT_ERROR */
		if (fault & VM_FAULT_OOM) {
			if (!user_mode(regs))
				do_no_context(regs);
			else
				pagefault_out_of_memory();
		} else if (fault & VM_FAULT_SIGSEGV) {
			/* Kernel mode? Handle exceptions or die */
			if (!user_mode(regs))
				do_no_context(regs);
			else
				do_sigsegv(regs, SEGV_MAPERR);
		} else if (fault & VM_FAULT_SIGBUS) {
			/* Kernel mode? Handle exceptions or die */
			if (!user_mode(regs))
				do_no_context(regs);
			else
				do_sigbus(regs);
		} else
			BUG();
		break;
	}
}

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * interruption code (int_code):
 *   04       Protection           ->  Write-Protection  (suppression)
 *   10       Segment translation  ->  Not present       (nullification)
 *   11       Page translation     ->  Not present       (nullification)
 *   3b       Region third trans.  ->  Not present       (nullification)
 */
static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
{
	struct gmap *gmap;
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	enum fault_type type;
	unsigned long trans_exc_code;
	unsigned long address;
	unsigned int flags;
	vm_fault_t fault;

	tsk = current;
	/*
	 * The instruction that caused the program check has
	 * been nullified. Don't signal single step via SIGTRAP.
	 */
	clear_pt_regs_flag(regs, PIF_PER_TRAP);

	if (kprobe_page_fault(regs, 14))
		return 0;

	mm = tsk->mm;
	trans_exc_code = regs->int_parm_long;

	/*
	 * Verify that the fault happened in user space, that
	 * we are not in an interrupt and that there is a
	 * user context.
	 */
	fault = VM_FAULT_BADCONTEXT;
	type = get_fault_type(regs);
	switch (type) {
	case KERNEL_FAULT:
		goto out;
	case VDSO_FAULT:
		fault = VM_FAULT_BADMAP;
		goto out;
	case USER_FAULT:
	case GMAP_FAULT:
		if (faulthandler_disabled() || !mm)
			goto out;
		break;
	}

	address = trans_exc_code & __FAIL_ADDR_MASK;
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
	flags = FAULT_FLAG_DEFAULT;
	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;
	if ((trans_exc_code & store_indication) == 0x400)
		access = VM_WRITE;
	if (access == VM_WRITE)
		flags |= FAULT_FLAG_WRITE;
	mmap_read_lock(mm);

	gmap = NULL;
	if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
		gmap = (struct gmap *) S390_lowcore.gmap;
		current->thread.gmap_addr = address;
		current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
		current->thread.gmap_int_code = regs->int_code & 0xffff;
		address = __gmap_translate(gmap, address);
		if (address == -EFAULT) {
			fault = VM_FAULT_BADMAP;
			goto out_up;
		}
		if (gmap->pfault_enabled)
			flags |= FAULT_FLAG_RETRY_NOWAIT;
	}

retry:
	fault = VM_FAULT_BADMAP;
	vma = find_vma(mm, address);
	if (!vma)
		goto out_up;

	if (unlikely(vma->vm_start > address)) {
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out_up;
		if (expand_stack(vma, address))
			goto out_up;
	}

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
	fault = VM_FAULT_BADACCESS;
	if (unlikely(!(vma->vm_flags & access)))
		goto out_up;

	if (is_vm_hugetlb_page(vma))
		address &= HPAGE_MASK;
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, address, flags, regs);
	if (fault_signal_pending(fault, regs)) {
		fault = VM_FAULT_SIGNAL;
		if (flags & FAULT_FLAG_RETRY_NOWAIT)
			goto out_up;
		goto out;
	}
	if (unlikely(fault & VM_FAULT_ERROR))
		goto out_up;

	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_RETRY) {
			if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
			    (flags & FAULT_FLAG_RETRY_NOWAIT)) {
				/* FAULT_FLAG_RETRY_NOWAIT has been set,
				 * mmap_lock has not been released */
				current->thread.gmap_pfault = 1;
				fault = VM_FAULT_PFAULT;
				goto out_up;
			}
			flags &= ~FAULT_FLAG_RETRY_NOWAIT;
			flags |= FAULT_FLAG_TRIED;
			mmap_read_lock(mm);
			goto retry;
		}
	}
	if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
		address = __gmap_link(gmap, current->thread.gmap_addr,
				      address);
		if (address == -EFAULT) {
			fault = VM_FAULT_BADMAP;
			goto out_up;
		}
		if (address == -ENOMEM) {
			fault = VM_FAULT_OOM;
			goto out_up;
		}
	}
	fault = 0;
out_up:
	mmap_read_unlock(mm);
out:
	return fault;
}

void do_protection_exception(struct pt_regs *regs)
{
	unsigned long trans_exc_code;
	int access;
	vm_fault_t fault;

	trans_exc_code = regs->int_parm_long;
	/*
	 * Protection exceptions are suppressing, decrement psw address.
	 * The exception to this rule are aborted transactions, for these
	 * the PSW already points to the correct location.
	 */
	if (!(regs->int_code & 0x200))
		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
	/*
	 * Check for low-address protection. This needs to be treated
	 * as a special case because the translation exception code
	 * field is not guaranteed to contain valid data in this case.
	 */
	if (unlikely(!(trans_exc_code & 4))) {
		do_low_address(regs);
		return;
	}
	if (unlikely(MACHINE_HAS_NX && (trans_exc_code & 0x80))) {
		regs->int_parm_long = (trans_exc_code & ~PAGE_MASK) |
				      (regs->psw.addr & PAGE_MASK);
		access = VM_EXEC;
		fault = VM_FAULT_BADACCESS;
	} else {
		access = VM_WRITE;
		fault = do_exception(regs, access);
	}
	if (unlikely(fault))
		do_fault_error(regs, access, fault);
}
NOKPROBE_SYMBOL(do_protection_exception);

void do_dat_exception(struct pt_regs *regs)
{
	int access;
	vm_fault_t fault;

	access = VM_ACCESS_FLAGS;
	fault = do_exception(regs, access);
	if (unlikely(fault))
		do_fault_error(regs, access, fault);
}
NOKPROBE_SYMBOL(do_dat_exception);

#ifdef CONFIG_PFAULT
/*
 * 'pfault' pseudo page faults routines.
 */
static int pfault_disable;

static int __init nopfault(char *str)
{
	pfault_disable = 1;
	return 1;
}

__setup("nopfault", nopfault);

struct pfault_refbk {
	u16 refdiagc;
	u16 reffcode;
	u16 refdwlen;
	u16 refversn;
	u64 refgaddr;
	u64 refselmk;
	u64 refcmpmk;
	u64 reserved;
} __attribute__ ((packed, aligned(8)));

static struct pfault_refbk pfault_init_refbk = {
	.refdiagc = 0x258,
	.reffcode = 0,
	.refdwlen = 5,
	.refversn = 2,
	.refgaddr = __LC_LPP,
	.refselmk = 1ULL << 48,
	.refcmpmk = 1ULL << 48,
	.reserved = __PF_RES_FIELD
};

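/*
 * DIAG 0x258 is the hypervisor's pfault handshake diagnose:
 * pfault_init_refbk (function code 0) registers the pfault token,
 * pfault_fini_refbk (function code 1) cancels it. refgaddr points at
 * the lowcore LPP field, so the token delivered with each pfault
 * interrupt is the pid of the affected task (see pfault_interrupt()).
 */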
int pfault_init(void)
{
	int rc;

	if (pfault_disable)
		return -1;
	diag_stat_inc(DIAG_STAT_X258);
	asm volatile(
		"	diag	%1,%0,0x258\n"
		"0:	j	2f\n"
		"1:	la	%0,8\n"
		"2:\n"
		EX_TABLE(0b,1b)
		: "=d" (rc)
		: "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc");
	return rc;
}

static struct pfault_refbk pfault_fini_refbk = {
	.refdiagc = 0x258,
	.reffcode = 1,
	.refdwlen = 5,
	.refversn = 2,
};

void pfault_fini(void)
{

	if (pfault_disable)
		return;
	diag_stat_inc(DIAG_STAT_X258);
	asm volatile(
		"	diag	%0,0,0x258\n"
		"0:	nopr	%%r7\n"
		EX_TABLE(0b,0b)
		: : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc");
}

static DEFINE_SPINLOCK(pfault_lock);
static LIST_HEAD(pfault_list);

#define PF_COMPLETE	0x0080

/*
 * The mechanism of our pfault code: if Linux is running as guest, runs a user
 * space process and the user space process accesses a page that the host has
 * paged out we get a pfault interrupt.
 *
 * This allows us, within the guest, to schedule a different process. Without
 * this mechanism the host would have to suspend the whole virtual cpu until
 * the page has been paged in.
 *
 * So when we get such an interrupt then we set the state of the current task
 * to uninterruptible and also set the need_resched flag. Both happens within
 * interrupt context(!). If we later on want to return to user space we
 * recognize the need_resched flag and then call schedule(). It's not very
 * obvious how this works...
 *
 * Of course we have a lot of additional fun with the completion interrupt (->
 * host signals that a page of a process has been paged in and the process can
 * continue to run). This interrupt can arrive on any cpu and, since we have
 * virtual cpus, actually appear before the interrupt that signals that a page
 * is missing.
 */
static void pfault_interrupt(struct ext_code ext_code,
			     unsigned int param32, unsigned long param64)
{
	struct task_struct *tsk;
	__u16 subcode;
	pid_t pid;

	/*
	 * Get the external interruption subcode & pfault initial/completion
	 * signal bit. VM stores this in the 'cpu address' field associated
	 * with the external interrupt.
	 */
	subcode = ext_code.subcode;
	if ((subcode & 0xff00) != __SUBCODE_MASK)
		return;
	inc_irq_stat(IRQEXT_PFL);
	/* Get the token (= pid of the affected task). */
	pid = param64 & LPP_PID_MASK;
	rcu_read_lock();
	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
	if (tsk)
		get_task_struct(tsk);
	rcu_read_unlock();
	if (!tsk)
		return;
	spin_lock(&pfault_lock);
	if (subcode & PF_COMPLETE) {
		/* signal bit is set -> a page has been swapped in by VM */
		if (tsk->thread.pfault_wait == 1) {
			/* Initial interrupt was faster than the completion
			 * interrupt. pfault_wait is valid. Set pfault_wait
			 * back to zero and wake up the process. This can
			 * safely be done because the task is still sleeping
			 * and can't produce new pfaults. */
			tsk->thread.pfault_wait = 0;
			list_del(&tsk->thread.list);
			wake_up_process(tsk);
			put_task_struct(tsk);
		} else {
			/* Completion interrupt was faster than initial
			 * interrupt. Set pfault_wait to -1 so the initial
			 * interrupt doesn't put the task to sleep.
			 * If the task is not running, ignore the completion
			 * interrupt since it must be a leftover of a PFAULT
			 * CANCEL operation which didn't remove all pending
			 * completion interrupts. */
			if (tsk->state == TASK_RUNNING)
				tsk->thread.pfault_wait = -1;
		}
	} else {
		/* signal bit not set -> a real page is missing. */
		if (WARN_ON_ONCE(tsk != current))
			goto out;
		if (tsk->thread.pfault_wait == 1) {
			/* Already on the list with a reference: put to sleep */
			goto block;
		} else if (tsk->thread.pfault_wait == -1) {
			/* Completion interrupt was faster than the initial
			 * interrupt (pfault_wait == -1). Set pfault_wait
			 * back to zero and exit. */
			tsk->thread.pfault_wait = 0;
		} else {
			/* Initial interrupt arrived before completion
			 * interrupt. Let the task sleep.
			 * An extra task reference is needed since a different
			 * cpu may set the task state to TASK_RUNNING again
			 * before the scheduler is reached. */
			get_task_struct(tsk);
			tsk->thread.pfault_wait = 1;
			list_add(&tsk->thread.list, &pfault_list);
block:
			/* Since this must be a userspace fault, there
			 * is no kernel task state to trample. Rely on the
			 * return to userspace schedule() to block. */
			__set_current_state(TASK_UNINTERRUPTIBLE);
			set_tsk_need_resched(tsk);
			set_preempt_need_resched();
		}
	}
out:
	spin_unlock(&pfault_lock);
	put_task_struct(tsk);
}

static int pfault_cpu_dead(unsigned int cpu)
{
	struct thread_struct *thread, *next;
	struct task_struct *tsk;

	spin_lock_irq(&pfault_lock);
	list_for_each_entry_safe(thread, next, &pfault_list, list) {
		thread->pfault_wait = 0;
		list_del(&thread->list);
		tsk = container_of(thread, struct task_struct, thread);
		wake_up_process(tsk);
		put_task_struct(tsk);
	}
	spin_unlock_irq(&pfault_lock);
	return 0;
}

static int __init pfault_irq_init(void)
{
	int rc;

	rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
	if (rc)
		goto out_extint;
	rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
	if (rc)
		goto out_pfault;
	irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
	cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
				  NULL, pfault_cpu_dead);
	return 0;

out_pfault:
	unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
out_extint:
	pfault_disable = 1;
	return rc;
}
early_initcall(pfault_irq_init);

#endif /* CONFIG_PFAULT */

#if IS_ENABLED(CONFIG_PGSTE)
void do_secure_storage_access(struct pt_regs *regs)
{
	unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK;
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	struct page *page;
	int rc;

	/*
	 * bit 61 tells us if the address is valid, if it's not we
	 * have a major problem and should stop the kernel or send a
	 * SIGSEGV to the process. Unfortunately bit 61 is not
	 * reliable without the misc UV feature so we need to check
	 * for that as well.
	 */
	if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
	    !test_bit_inv(61, &regs->int_parm_long)) {
		/*
		 * When this happens, userspace did something that it
		 * was not supposed to do, e.g. branching into secure
		 * memory. Trigger a segmentation fault.
		 */
		if (user_mode(regs)) {
			send_sig(SIGSEGV, current, 0);
			return;
		}

		/*
		 * The kernel should never run into this case and we
		 * have no way out of this situation.
		 */
		panic("Unexpected PGM 0x3d with TEID bit 61=0");
	}

	switch (get_fault_type(regs)) {
	case USER_FAULT:
		mm = current->mm;
		mmap_read_lock(mm);
		vma = find_vma(mm, addr);
		if (!vma) {
			mmap_read_unlock(mm);
			do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
			break;
		}
		page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
		if (IS_ERR_OR_NULL(page)) {
			mmap_read_unlock(mm);
			break;
		}
		if (arch_make_page_accessible(page))
			send_sig(SIGSEGV, current, 0);
		put_page(page);
		mmap_read_unlock(mm);
		break;
	case KERNEL_FAULT:
		page = phys_to_page(addr);
		if (unlikely(!try_get_page(page)))
			break;
		rc = arch_make_page_accessible(page);
		put_page(page);
		if (rc)
			BUG();
		break;
	case VDSO_FAULT:
	case GMAP_FAULT:
	default:
		do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
		WARN_ON_ONCE(1);
	}
}
NOKPROBE_SYMBOL(do_secure_storage_access);

void do_non_secure_storage_access(struct pt_regs *regs)
{
	unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
	struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;

	if (get_fault_type(regs) != GMAP_FAULT) {
		do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
		WARN_ON_ONCE(1);
		return;
	}

	if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
		send_sig(SIGSEGV, current, 0);
}
NOKPROBE_SYMBOL(do_non_secure_storage_access);

void do_secure_storage_violation(struct pt_regs *regs)
{
	/*
	 * Either KVM messed up the secure guest mapping or the same
	 * page is mapped into multiple secure guests.
	 *
	 * This exception is only triggered when a guest 2 is running
	 * and can therefore never occur in kernel context.
	 */
	printk_ratelimited(KERN_WARNING
			   "Secure storage violation in task: %s, pid %d\n",
			   current->comm, current->pid);
	send_sig(SIGSEGV, current, 0);
}

#else
void do_secure_storage_access(struct pt_regs *regs)
{
	default_trap_handler(regs);
}

void do_non_secure_storage_access(struct pt_regs *regs)
{
	default_trap_handler(regs);
}

void do_secure_storage_violation(struct pt_regs *regs)
{
	default_trap_handler(regs);
}
#endif