// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2014 IBM Corp.
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/pid.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>

#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "cxl" "."
#include <asm/current.h>
#include <asm/copro.h>
#include <asm/mmu.h>

#include "cxl.h"
#include "trace.h"

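/* Does this SSTE already map the given SLB entry (same VSID and ESID)? */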
static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb)
{
	return ((sste->vsid_data == cpu_to_be64(slb->vsid)) &&
		(sste->esid_data == cpu_to_be64(slb->esid)));
}

/*
 * This finds a free SSTE for the given SLB, or returns NULL if it's already in
 * the segment table.
 */
static struct cxl_sste *find_free_sste(struct cxl_context *ctx,
				       struct copro_slb *slb)
{
	struct cxl_sste *primary, *sste, *ret = NULL;
	unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */
	unsigned int entry;
	unsigned int hash;

	if (slb->vsid & SLB_VSID_B_1T)
		hash = (slb->esid >> SID_SHIFT_1T) & mask;
	else /* 256M */
		hash = (slb->esid >> SID_SHIFT) & mask;

	primary = ctx->sstp + (hash << 3);

	for (entry = 0, sste = primary; entry < 8; entry++, sste++) {
		if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V))
			ret = sste;
		if (sste_matches(sste, slb))
			return NULL;
	}
	if (ret)
		return ret;

	/* Nothing free, select an entry to cast out */
	ret = primary + ctx->sst_lru;
	ctx->sst_lru = (ctx->sst_lru + 1) & 0x7;

	return ret;
}

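/*
 * Write the calculated SLB entry into the context's segment table so the PSL
 * can resolve future accesses to that segment, casting out an existing entry
 * if the hash group is full.
 */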
static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb)
{
	/* mask is the group index, we search primary and secondary here. */
	struct cxl_sste *sste;
	unsigned long flags;

	spin_lock_irqsave(&ctx->sste_lock, flags);
	sste = find_free_sste(ctx, slb);
	if (!sste)
		goto out_unlock;

	pr_devel("CXL Populating SST[%li]: %#llx %#llx\n",
			sste - ctx->sstp, slb->vsid, slb->esid);
	trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid);

	sste->vsid_data = cpu_to_be64(slb->vsid);
	sste->esid_data = cpu_to_be64(slb->esid);
out_unlock:
	spin_unlock_irqrestore(&ctx->sste_lock, flags);
}

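/*
 * Resolve a segment fault: compute the SLB entry covering @ea in @mm and load
 * it into the context's segment table. Returns 0 on success.
 */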
static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm,
			     u64 ea)
{
	struct copro_slb slb = {0,0};
	int rc;

	if (!(rc = copro_calculate_slb(mm, ea, &slb))) {
		cxl_load_segment(ctx, &slb);
	}

	return rc;
}

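/*
 * Acknowledge the translation fault as an address error and record the
 * faulting address and DSISR so anyone waiting on the context can see why the
 * fault could not be resolved.
 */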
static void cxl_ack_ae(struct cxl_context *ctx)
{
	unsigned long flags;

	cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0);

	spin_lock_irqsave(&ctx->lock, flags);
	ctx->pending_fault = true;
	ctx->fault_addr = ctx->dar;
	ctx->fault_dsisr = ctx->dsisr;
	spin_unlock_irqrestore(&ctx->lock, flags);

	wake_up_all(&ctx->wq);
}

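/*
 * Handle a segment table miss: populate the segment table for @ea and restart
 * the translation, or acknowledge with an address error if the SLB entry
 * cannot be calculated.
 */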
static int cxl_handle_segment_miss(struct cxl_context *ctx,
				   struct mm_struct *mm, u64 ea)
{
	int rc;

	pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea);
	trace_cxl_ste_miss(ctx, ea);

	if ((rc = cxl_fault_segment(ctx, mm, ea)))
		cxl_ack_ae(ctx);
	else {

		mb(); /* Order seg table write to TFC MMIO write */
		cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
	}

	return IRQ_HANDLED;
}

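/*
 * Fault in the page backing @dar on behalf of the AFU and, on hash MMUs,
 * pre-load the hash page table entry so the restarted translation can
 * succeed.
 */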
int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar)
{
	vm_fault_t flt = 0;
	int result;
	unsigned long access, flags, inv_flags = 0;

	/*
	 * Add the fault handling cpu to task mm cpumask so that we
	 * can do a safe lockless page table walk when inserting the
	 * hash page table entry. This function gets called with a
	 * valid mm for user space addresses. Hence using the if (mm)
	 * check is sufficient here.
	 */
	if (mm && !cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
		/*
		 * We need to make sure we walk the table only after
		 * we update the cpumask. The other side of the barrier
		 * is explained in serialize_against_pte_lookup()
		 */
		smp_mb();
	}
	if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) {
		pr_devel("copro_handle_mm_fault failed: %#x\n", result);
		return result;
	}

	if (!radix_enabled()) {
		/*
		 * update_mmu_cache() will not have loaded the hash since current->trap
		 * is not a 0x400 or 0x300, so just call hash_page_mm() here.
		 */
		access = _PAGE_PRESENT | _PAGE_READ;
		if (dsisr & CXL_PSL_DSISR_An_S)
			access |= _PAGE_WRITE;

		if (!mm && (get_region_id(dar) != USER_REGION_ID))
			access |= _PAGE_PRIVILEGED;

		if (dsisr & DSISR_NOHPTE)
			inv_flags |= HPTE_NOHPTE_UPDATE;

		local_irq_save(flags);
		hash_page_mm(mm, dar, access, 0x300, inv_flags);
		local_irq_restore(flags);
	}
	return 0;
}

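/*
 * Handle a page fault reported by the PSL: fault the page in and restart the
 * translation, or acknowledge with an address error if the fault cannot be
 * resolved.
 */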
static void cxl_handle_page_fault(struct cxl_context *ctx,
				  struct mm_struct *mm,
				  u64 dsisr, u64 dar)
{
	trace_cxl_pte_miss(ctx, dsisr, dar);

	if (cxl_handle_mm_fault(mm, dsisr, dar)) {
		cxl_ack_ae(ctx);
	} else {
		pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe);
		cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
	}
}

/*
 * Returns the mm_struct corresponding to the context ctx.
 * If mm_users == 0, the context may be in the process of being closed.
 */
static struct mm_struct *get_mem_context(struct cxl_context *ctx)
{
	if (ctx->mm == NULL)
		return NULL;

	if (!atomic_inc_not_zero(&ctx->mm->mm_users))
		return NULL;

	return ctx->mm;
}

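/*
 * On POWER8 a segment miss is flagged by CXL_PSL_DSISR_An_DS; POWER9 PSLs
 * never report one.
 */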
static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
{
	if ((cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS)))
		return true;

	return false;
}

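/*
 * On POWER8 a page fault is flagged by CXL_PSL_DSISR_An_DM; on POWER9 any
 * translation fault that reaches this point is treated as a page fault.
 */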
static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr)
{
	if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM))
		return true;

	if (cxl_is_power9())
		return true;

	return false;
}

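/*
 * Bottom half scheduled from the PSL interrupt handler. Re-validates the
 * fault registers (bare-metal only), grabs the owning task's mm for user
 * contexts and dispatches to the segment miss or page fault handlers.
 */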
void cxl_handle_fault(struct work_struct *fault_work)
{
	struct cxl_context *ctx =
		container_of(fault_work, struct cxl_context, fault_work);
	u64 dsisr = ctx->dsisr;
	u64 dar = ctx->dar;
	struct mm_struct *mm = NULL;

	if (cpu_has_feature(CPU_FTR_HVMODE)) {
		if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
		    cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
		    cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) {
			/* Most likely explanation is harmless - a dedicated
			 * process has detached and these were cleared by the
			 * PSL purge, but warn about it just in case
			 */
			dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n");
			return;
		}
	}

	/* Early return if the context is being / has been detached */
	if (ctx->status == CLOSED) {
		cxl_ack_ae(ctx);
		return;
	}

	pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. "
		"DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar);

	if (!ctx->kernel) {

		mm = get_mem_context(ctx);
		if (mm == NULL) {
			pr_devel("%s: unable to get mm for pe=%d pid=%i\n",
				 __func__, ctx->pe, pid_nr(ctx->pid));
			cxl_ack_ae(ctx);
			return;
		} else {
			pr_devel("Handling page fault for pe=%d pid=%i\n",
				 ctx->pe, pid_nr(ctx->pid));
		}
	}

	if (cxl_is_segment_miss(ctx, dsisr))
		cxl_handle_segment_miss(ctx, mm, dar);
	else if (cxl_is_page_fault(ctx, dsisr))
		cxl_handle_page_fault(ctx, mm, dsisr, dar);
	else
		WARN(1, "cxl_handle_fault has nothing to handle\n");

	if (mm)
		mmput(mm);
}

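/* Pre-load the segment table entry covering a single effective address. */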
static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
{
	struct mm_struct *mm;

	mm = get_mem_context(ctx);
	if (mm == NULL) {
		pr_devel("cxl_prefault_one unable to get mm %i\n",
			 pid_nr(ctx->pid));
		return;
	}

	cxl_fault_segment(ctx, mm, ea);

	mmput(mm);
}

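/*
 * Return the first effective address of the segment following @ea, for either
 * 1T or 256M segments depending on the VSID's segment size.
 */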
static u64 next_segment(u64 ea, u64 vsid)
{
	if (vsid & SLB_VSID_B_1T)
		ea |= (1ULL << 40) - 1;
	else
		ea |= (1ULL << 28) - 1;

	return ea + 1;
}

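/*
 * Walk every VMA in the context's mm and pre-load a segment table entry for
 * each segment the VMAs cover.
 */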
static void cxl_prefault_vma(struct cxl_context *ctx)
{
	u64 ea, last_esid = 0;
	struct copro_slb slb;
	struct vm_area_struct *vma;
	int rc;
	struct mm_struct *mm;

	mm = get_mem_context(ctx);
	if (mm == NULL) {
		pr_devel("cxl_prefault_vm unable to get mm %i\n",
			 pid_nr(ctx->pid));
		return;
	}

	mmap_read_lock(mm);
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		for (ea = vma->vm_start; ea < vma->vm_end;
				ea = next_segment(ea, slb.vsid)) {
			rc = copro_calculate_slb(mm, ea, &slb);
			if (rc)
				continue;

			if (last_esid == slb.esid)
				continue;

			cxl_load_segment(ctx, &slb);
			last_esid = slb.esid;
		}
	}
	mmap_read_unlock(mm);

	mmput(mm);
}

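/*
 * Optionally pre-load the segment table according to the AFU's prefault_mode:
 * either just the segment containing the WED, or every segment mapped by the
 * task.
 */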
void cxl_prefault(struct cxl_context *ctx, u64 wed)
{
	switch (ctx->afu->prefault_mode) {
	case CXL_PREFAULT_WED:
		cxl_prefault_one(ctx, wed);
		break;
	case CXL_PREFAULT_ALL:
		cxl_prefault_vma(ctx);
		break;
	default:
		break;
	}
}