1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * NFIT - Machine Check Handler
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
6*4882a593Smuzhiyun */
7*4882a593Smuzhiyun #include <linux/notifier.h>
8*4882a593Smuzhiyun #include <linux/acpi.h>
9*4882a593Smuzhiyun #include <linux/nd.h>
10*4882a593Smuzhiyun #include <asm/mce.h>
11*4882a593Smuzhiyun #include "nfit.h"
12*4882a593Smuzhiyun
nfit_handle_mce(struct notifier_block * nb,unsigned long val,void * data)13*4882a593Smuzhiyun static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
14*4882a593Smuzhiyun void *data)
15*4882a593Smuzhiyun {
16*4882a593Smuzhiyun struct mce *mce = (struct mce *)data;
17*4882a593Smuzhiyun struct acpi_nfit_desc *acpi_desc;
18*4882a593Smuzhiyun struct nfit_spa *nfit_spa;
19*4882a593Smuzhiyun
20*4882a593Smuzhiyun /* We only care about uncorrectable memory errors */
21*4882a593Smuzhiyun if (!mce_is_memory_error(mce) || mce_is_correctable(mce))
22*4882a593Smuzhiyun return NOTIFY_DONE;
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun /* Verify the address reported in the MCE is valid. */
25*4882a593Smuzhiyun if (!mce_usable_address(mce))
26*4882a593Smuzhiyun return NOTIFY_DONE;
27*4882a593Smuzhiyun
28*4882a593Smuzhiyun /*
29*4882a593Smuzhiyun * mce->addr contains the physical addr accessed that caused the
30*4882a593Smuzhiyun * machine check. We need to walk through the list of NFITs, and see
31*4882a593Smuzhiyun * if any of them matches that address, and only then start a scrub.
32*4882a593Smuzhiyun */
33*4882a593Smuzhiyun mutex_lock(&acpi_desc_lock);
34*4882a593Smuzhiyun list_for_each_entry(acpi_desc, &acpi_descs, list) {
35*4882a593Smuzhiyun struct device *dev = acpi_desc->dev;
36*4882a593Smuzhiyun int found_match = 0;
37*4882a593Smuzhiyun
38*4882a593Smuzhiyun mutex_lock(&acpi_desc->init_mutex);
39*4882a593Smuzhiyun list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
40*4882a593Smuzhiyun struct acpi_nfit_system_address *spa = nfit_spa->spa;
41*4882a593Smuzhiyun
42*4882a593Smuzhiyun if (nfit_spa_type(spa) != NFIT_SPA_PM)
43*4882a593Smuzhiyun continue;
44*4882a593Smuzhiyun /* find the spa that covers the mce addr */
45*4882a593Smuzhiyun if (spa->address > mce->addr)
46*4882a593Smuzhiyun continue;
47*4882a593Smuzhiyun if ((spa->address + spa->length - 1) < mce->addr)
48*4882a593Smuzhiyun continue;
49*4882a593Smuzhiyun found_match = 1;
50*4882a593Smuzhiyun dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
51*4882a593Smuzhiyun spa->range_index, spa->address, spa->length);
52*4882a593Smuzhiyun /*
53*4882a593Smuzhiyun * We can break at the first match because we're going
54*4882a593Smuzhiyun * to rescan all the SPA ranges. There shouldn't be any
55*4882a593Smuzhiyun * aliasing anyway.
56*4882a593Smuzhiyun */
57*4882a593Smuzhiyun break;
58*4882a593Smuzhiyun }
59*4882a593Smuzhiyun mutex_unlock(&acpi_desc->init_mutex);
60*4882a593Smuzhiyun
61*4882a593Smuzhiyun if (!found_match)
62*4882a593Smuzhiyun continue;
63*4882a593Smuzhiyun
64*4882a593Smuzhiyun /* If this fails due to an -ENOMEM, there is little we can do */
65*4882a593Smuzhiyun nvdimm_bus_add_badrange(acpi_desc->nvdimm_bus,
66*4882a593Smuzhiyun ALIGN(mce->addr, L1_CACHE_BYTES),
67*4882a593Smuzhiyun L1_CACHE_BYTES);
68*4882a593Smuzhiyun nvdimm_region_notify(nfit_spa->nd_region,
69*4882a593Smuzhiyun NVDIMM_REVALIDATE_POISON);
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
72*4882a593Smuzhiyun /*
73*4882a593Smuzhiyun * We can ignore an -EBUSY here because if an ARS is
74*4882a593Smuzhiyun * already in progress, just let that be the last
75*4882a593Smuzhiyun * authoritative one
76*4882a593Smuzhiyun */
77*4882a593Smuzhiyun acpi_nfit_ars_rescan(acpi_desc, 0);
78*4882a593Smuzhiyun }
79*4882a593Smuzhiyun mce->kflags |= MCE_HANDLED_NFIT;
80*4882a593Smuzhiyun break;
81*4882a593Smuzhiyun }
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun mutex_unlock(&acpi_desc_lock);
84*4882a593Smuzhiyun return NOTIFY_DONE;
85*4882a593Smuzhiyun }
86*4882a593Smuzhiyun
87*4882a593Smuzhiyun static struct notifier_block nfit_mce_dec = {
88*4882a593Smuzhiyun .notifier_call = nfit_handle_mce,
89*4882a593Smuzhiyun .priority = MCE_PRIO_NFIT,
90*4882a593Smuzhiyun };
91*4882a593Smuzhiyun
nfit_mce_register(void)92*4882a593Smuzhiyun void nfit_mce_register(void)
93*4882a593Smuzhiyun {
94*4882a593Smuzhiyun mce_register_decode_chain(&nfit_mce_dec);
95*4882a593Smuzhiyun }
96*4882a593Smuzhiyun
nfit_mce_unregister(void)97*4882a593Smuzhiyun void nfit_mce_unregister(void)
98*4882a593Smuzhiyun {
99*4882a593Smuzhiyun mce_unregister_decode_chain(&nfit_mce_dec);
100*4882a593Smuzhiyun }
101