xref: /OK3568_Linux_fs/kernel/drivers/edac/ghes_edac.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * GHES/EDAC Linux driver
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Copyright (c) 2013 by Mauro Carvalho Chehab
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * Red Hat Inc. https://www.redhat.com
8*4882a593Smuzhiyun  */
9*4882a593Smuzhiyun 
10*4882a593Smuzhiyun #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11*4882a593Smuzhiyun 
12*4882a593Smuzhiyun #include <acpi/ghes.h>
13*4882a593Smuzhiyun #include <linux/edac.h>
14*4882a593Smuzhiyun #include <linux/dmi.h>
15*4882a593Smuzhiyun #include "edac_module.h"
16*4882a593Smuzhiyun #include <ras/ras_event.h>
17*4882a593Smuzhiyun 
/* Per-memory-controller private data, allocated by edac_mc_alloc() */
struct ghes_pvt {
	struct mem_ctl_info *mci;	/* the one logical memory controller */

	/*
	 * Buffers for the error handling routine: filled by
	 * ghes_edac_report_mem_error() while holding ghes_lock.
	 */
	char other_detail[400];		/* free-form "other detail" string */
	char msg[80];			/* decoded CPER error-type string */
};
25*4882a593Smuzhiyun 
/* Number of GHES instances registered against the single logical MC */
static refcount_t ghes_refcount = REFCOUNT_INIT(0);

/*
 * Access to ghes_pvt must be protected by ghes_lock. The spinlock
 * also provides the necessary (implicit) memory barrier for the SMP
 * case to make the pointer visible on another CPU.
 */
static struct ghes_pvt *ghes_pvt;
34*4882a593Smuzhiyun 
/*
 * This driver's representation of the system hardware, as collected
 * from DMI.
 */
struct ghes_hw_desc {
	int num_dimms;			/* number of valid entries in @dimms */
	struct dimm_info *dimms;	/* krealloc()ed array, grown 16 at a time */
} ghes_hw;
43*4882a593Smuzhiyun 
/* GHES registration mutex: serializes register/unregister instances */
static DEFINE_MUTEX(ghes_reg_mutex);

/*
 * Sync with other, potentially concurrent callers of
 * ghes_edac_report_mem_error(). We don't know what the
 * "inventive" firmware would do.
 */
static DEFINE_SPINLOCK(ghes_lock);

/* "ghes_edac.force_load=1" skips the platform check */
static bool __read_mostly force_load;
module_param(force_load, bool, 0);

/* Set once ghes_scan_system() has walked the DMI tables */
static bool system_scanned;
/* Memory Device - Type 17 of SMBIOS spec */
struct memdev_dmi_entry {
	u8 type;			/* structure type: 17 */
	u8 length;
	u16 handle;			/* matched against CPER mem_dev_handle */
	u16 phys_mem_array_handle;
	u16 mem_err_info_handle;
	u16 total_width;		/* data width + ECC bits */
	u16 data_width;			/* data bits only */
	u16 size;			/* 0xffff: unknown; 0x7fff: see extended_size */
	u8 form_factor;
	u8 device_set;
	u8 device_locator;		/* string index */
	u8 bank_locator;		/* string index */
	u8 memory_type;			/* e.g. 0x18 = DDR3, 0x1a = DDR4 */
	u16 type_detail;		/* bitfield: registered, non-volatile, ... */
	u16 speed;
	u8 manufacturer;
	u8 serial_number;
	u8 asset_tag;
	u8 part_number;
	u8 attributes;
	u32 extended_size;		/* size in MiB when size == 0x7fff */
	u16 conf_mem_clk_speed;
} __attribute__((__packed__));
85*4882a593Smuzhiyun 
/* Return the DIMM tracked by @mci whose SMBIOS handle is @handle, or NULL. */
static struct dimm_info *find_dimm_by_handle(struct mem_ctl_info *mci, u16 handle)
{
	struct dimm_info *d;

	mci_for_each_dimm(mci, d) {
		if (d->smbios_handle != handle)
			continue;
		return d;
	}

	return NULL;
}
97*4882a593Smuzhiyun 
/*
 * Build a "<bank> <device>" label for @dimm from the DMI type 17 entry
 * identified by @handle. When neither string is available the label ends
 * up empty, so the default label assigned by the EDAC core is preserved.
 */
static void dimm_setup_label(struct dimm_info *dimm, u16 handle)
{
	const char *bank = NULL, *device = NULL;
	bool have_bank, have_dev;

	dmi_memdev_name(handle, &bank, &device);

	have_bank = bank && *bank;
	have_dev  = device && *device;

	snprintf(dimm->label, sizeof(dimm->label), "%s%s%s",
		 have_bank ? bank : "",
		 (have_bank && have_dev) ? " " : "",
		 have_dev ? device : "");
}
113*4882a593Smuzhiyun 
/*
 * Translate one SMBIOS type 17 (Memory Device) entry into the EDAC
 * dimm_info fields: size in pages, memory type, ECC mode and label.
 */
static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry)
{
	/* type_detail bits 7 ("synchronous") + 13 ("registered") together = RDR */
	u16 rdr_mask = BIT(7) | BIT(13);

	if (entry->size == 0xffff) {
		/* 0xffff: size not reported by the BIOS; assume 32 MB */
		pr_info("Can't get DIMM%i size\n", dimm->idx);
		dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
	} else if (entry->size == 0x7fff) {
		/* 0x7fff: real size (in MiB) lives in extended_size */
		dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
	} else {
		/*
		 * Bit 15 set means the size field is in KiB units per SMBIOS.
		 * NOTE(review): the "<< 10" scaling below looks suspect for a
		 * KiB->MiB conversion (that would be ">> 10"); it matches this
		 * kernel generation's upstream code, so left untouched —
		 * confirm against the SMBIOS spec before relying on it.
		 */
		if (entry->size & BIT(15))
			dimm->nr_pages = MiB_TO_PAGES((entry->size & 0x7fff) << 10);
		else
			dimm->nr_pages = MiB_TO_PAGES(entry->size);
	}

	/* Map SMBIOS memory_type (+ type_detail bits) onto EDAC mem types */
	switch (entry->memory_type) {
	case 0x12:	/* DDR */
		if (entry->type_detail & BIT(13))	/* registered? */
			dimm->mtype = MEM_RDDR;
		else
			dimm->mtype = MEM_DDR;
		break;
	case 0x13:	/* DDR2 */
		if (entry->type_detail & BIT(13))
			dimm->mtype = MEM_RDDR2;
		else
			dimm->mtype = MEM_DDR2;
		break;
	case 0x14:	/* FB-DIMM */
		dimm->mtype = MEM_FB_DDR2;
		break;
	case 0x18:	/* DDR3 */
		if (entry->type_detail & BIT(12))	/* non-volatile? */
			dimm->mtype = MEM_NVDIMM;
		else if (entry->type_detail & BIT(13))
			dimm->mtype = MEM_RDDR3;
		else
			dimm->mtype = MEM_DDR3;
		break;
	case 0x1a:	/* DDR4 */
		if (entry->type_detail & BIT(12))
			dimm->mtype = MEM_NVDIMM;
		else if (entry->type_detail & BIT(13))
			dimm->mtype = MEM_RDDR4;
		else
			dimm->mtype = MEM_DDR4;
		break;
	default:
		/* Fall back to classifying purely by type_detail bits */
		if (entry->type_detail & BIT(6))
			dimm->mtype = MEM_RMBS;
		else if ((entry->type_detail & rdr_mask) == rdr_mask)
			dimm->mtype = MEM_RDR;
		else if (entry->type_detail & BIT(7))
			dimm->mtype = MEM_SDR;
		else if (entry->type_detail & BIT(9))
			dimm->mtype = MEM_EDO;
		else
			dimm->mtype = MEM_UNKNOWN;
	}

	/*
	 * Actually, we can only detect if the memory has bits for
	 * checksum or not
	 */
	if (entry->total_width == entry->data_width)
		dimm->edac_mode = EDAC_NONE;
	else
		dimm->edac_mode = EDAC_SECDED;

	dimm->dtype = DEV_UNKNOWN;
	dimm->grain = 128;		/* Likely, worse case */

	dimm_setup_label(dimm, entry->handle);

	if (dimm->nr_pages) {
		edac_dbg(1, "DIMM%i: %s size = %d MB%s\n",
			dimm->idx, edac_mem_types[dimm->mtype],
			PAGES_TO_MiB(dimm->nr_pages),
			(dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
		edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n",
			entry->memory_type, entry->type_detail,
			entry->total_width, entry->data_width);
	}

	/* Remember the handle so error reports can be matched to this DIMM */
	dimm->smbios_handle = entry->handle;
}
201*4882a593Smuzhiyun 
enumerate_dimms(const struct dmi_header * dh,void * arg)202*4882a593Smuzhiyun static void enumerate_dimms(const struct dmi_header *dh, void *arg)
203*4882a593Smuzhiyun {
204*4882a593Smuzhiyun 	struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
205*4882a593Smuzhiyun 	struct ghes_hw_desc *hw = (struct ghes_hw_desc *)arg;
206*4882a593Smuzhiyun 	struct dimm_info *d;
207*4882a593Smuzhiyun 
208*4882a593Smuzhiyun 	if (dh->type != DMI_ENTRY_MEM_DEVICE)
209*4882a593Smuzhiyun 		return;
210*4882a593Smuzhiyun 
211*4882a593Smuzhiyun 	/* Enlarge the array with additional 16 */
212*4882a593Smuzhiyun 	if (!hw->num_dimms || !(hw->num_dimms % 16)) {
213*4882a593Smuzhiyun 		struct dimm_info *new;
214*4882a593Smuzhiyun 
215*4882a593Smuzhiyun 		new = krealloc(hw->dimms, (hw->num_dimms + 16) * sizeof(struct dimm_info),
216*4882a593Smuzhiyun 			        GFP_KERNEL);
217*4882a593Smuzhiyun 		if (!new) {
218*4882a593Smuzhiyun 			WARN_ON_ONCE(1);
219*4882a593Smuzhiyun 			return;
220*4882a593Smuzhiyun 		}
221*4882a593Smuzhiyun 
222*4882a593Smuzhiyun 		hw->dimms = new;
223*4882a593Smuzhiyun 	}
224*4882a593Smuzhiyun 
225*4882a593Smuzhiyun 	d = &hw->dimms[hw->num_dimms];
226*4882a593Smuzhiyun 	d->idx = hw->num_dimms;
227*4882a593Smuzhiyun 
228*4882a593Smuzhiyun 	assign_dmi_dimm_info(d, entry);
229*4882a593Smuzhiyun 
230*4882a593Smuzhiyun 	hw->num_dimms++;
231*4882a593Smuzhiyun }
232*4882a593Smuzhiyun 
ghes_scan_system(void)233*4882a593Smuzhiyun static void ghes_scan_system(void)
234*4882a593Smuzhiyun {
235*4882a593Smuzhiyun 	if (system_scanned)
236*4882a593Smuzhiyun 		return;
237*4882a593Smuzhiyun 
238*4882a593Smuzhiyun 	dmi_walk(enumerate_dimms, &ghes_hw);
239*4882a593Smuzhiyun 
240*4882a593Smuzhiyun 	system_scanned = true;
241*4882a593Smuzhiyun }
242*4882a593Smuzhiyun 
ghes_edac_report_mem_error(int sev,struct cper_sec_mem_err * mem_err)243*4882a593Smuzhiyun void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
244*4882a593Smuzhiyun {
245*4882a593Smuzhiyun 	struct edac_raw_error_desc *e;
246*4882a593Smuzhiyun 	struct mem_ctl_info *mci;
247*4882a593Smuzhiyun 	struct ghes_pvt *pvt;
248*4882a593Smuzhiyun 	unsigned long flags;
249*4882a593Smuzhiyun 	char *p;
250*4882a593Smuzhiyun 
251*4882a593Smuzhiyun 	/*
252*4882a593Smuzhiyun 	 * We can do the locking below because GHES defers error processing
253*4882a593Smuzhiyun 	 * from NMI to IRQ context. Whenever that changes, we'd at least
254*4882a593Smuzhiyun 	 * know.
255*4882a593Smuzhiyun 	 */
256*4882a593Smuzhiyun 	if (WARN_ON_ONCE(in_nmi()))
257*4882a593Smuzhiyun 		return;
258*4882a593Smuzhiyun 
259*4882a593Smuzhiyun 	spin_lock_irqsave(&ghes_lock, flags);
260*4882a593Smuzhiyun 
261*4882a593Smuzhiyun 	pvt = ghes_pvt;
262*4882a593Smuzhiyun 	if (!pvt)
263*4882a593Smuzhiyun 		goto unlock;
264*4882a593Smuzhiyun 
265*4882a593Smuzhiyun 	mci = pvt->mci;
266*4882a593Smuzhiyun 	e = &mci->error_desc;
267*4882a593Smuzhiyun 
268*4882a593Smuzhiyun 	/* Cleans the error report buffer */
269*4882a593Smuzhiyun 	memset(e, 0, sizeof (*e));
270*4882a593Smuzhiyun 	e->error_count = 1;
271*4882a593Smuzhiyun 	e->grain = 1;
272*4882a593Smuzhiyun 	e->msg = pvt->msg;
273*4882a593Smuzhiyun 	e->other_detail = pvt->other_detail;
274*4882a593Smuzhiyun 	e->top_layer = -1;
275*4882a593Smuzhiyun 	e->mid_layer = -1;
276*4882a593Smuzhiyun 	e->low_layer = -1;
277*4882a593Smuzhiyun 	*pvt->other_detail = '\0';
278*4882a593Smuzhiyun 	*pvt->msg = '\0';
279*4882a593Smuzhiyun 
280*4882a593Smuzhiyun 	switch (sev) {
281*4882a593Smuzhiyun 	case GHES_SEV_CORRECTED:
282*4882a593Smuzhiyun 		e->type = HW_EVENT_ERR_CORRECTED;
283*4882a593Smuzhiyun 		break;
284*4882a593Smuzhiyun 	case GHES_SEV_RECOVERABLE:
285*4882a593Smuzhiyun 		e->type = HW_EVENT_ERR_UNCORRECTED;
286*4882a593Smuzhiyun 		break;
287*4882a593Smuzhiyun 	case GHES_SEV_PANIC:
288*4882a593Smuzhiyun 		e->type = HW_EVENT_ERR_FATAL;
289*4882a593Smuzhiyun 		break;
290*4882a593Smuzhiyun 	default:
291*4882a593Smuzhiyun 	case GHES_SEV_NO:
292*4882a593Smuzhiyun 		e->type = HW_EVENT_ERR_INFO;
293*4882a593Smuzhiyun 	}
294*4882a593Smuzhiyun 
295*4882a593Smuzhiyun 	edac_dbg(1, "error validation_bits: 0x%08llx\n",
296*4882a593Smuzhiyun 		 (long long)mem_err->validation_bits);
297*4882a593Smuzhiyun 
298*4882a593Smuzhiyun 	/* Error type, mapped on e->msg */
299*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
300*4882a593Smuzhiyun 		p = pvt->msg;
301*4882a593Smuzhiyun 		switch (mem_err->error_type) {
302*4882a593Smuzhiyun 		case 0:
303*4882a593Smuzhiyun 			p += sprintf(p, "Unknown");
304*4882a593Smuzhiyun 			break;
305*4882a593Smuzhiyun 		case 1:
306*4882a593Smuzhiyun 			p += sprintf(p, "No error");
307*4882a593Smuzhiyun 			break;
308*4882a593Smuzhiyun 		case 2:
309*4882a593Smuzhiyun 			p += sprintf(p, "Single-bit ECC");
310*4882a593Smuzhiyun 			break;
311*4882a593Smuzhiyun 		case 3:
312*4882a593Smuzhiyun 			p += sprintf(p, "Multi-bit ECC");
313*4882a593Smuzhiyun 			break;
314*4882a593Smuzhiyun 		case 4:
315*4882a593Smuzhiyun 			p += sprintf(p, "Single-symbol ChipKill ECC");
316*4882a593Smuzhiyun 			break;
317*4882a593Smuzhiyun 		case 5:
318*4882a593Smuzhiyun 			p += sprintf(p, "Multi-symbol ChipKill ECC");
319*4882a593Smuzhiyun 			break;
320*4882a593Smuzhiyun 		case 6:
321*4882a593Smuzhiyun 			p += sprintf(p, "Master abort");
322*4882a593Smuzhiyun 			break;
323*4882a593Smuzhiyun 		case 7:
324*4882a593Smuzhiyun 			p += sprintf(p, "Target abort");
325*4882a593Smuzhiyun 			break;
326*4882a593Smuzhiyun 		case 8:
327*4882a593Smuzhiyun 			p += sprintf(p, "Parity Error");
328*4882a593Smuzhiyun 			break;
329*4882a593Smuzhiyun 		case 9:
330*4882a593Smuzhiyun 			p += sprintf(p, "Watchdog timeout");
331*4882a593Smuzhiyun 			break;
332*4882a593Smuzhiyun 		case 10:
333*4882a593Smuzhiyun 			p += sprintf(p, "Invalid address");
334*4882a593Smuzhiyun 			break;
335*4882a593Smuzhiyun 		case 11:
336*4882a593Smuzhiyun 			p += sprintf(p, "Mirror Broken");
337*4882a593Smuzhiyun 			break;
338*4882a593Smuzhiyun 		case 12:
339*4882a593Smuzhiyun 			p += sprintf(p, "Memory Sparing");
340*4882a593Smuzhiyun 			break;
341*4882a593Smuzhiyun 		case 13:
342*4882a593Smuzhiyun 			p += sprintf(p, "Scrub corrected error");
343*4882a593Smuzhiyun 			break;
344*4882a593Smuzhiyun 		case 14:
345*4882a593Smuzhiyun 			p += sprintf(p, "Scrub uncorrected error");
346*4882a593Smuzhiyun 			break;
347*4882a593Smuzhiyun 		case 15:
348*4882a593Smuzhiyun 			p += sprintf(p, "Physical Memory Map-out event");
349*4882a593Smuzhiyun 			break;
350*4882a593Smuzhiyun 		default:
351*4882a593Smuzhiyun 			p += sprintf(p, "reserved error (%d)",
352*4882a593Smuzhiyun 				     mem_err->error_type);
353*4882a593Smuzhiyun 		}
354*4882a593Smuzhiyun 	} else {
355*4882a593Smuzhiyun 		strcpy(pvt->msg, "unknown error");
356*4882a593Smuzhiyun 	}
357*4882a593Smuzhiyun 
358*4882a593Smuzhiyun 	/* Error address */
359*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_PA) {
360*4882a593Smuzhiyun 		e->page_frame_number = PHYS_PFN(mem_err->physical_addr);
361*4882a593Smuzhiyun 		e->offset_in_page = offset_in_page(mem_err->physical_addr);
362*4882a593Smuzhiyun 	}
363*4882a593Smuzhiyun 
364*4882a593Smuzhiyun 	/* Error grain */
365*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK)
366*4882a593Smuzhiyun 		e->grain = ~mem_err->physical_addr_mask + 1;
367*4882a593Smuzhiyun 
368*4882a593Smuzhiyun 	/* Memory error location, mapped on e->location */
369*4882a593Smuzhiyun 	p = e->location;
370*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_NODE)
371*4882a593Smuzhiyun 		p += sprintf(p, "node:%d ", mem_err->node);
372*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_CARD)
373*4882a593Smuzhiyun 		p += sprintf(p, "card:%d ", mem_err->card);
374*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
375*4882a593Smuzhiyun 		p += sprintf(p, "module:%d ", mem_err->module);
376*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
377*4882a593Smuzhiyun 		p += sprintf(p, "rank:%d ", mem_err->rank);
378*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
379*4882a593Smuzhiyun 		p += sprintf(p, "bank:%d ", mem_err->bank);
380*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_BANK_GROUP)
381*4882a593Smuzhiyun 		p += sprintf(p, "bank_group:%d ",
382*4882a593Smuzhiyun 			     mem_err->bank >> CPER_MEM_BANK_GROUP_SHIFT);
383*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
384*4882a593Smuzhiyun 		p += sprintf(p, "bank_address:%d ",
385*4882a593Smuzhiyun 			     mem_err->bank & CPER_MEM_BANK_ADDRESS_MASK);
386*4882a593Smuzhiyun 	if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
387*4882a593Smuzhiyun 		u32 row = mem_err->row;
388*4882a593Smuzhiyun 
389*4882a593Smuzhiyun 		row |= cper_get_mem_extension(mem_err->validation_bits, mem_err->extended);
390*4882a593Smuzhiyun 		p += sprintf(p, "row:%d ", row);
391*4882a593Smuzhiyun 	}
392*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
393*4882a593Smuzhiyun 		p += sprintf(p, "col:%d ", mem_err->column);
394*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
395*4882a593Smuzhiyun 		p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
396*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
397*4882a593Smuzhiyun 		const char *bank = NULL, *device = NULL;
398*4882a593Smuzhiyun 		struct dimm_info *dimm;
399*4882a593Smuzhiyun 
400*4882a593Smuzhiyun 		dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
401*4882a593Smuzhiyun 		if (bank != NULL && device != NULL)
402*4882a593Smuzhiyun 			p += sprintf(p, "DIMM location:%s %s ", bank, device);
403*4882a593Smuzhiyun 		else
404*4882a593Smuzhiyun 			p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
405*4882a593Smuzhiyun 				     mem_err->mem_dev_handle);
406*4882a593Smuzhiyun 
407*4882a593Smuzhiyun 		dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle);
408*4882a593Smuzhiyun 		if (dimm) {
409*4882a593Smuzhiyun 			e->top_layer = dimm->idx;
410*4882a593Smuzhiyun 			strcpy(e->label, dimm->label);
411*4882a593Smuzhiyun 		}
412*4882a593Smuzhiyun 	}
413*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_CHIP_ID)
414*4882a593Smuzhiyun 		p += sprintf(p, "chipID: %d ",
415*4882a593Smuzhiyun 			     mem_err->extended >> CPER_MEM_CHIP_ID_SHIFT);
416*4882a593Smuzhiyun 	if (p > e->location)
417*4882a593Smuzhiyun 		*(p - 1) = '\0';
418*4882a593Smuzhiyun 
419*4882a593Smuzhiyun 	if (!*e->label)
420*4882a593Smuzhiyun 		strcpy(e->label, "unknown memory");
421*4882a593Smuzhiyun 
422*4882a593Smuzhiyun 	/* All other fields are mapped on e->other_detail */
423*4882a593Smuzhiyun 	p = pvt->other_detail;
424*4882a593Smuzhiyun 	p += snprintf(p, sizeof(pvt->other_detail),
425*4882a593Smuzhiyun 		"APEI location: %s ", e->location);
426*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
427*4882a593Smuzhiyun 		u64 status = mem_err->error_status;
428*4882a593Smuzhiyun 
429*4882a593Smuzhiyun 		p += sprintf(p, "status(0x%016llx): ", (long long)status);
430*4882a593Smuzhiyun 		switch ((status >> 8) & 0xff) {
431*4882a593Smuzhiyun 		case 1:
432*4882a593Smuzhiyun 			p += sprintf(p, "Error detected internal to the component ");
433*4882a593Smuzhiyun 			break;
434*4882a593Smuzhiyun 		case 16:
435*4882a593Smuzhiyun 			p += sprintf(p, "Error detected in the bus ");
436*4882a593Smuzhiyun 			break;
437*4882a593Smuzhiyun 		case 4:
438*4882a593Smuzhiyun 			p += sprintf(p, "Storage error in DRAM memory ");
439*4882a593Smuzhiyun 			break;
440*4882a593Smuzhiyun 		case 5:
441*4882a593Smuzhiyun 			p += sprintf(p, "Storage error in TLB ");
442*4882a593Smuzhiyun 			break;
443*4882a593Smuzhiyun 		case 6:
444*4882a593Smuzhiyun 			p += sprintf(p, "Storage error in cache ");
445*4882a593Smuzhiyun 			break;
446*4882a593Smuzhiyun 		case 7:
447*4882a593Smuzhiyun 			p += sprintf(p, "Error in one or more functional units ");
448*4882a593Smuzhiyun 			break;
449*4882a593Smuzhiyun 		case 8:
450*4882a593Smuzhiyun 			p += sprintf(p, "component failed self test ");
451*4882a593Smuzhiyun 			break;
452*4882a593Smuzhiyun 		case 9:
453*4882a593Smuzhiyun 			p += sprintf(p, "Overflow or undervalue of internal queue ");
454*4882a593Smuzhiyun 			break;
455*4882a593Smuzhiyun 		case 17:
456*4882a593Smuzhiyun 			p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR ");
457*4882a593Smuzhiyun 			break;
458*4882a593Smuzhiyun 		case 18:
459*4882a593Smuzhiyun 			p += sprintf(p, "Improper access error ");
460*4882a593Smuzhiyun 			break;
461*4882a593Smuzhiyun 		case 19:
462*4882a593Smuzhiyun 			p += sprintf(p, "Access to a memory address which is not mapped to any component ");
463*4882a593Smuzhiyun 			break;
464*4882a593Smuzhiyun 		case 20:
465*4882a593Smuzhiyun 			p += sprintf(p, "Loss of Lockstep ");
466*4882a593Smuzhiyun 			break;
467*4882a593Smuzhiyun 		case 21:
468*4882a593Smuzhiyun 			p += sprintf(p, "Response not associated with a request ");
469*4882a593Smuzhiyun 			break;
470*4882a593Smuzhiyun 		case 22:
471*4882a593Smuzhiyun 			p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits ");
472*4882a593Smuzhiyun 			break;
473*4882a593Smuzhiyun 		case 23:
474*4882a593Smuzhiyun 			p += sprintf(p, "Detection of a PATH_ERROR ");
475*4882a593Smuzhiyun 			break;
476*4882a593Smuzhiyun 		case 25:
477*4882a593Smuzhiyun 			p += sprintf(p, "Bus operation timeout ");
478*4882a593Smuzhiyun 			break;
479*4882a593Smuzhiyun 		case 26:
480*4882a593Smuzhiyun 			p += sprintf(p, "A read was issued to data that has been poisoned ");
481*4882a593Smuzhiyun 			break;
482*4882a593Smuzhiyun 		default:
483*4882a593Smuzhiyun 			p += sprintf(p, "reserved ");
484*4882a593Smuzhiyun 			break;
485*4882a593Smuzhiyun 		}
486*4882a593Smuzhiyun 	}
487*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
488*4882a593Smuzhiyun 		p += sprintf(p, "requestorID: 0x%016llx ",
489*4882a593Smuzhiyun 			     (long long)mem_err->requestor_id);
490*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
491*4882a593Smuzhiyun 		p += sprintf(p, "responderID: 0x%016llx ",
492*4882a593Smuzhiyun 			     (long long)mem_err->responder_id);
493*4882a593Smuzhiyun 	if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
494*4882a593Smuzhiyun 		p += sprintf(p, "targetID: 0x%016llx ",
495*4882a593Smuzhiyun 			     (long long)mem_err->responder_id);
496*4882a593Smuzhiyun 	if (p > pvt->other_detail)
497*4882a593Smuzhiyun 		*(p - 1) = '\0';
498*4882a593Smuzhiyun 
499*4882a593Smuzhiyun 	edac_raw_mc_handle_error(e);
500*4882a593Smuzhiyun 
501*4882a593Smuzhiyun unlock:
502*4882a593Smuzhiyun 	spin_unlock_irqrestore(&ghes_lock, flags);
503*4882a593Smuzhiyun }
504*4882a593Smuzhiyun 
/*
 * Known systems that are safe to enable this module.
 * Matched by acpi_match_platform_list() against the FADT OEM fields;
 * "ghes_edac.force_load=1" bypasses this allow-list on x86.
 */
static struct acpi_platform_list plat_list[] = {
	{"HPE   ", "Server  ", 0, ACPI_SIG_FADT, all_versions},
	{ } /* End */
};
512*4882a593Smuzhiyun 
/*
 * ghes_edac_register() - hook a GHES instance up to the EDAC core.
 *
 * All GHES instances share one logical memory controller: the first
 * caller scans DMI, allocates and registers the MC, and publishes
 * ghes_pvt under ghes_lock; later callers merely bump ghes_refcount.
 *
 * Returns 0 on success, -ENODEV if the platform is not allow-listed
 * (x86 only) or MC registration fails, -ENOMEM on allocation failure.
 */
int ghes_edac_register(struct ghes *ghes, struct device *dev)
{
	bool fake = false;
	struct mem_ctl_info *mci;
	struct ghes_pvt *pvt;
	struct edac_mc_layer layers[1];
	unsigned long flags;
	int idx = -1;
	int rc = 0;

	if (IS_ENABLED(CONFIG_X86)) {
		/* Check if safe to enable on this system */
		idx = acpi_match_platform_list(plat_list);
		if (!force_load && idx < 0)
			return -ENODEV;
	} else {
		/* Non-x86: always enable; also lets ghes_edac_unregister() run */
		force_load = true;
		idx = 0;
	}

	/* finish another registration/unregistration instance first */
	mutex_lock(&ghes_reg_mutex);

	/*
	 * We have only one logical memory controller to which all DIMMs belong.
	 */
	if (refcount_inc_not_zero(&ghes_refcount))
		goto unlock;

	ghes_scan_system();

	/* Check if we've got a bogus BIOS */
	if (!ghes_hw.num_dimms) {
		fake = true;
		ghes_hw.num_dimms = 1;
	}

	/* Single "all memory" layer, one virtual csrow per DIMM */
	layers[0].type = EDAC_MC_LAYER_ALL_MEM;
	layers[0].size = ghes_hw.num_dimms;
	layers[0].is_virt_csrow = true;

	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_pvt));
	if (!mci) {
		pr_info("Can't allocate memory for EDAC data\n");
		rc = -ENOMEM;
		goto unlock;
	}

	pvt		= mci->pvt_info;
	pvt->mci	= mci;

	mci->pdev = dev;
	mci->mtype_cap = MEM_FLAG_EMPTY;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "ghes_edac.c";
	mci->ctl_name = "ghes_edac";
	mci->dev_name = "ghes";

	if (fake) {
		pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n");
		pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n");
		pr_info("work on such system. Use this driver with caution\n");
	} else if (idx < 0) {
		/* Loaded via force_load on a non-allow-listed x86 platform */
		pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n");
		pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n");
		pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
		pr_info("If you find incorrect reports, please contact your hardware vendor\n");
		pr_info("to correct its BIOS.\n");
		pr_info("This system has %d DIMM sockets.\n", ghes_hw.num_dimms);
	}

	if (!fake) {
		/* Copy the DMI-scanned inventory into the MC's DIMM array */
		struct dimm_info *src, *dst;
		int i = 0;

		mci_for_each_dimm(mci, dst) {
			src = &ghes_hw.dimms[i];

			dst->idx	   = src->idx;
			dst->smbios_handle = src->smbios_handle;
			dst->nr_pages	   = src->nr_pages;
			dst->mtype	   = src->mtype;
			dst->edac_mode	   = src->edac_mode;
			dst->dtype	   = src->dtype;
			dst->grain	   = src->grain;

			/*
			 * If no src->label, preserve default label assigned
			 * from EDAC core.
			 */
			if (strlen(src->label))
				memcpy(dst->label, src->label, sizeof(src->label));

			i++;
		}

	} else {
		/* Bogus BIOS: expose a single placeholder DIMM */
		struct dimm_info *dimm = edac_get_dimm(mci, 0, 0, 0);

		dimm->nr_pages = 1;
		dimm->grain = 128;
		dimm->mtype = MEM_UNKNOWN;
		dimm->dtype = DEV_UNKNOWN;
		dimm->edac_mode = EDAC_SECDED;
	}

	rc = edac_mc_add_mc(mci);
	if (rc < 0) {
		pr_info("Can't register with the EDAC core\n");
		edac_mc_free(mci);
		rc = -ENODEV;
		goto unlock;
	}

	/* Publish pvt for the error reporter (see comment on ghes_pvt) */
	spin_lock_irqsave(&ghes_lock, flags);
	ghes_pvt = pvt;
	spin_unlock_irqrestore(&ghes_lock, flags);

	/* only set on success */
	refcount_set(&ghes_refcount, 1);

unlock:

	/* Not needed anymore */
	kfree(ghes_hw.dimms);
	ghes_hw.dimms = NULL;

	mutex_unlock(&ghes_reg_mutex);

	return rc;
}
645*4882a593Smuzhiyun 
/*
 * ghes_edac_unregister() - drop one GHES instance; tear down the shared
 * memory controller when the last reference goes away.
 */
void ghes_edac_unregister(struct ghes *ghes)
{
	struct mem_ctl_info *mci;
	unsigned long flags;

	/*
	 * NOTE(review): on x86, a registration that succeeded via the
	 * plat_list match alone (force_load unset) returns early here and
	 * is never torn down — confirm this matches the intended behavior
	 * for this kernel version.
	 */
	if (!force_load)
		return;

	mutex_lock(&ghes_reg_mutex);

	/* Allow a future ghes_edac_register() to re-scan DMI */
	system_scanned = false;
	memset(&ghes_hw, 0, sizeof(struct ghes_hw_desc));

	if (!refcount_dec_and_test(&ghes_refcount))
		goto unlock;

	/*
	 * Wait for the irq handler being finished.
	 */
	spin_lock_irqsave(&ghes_lock, flags);
	mci = ghes_pvt ? ghes_pvt->mci : NULL;
	ghes_pvt = NULL;	/* reporter sees NULL and bails from now on */
	spin_unlock_irqrestore(&ghes_lock, flags);

	if (!mci)
		goto unlock;

	mci = edac_mc_del_mc(mci->pdev);
	if (mci)
		edac_mc_free(mci);

unlock:
	mutex_unlock(&ghes_reg_mutex);
}
680