xref: /OK3568_Linux_fs/kernel/drivers/edac/edac_device.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun  * Defines, structures, APIs for edac_device
3*4882a593Smuzhiyun  *
4*4882a593Smuzhiyun  * (C) 2007 Linux Networx (http://lnxi.com)
5*4882a593Smuzhiyun  * This file may be distributed under the terms of the
6*4882a593Smuzhiyun  * GNU General Public License.
7*4882a593Smuzhiyun  *
8*4882a593Smuzhiyun  * Written by Thayne Harbaugh
9*4882a593Smuzhiyun  * Based on work by Dan Hollis <goemon at anime dot net> and others.
10*4882a593Smuzhiyun  *	http://www.anime.net/~goemon/linux-ecc/
11*4882a593Smuzhiyun  *
12*4882a593Smuzhiyun  * NMI handling support added by
13*4882a593Smuzhiyun  *     Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
14*4882a593Smuzhiyun  *
15*4882a593Smuzhiyun  * Refactored for multi-source files:
16*4882a593Smuzhiyun  *	Doug Thompson <norsk5@xmission.com>
17*4882a593Smuzhiyun  *
18*4882a593Smuzhiyun  * Please look at Documentation/driver-api/edac.rst for more info about
19*4882a593Smuzhiyun  * EDAC core structs and functions.
20*4882a593Smuzhiyun  */
21*4882a593Smuzhiyun 
22*4882a593Smuzhiyun #ifndef _EDAC_DEVICE_H_
23*4882a593Smuzhiyun #define _EDAC_DEVICE_H_
24*4882a593Smuzhiyun 
25*4882a593Smuzhiyun #include <linux/completion.h>
26*4882a593Smuzhiyun #include <linux/device.h>
27*4882a593Smuzhiyun #include <linux/edac.h>
28*4882a593Smuzhiyun #include <linux/kobject.h>
29*4882a593Smuzhiyun #include <linux/list.h>
30*4882a593Smuzhiyun #include <linux/types.h>
31*4882a593Smuzhiyun #include <linux/sysfs.h>
32*4882a593Smuzhiyun #include <linux/workqueue.h>
33*4882a593Smuzhiyun 
34*4882a593Smuzhiyun 
35*4882a593Smuzhiyun /*
36*4882a593Smuzhiyun  * The following are the structures to provide for a generic
37*4882a593Smuzhiyun  * or abstract 'edac_device'. This set of structures and the
38*4882a593Smuzhiyun  * code that implements the APIs for the same, provide for
39*4882a593Smuzhiyun  * registering EDAC type devices which are NOT standard memory.
40*4882a593Smuzhiyun  *
41*4882a593Smuzhiyun  * CPU caches (L1 and L2)
42*4882a593Smuzhiyun  * DMA engines
43*4882a593Smuzhiyun  * Core CPU switches
44*4882a593Smuzhiyun  * Fabric switch units
45*4882a593Smuzhiyun  * PCIe interface controllers
46*4882a593Smuzhiyun  * other EDAC/ECC type devices that can be monitored for
47*4882a593Smuzhiyun  * errors, etc.
48*4882a593Smuzhiyun  *
49*4882a593Smuzhiyun  * It allows for a 2 level set of hierarchy. For example:
50*4882a593Smuzhiyun  *
51*4882a593Smuzhiyun  * cache could be composed of L1, L2 and L3 levels of cache.
52*4882a593Smuzhiyun  * Each CPU core would have its own L1 cache, while sharing
53*4882a593Smuzhiyun  * L2 and maybe L3 caches.
54*4882a593Smuzhiyun  *
55*4882a593Smuzhiyun  * View them arranged, via the sysfs presentation:
56*4882a593Smuzhiyun  * /sys/devices/system/edac/..
57*4882a593Smuzhiyun  *
58*4882a593Smuzhiyun  *	mc/		<existing memory device directory>
59*4882a593Smuzhiyun  *	cpu/cpu0/..	<L1 and L2 block directory>
60*4882a593Smuzhiyun  *		/L1-cache/ce_count
61*4882a593Smuzhiyun  *			 /ue_count
62*4882a593Smuzhiyun  *		/L2-cache/ce_count
63*4882a593Smuzhiyun  *			 /ue_count
64*4882a593Smuzhiyun  *	cpu/cpu1/..	<L1 and L2 block directory>
65*4882a593Smuzhiyun  *		/L1-cache/ce_count
66*4882a593Smuzhiyun  *			 /ue_count
67*4882a593Smuzhiyun  *		/L2-cache/ce_count
68*4882a593Smuzhiyun  *			 /ue_count
69*4882a593Smuzhiyun  *	...
70*4882a593Smuzhiyun  *
71*4882a593Smuzhiyun  *	the L1 and L2 directories would be "edac_device_block's"
72*4882a593Smuzhiyun  */
73*4882a593Smuzhiyun 
74*4882a593Smuzhiyun struct edac_device_counter {
75*4882a593Smuzhiyun 	u32 ue_count;
76*4882a593Smuzhiyun 	u32 ce_count;
77*4882a593Smuzhiyun };
78*4882a593Smuzhiyun 
/*
 * Forward declarations, needed because the attribute structures below
 * refer to these types by pointer before they are fully defined.
 */
struct edac_device_block;
struct edac_device_ctl_info;
82*4882a593Smuzhiyun 
83*4882a593Smuzhiyun /* edac_dev_sysfs_attribute structure
84*4882a593Smuzhiyun  *	used for driver sysfs attributes in mem_ctl_info
85*4882a593Smuzhiyun  *	for extra controls and attributes:
86*4882a593Smuzhiyun  *		like high level error Injection controls
87*4882a593Smuzhiyun  */
88*4882a593Smuzhiyun struct edac_dev_sysfs_attribute {
89*4882a593Smuzhiyun 	struct attribute attr;
90*4882a593Smuzhiyun 	ssize_t (*show)(struct edac_device_ctl_info *, char *);
91*4882a593Smuzhiyun 	ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t);
92*4882a593Smuzhiyun };
93*4882a593Smuzhiyun 
94*4882a593Smuzhiyun /* edac_dev_sysfs_block_attribute structure
95*4882a593Smuzhiyun  *
96*4882a593Smuzhiyun  *	used in leaf 'block' nodes for adding controls/attributes
97*4882a593Smuzhiyun  *
98*4882a593Smuzhiyun  *	each block in each instance of the containing control structure
99*4882a593Smuzhiyun  *	can have an array of the following. The show and store functions
100*4882a593Smuzhiyun  *	will be filled in with the show/store function in the
101*4882a593Smuzhiyun  *	low level driver.
102*4882a593Smuzhiyun  *
103*4882a593Smuzhiyun  *	The 'value' field will be the actual value field used for
104*4882a593Smuzhiyun  *	counting
105*4882a593Smuzhiyun  */
106*4882a593Smuzhiyun struct edac_dev_sysfs_block_attribute {
107*4882a593Smuzhiyun 	struct attribute attr;
108*4882a593Smuzhiyun 	ssize_t (*show)(struct kobject *, struct attribute *, char *);
109*4882a593Smuzhiyun 	ssize_t (*store)(struct kobject *, struct attribute *,
110*4882a593Smuzhiyun 			const char *, size_t);
111*4882a593Smuzhiyun 	struct edac_device_block *block;
112*4882a593Smuzhiyun 
113*4882a593Smuzhiyun 	unsigned int value;
114*4882a593Smuzhiyun };
115*4882a593Smuzhiyun 
116*4882a593Smuzhiyun /* device block control structure */
117*4882a593Smuzhiyun struct edac_device_block {
118*4882a593Smuzhiyun 	struct edac_device_instance *instance;	/* Up Pointer */
119*4882a593Smuzhiyun 	char name[EDAC_DEVICE_NAME_LEN + 1];
120*4882a593Smuzhiyun 
121*4882a593Smuzhiyun 	struct edac_device_counter counters;	/* basic UE and CE counters */
122*4882a593Smuzhiyun 
123*4882a593Smuzhiyun 	int nr_attribs;		/* how many attributes */
124*4882a593Smuzhiyun 
125*4882a593Smuzhiyun 	/* this block's attributes, could be NULL */
126*4882a593Smuzhiyun 	struct edac_dev_sysfs_block_attribute *block_attributes;
127*4882a593Smuzhiyun 
128*4882a593Smuzhiyun 	/* edac sysfs device control */
129*4882a593Smuzhiyun 	struct kobject kobj;
130*4882a593Smuzhiyun };
131*4882a593Smuzhiyun 
132*4882a593Smuzhiyun /* device instance control structure */
133*4882a593Smuzhiyun struct edac_device_instance {
134*4882a593Smuzhiyun 	struct edac_device_ctl_info *ctl;	/* Up pointer */
135*4882a593Smuzhiyun 	char name[EDAC_DEVICE_NAME_LEN + 4];
136*4882a593Smuzhiyun 
137*4882a593Smuzhiyun 	struct edac_device_counter counters;	/* instance counters */
138*4882a593Smuzhiyun 
139*4882a593Smuzhiyun 	u32 nr_blocks;		/* how many blocks */
140*4882a593Smuzhiyun 	struct edac_device_block *blocks;	/* block array */
141*4882a593Smuzhiyun 
142*4882a593Smuzhiyun 	/* edac sysfs device control */
143*4882a593Smuzhiyun 	struct kobject kobj;
144*4882a593Smuzhiyun };
145*4882a593Smuzhiyun 
146*4882a593Smuzhiyun 
147*4882a593Smuzhiyun /*
148*4882a593Smuzhiyun  * Abstract edac_device control info structure
149*4882a593Smuzhiyun  *
150*4882a593Smuzhiyun  */
151*4882a593Smuzhiyun struct edac_device_ctl_info {
152*4882a593Smuzhiyun 	/* for global list of edac_device_ctl_info structs */
153*4882a593Smuzhiyun 	struct list_head link;
154*4882a593Smuzhiyun 
155*4882a593Smuzhiyun 	struct module *owner;	/* Module owner of this control struct */
156*4882a593Smuzhiyun 
157*4882a593Smuzhiyun 	int dev_idx;
158*4882a593Smuzhiyun 
159*4882a593Smuzhiyun 	/* Per instance controls for this edac_device */
160*4882a593Smuzhiyun 	int log_ue;		/* boolean for logging UEs */
161*4882a593Smuzhiyun 	int log_ce;		/* boolean for logging CEs */
162*4882a593Smuzhiyun 	int panic_on_ue;	/* boolean for panic'ing on an UE */
163*4882a593Smuzhiyun 	unsigned poll_msec;	/* number of milliseconds to poll interval */
164*4882a593Smuzhiyun 	unsigned long delay;	/* number of jiffies for poll_msec */
165*4882a593Smuzhiyun 
166*4882a593Smuzhiyun 	/* Additional top controller level attributes, but specified
167*4882a593Smuzhiyun 	 * by the low level driver.
168*4882a593Smuzhiyun 	 *
169*4882a593Smuzhiyun 	 * Set by the low level driver to provide attributes at the
170*4882a593Smuzhiyun 	 * controller level, same level as 'ue_count' and 'ce_count' above.
171*4882a593Smuzhiyun 	 * An array of structures, NULL terminated
172*4882a593Smuzhiyun 	 *
173*4882a593Smuzhiyun 	 * If attributes are desired, then set to array of attributes
174*4882a593Smuzhiyun 	 * If no attributes are desired, leave NULL
175*4882a593Smuzhiyun 	 */
176*4882a593Smuzhiyun 	struct edac_dev_sysfs_attribute *sysfs_attributes;
177*4882a593Smuzhiyun 
178*4882a593Smuzhiyun 	/* pointer to main 'edac' subsys in sysfs */
179*4882a593Smuzhiyun 	struct bus_type *edac_subsys;
180*4882a593Smuzhiyun 
181*4882a593Smuzhiyun 	/* the internal state of this controller instance */
182*4882a593Smuzhiyun 	int op_state;
183*4882a593Smuzhiyun 	/* work struct for this instance */
184*4882a593Smuzhiyun 	struct delayed_work work;
185*4882a593Smuzhiyun 
186*4882a593Smuzhiyun 	/* pointer to edac polling checking routine:
187*4882a593Smuzhiyun 	 *      If NOT NULL: points to polling check routine
188*4882a593Smuzhiyun 	 *      If NULL: Then assumes INTERRUPT operation, where
189*4882a593Smuzhiyun 	 *              MC driver will receive events
190*4882a593Smuzhiyun 	 */
191*4882a593Smuzhiyun 	void (*edac_check) (struct edac_device_ctl_info * edac_dev);
192*4882a593Smuzhiyun 
193*4882a593Smuzhiyun 	struct device *dev;	/* pointer to device structure */
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun 	const char *mod_name;	/* module name */
196*4882a593Smuzhiyun 	const char *ctl_name;	/* edac controller  name */
197*4882a593Smuzhiyun 	const char *dev_name;	/* pci/platform/etc... name */
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun 	void *pvt_info;		/* pointer to 'private driver' info */
200*4882a593Smuzhiyun 
201*4882a593Smuzhiyun 	unsigned long start_time;	/* edac_device load start time (jiffies) */
202*4882a593Smuzhiyun 
203*4882a593Smuzhiyun 	struct completion removal_complete;
204*4882a593Smuzhiyun 
205*4882a593Smuzhiyun 	/* sysfs top name under 'edac' directory
206*4882a593Smuzhiyun 	 * and instance name:
207*4882a593Smuzhiyun 	 *      cpu/cpu0/...
208*4882a593Smuzhiyun 	 *      cpu/cpu1/...
209*4882a593Smuzhiyun 	 *      cpu/cpu2/...
210*4882a593Smuzhiyun 	 *      ...
211*4882a593Smuzhiyun 	 */
212*4882a593Smuzhiyun 	char name[EDAC_DEVICE_NAME_LEN + 1];
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun 	/* Number of instances supported on this control structure
215*4882a593Smuzhiyun 	 * and the array of those instances
216*4882a593Smuzhiyun 	 */
217*4882a593Smuzhiyun 	u32 nr_instances;
218*4882a593Smuzhiyun 	struct edac_device_instance *instances;
219*4882a593Smuzhiyun 
220*4882a593Smuzhiyun 	/* Event counters for the this whole EDAC Device */
221*4882a593Smuzhiyun 	struct edac_device_counter counters;
222*4882a593Smuzhiyun 
223*4882a593Smuzhiyun 	/* edac sysfs device control for the 'name'
224*4882a593Smuzhiyun 	 * device this structure controls
225*4882a593Smuzhiyun 	 */
226*4882a593Smuzhiyun 	struct kobject kobj;
227*4882a593Smuzhiyun };
228*4882a593Smuzhiyun 
/*
 * Go from a pointer to the embedded 'work' member back to the beginning
 * of the control structure that contains it.
 */
#define to_edac_mem_ctl_work(wq) \
	container_of(wq, struct mem_ctl_info, work)

#define to_edac_device_ctl_work(wq) \
	container_of(wq, struct edac_device_ctl_info, work)
235*4882a593Smuzhiyun 
/*
 * Allocation function for the 'edac_device' control info structure
 * (the matching release function is edac_device_free_ctl_info()).
 * A MC driver will allocate one of these for each edac_device it is
 * going to control/register with the EDAC CORE.
 */
struct edac_device_ctl_info *
edac_device_alloc_ctl_info(unsigned int sizeof_private,
			   char *edac_device_name,
			   unsigned int nr_instances,
			   char *edac_block_name,
			   unsigned int nr_blocks,
			   unsigned int offset_value,
			   struct edac_dev_sysfs_block_attribute *block_attributes,
			   unsigned int nr_attribs,
			   int device_index);
249*4882a593Smuzhiyun 
/*
 * Possible offset values:
 *	-1    : no offset value (use BLOCK_OFFSET_VALUE_OFF)
 *	0     : zero-based block numbers
 *	1     : 1-based block numbers
 *	other : other-based block numbers
 */
#define BLOCK_OFFSET_VALUE_OFF	((unsigned int)-1)
257*4882a593Smuzhiyun 
/* Free function for the 'edac_device' control info structure. */
void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
259*4882a593Smuzhiyun 
/**
 * edac_device_add_device() - Insert the 'edac_dev' structure into the
 *	edac_device global list and create sysfs entries associated with
 *	the edac_device structure.
 * @edac_dev: pointer to the edac_device structure to be added to the list
 *
 * Return: 0 on success, or an error code on failure.
 */
int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
272*4882a593Smuzhiyun 
/**
 * edac_device_del_device() - Remove sysfs entries for the specified
 *	edac_device structure and then remove the edac_device structure
 *	from the global list.
 * @dev: pointer to struct &device representing the edac device
 *	structure to remove
 *
 * Return: pointer to the removed edac_device structure, or %NULL if the
 * device was not found.
 */
struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
287*4882a593Smuzhiyun 
/**
 * edac_device_handle_ce_count() - Log correctable errors.
 * @edac_dev: pointer to struct &edac_device_ctl_info
 * @count: number of errors to log
 * @inst_nr: number of the instance where the CE error happened
 * @block_nr: number of the block where the CE error happened
 * @msg: message to be printed
 */
void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
				 unsigned int count, int inst_nr, int block_nr,
				 const char *msg);
300*4882a593Smuzhiyun 
/**
 * edac_device_handle_ue_count() - Log uncorrectable errors.
 * @edac_dev: pointer to struct &edac_device_ctl_info
 * @count: number of errors to log
 * @inst_nr: number of the instance where the UE error happened
 * @block_nr: number of the block where the UE error happened
 * @msg: message to be printed
 */
void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
				 unsigned int count, int inst_nr, int block_nr,
				 const char *msg);
313*4882a593Smuzhiyun 
/**
 * edac_device_handle_ce() - Log a single correctable error
 * @edac_dev: pointer to struct &edac_device_ctl_info
 * @inst_nr: number of the instance where the CE error happened
 * @block_nr: number of the block where the CE error happened
 * @msg: message to be printed
 *
 * Thin wrapper around edac_device_handle_ce_count() with a count of one.
 */
static inline void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
					 int inst_nr, int block_nr,
					 const char *msg)
{
	/* exactly one error event */
	edac_device_handle_ce_count(edac_dev, 1U, inst_nr, block_nr, msg);
}
328*4882a593Smuzhiyun 
/**
 * edac_device_handle_ue() - Log a single uncorrectable error
 * @edac_dev: pointer to struct &edac_device_ctl_info
 * @inst_nr: number of the instance where the UE error happened
 * @block_nr: number of the block where the UE error happened
 * @msg: message to be printed
 *
 * Thin wrapper around edac_device_handle_ue_count() with a count of one.
 */
static inline void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
					 int inst_nr, int block_nr,
					 const char *msg)
{
	/* exactly one error event */
	edac_device_handle_ue_count(edac_dev, 1U, inst_nr, block_nr, msg);
}
343*4882a593Smuzhiyun 
/**
 * edac_device_alloc_index() - Allocate a unique device index number
 *
 * Return: the allocated index number.
 */
int edac_device_alloc_index(void);
/* Array of strings, declared here only; its definition is not in this header. */
extern const char *edac_layer_name[];
352*4882a593Smuzhiyun #endif
353