/*
 * Defines, structures, APIs for edac_device
 *
 * (C) 2007 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * NMI handling support added by
 *	Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
 *
 * Refactored for multi-source files:
 *	Doug Thompson <norsk5@xmission.com>
 *
 * Please look at Documentation/driver-api/edac.rst for more info about
 * EDAC core structs and functions.
 */
21*4882a593Smuzhiyun
22*4882a593Smuzhiyun #ifndef _EDAC_DEVICE_H_
23*4882a593Smuzhiyun #define _EDAC_DEVICE_H_
24*4882a593Smuzhiyun
25*4882a593Smuzhiyun #include <linux/completion.h>
26*4882a593Smuzhiyun #include <linux/device.h>
27*4882a593Smuzhiyun #include <linux/edac.h>
28*4882a593Smuzhiyun #include <linux/kobject.h>
29*4882a593Smuzhiyun #include <linux/list.h>
30*4882a593Smuzhiyun #include <linux/types.h>
31*4882a593Smuzhiyun #include <linux/sysfs.h>
32*4882a593Smuzhiyun #include <linux/workqueue.h>
33*4882a593Smuzhiyun
34*4882a593Smuzhiyun
/*
 * The following are the structures to provide for a generic
 * or abstract 'edac_device'. This set of structures and the
 * code that implements the APIs for the same, provide for
 * registering EDAC type devices which are NOT standard memory.
 *
 *	CPU caches (L1 and L2)
 *	DMA engines
 *	Core CPU switches
 *	Fabric switch units
 *	PCIe interface controllers
 *	other EDAC/ECC type devices that can be monitored for
 *	errors, etc.
 *
 * It allows for a 2 level set of hierarchy. For example:
 *
 *	cache could be composed of L1, L2 and L3 levels of cache.
 *	Each CPU core would have its own L1 cache, while sharing
 *	L2 and maybe L3 caches.
 *
 * View them arranged, via the sysfs presentation:
 *	/sys/devices/system/edac/..
 *
 *	mc/		<existing memory device directory>
 *	cpu/cpu0/..	<L1 and L2 block directory>
 *		/L1-cache/ce_count
 *			 /ue_count
 *		/L2-cache/ce_count
 *			 /ue_count
 *	cpu/cpu1/..	<L1 and L2 block directory>
 *		/L1-cache/ce_count
 *			 /ue_count
 *		/L2-cache/ce_count
 *			 /ue_count
 *	...
 *
 * The L1 and L2 directories would be "edac_device_block"s.
 */
73*4882a593Smuzhiyun
74*4882a593Smuzhiyun struct edac_device_counter {
75*4882a593Smuzhiyun u32 ue_count;
76*4882a593Smuzhiyun u32 ce_count;
77*4882a593Smuzhiyun };
78*4882a593Smuzhiyun
/*
 * Forward references: these structures point at each other, so the tags
 * are declared before any of the definitions that use them.
 */
struct edac_device_ctl_info;
struct edac_device_instance;
struct edac_device_block;
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun /* edac_dev_sysfs_attribute structure
84*4882a593Smuzhiyun * used for driver sysfs attributes in mem_ctl_info
85*4882a593Smuzhiyun * for extra controls and attributes:
86*4882a593Smuzhiyun * like high level error Injection controls
87*4882a593Smuzhiyun */
88*4882a593Smuzhiyun struct edac_dev_sysfs_attribute {
89*4882a593Smuzhiyun struct attribute attr;
90*4882a593Smuzhiyun ssize_t (*show)(struct edac_device_ctl_info *, char *);
91*4882a593Smuzhiyun ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t);
92*4882a593Smuzhiyun };
93*4882a593Smuzhiyun
94*4882a593Smuzhiyun /* edac_dev_sysfs_block_attribute structure
95*4882a593Smuzhiyun *
96*4882a593Smuzhiyun * used in leaf 'block' nodes for adding controls/attributes
97*4882a593Smuzhiyun *
98*4882a593Smuzhiyun * each block in each instance of the containing control structure
99*4882a593Smuzhiyun * can have an array of the following. The show and store functions
100*4882a593Smuzhiyun * will be filled in with the show/store function in the
101*4882a593Smuzhiyun * low level driver.
102*4882a593Smuzhiyun *
103*4882a593Smuzhiyun * The 'value' field will be the actual value field used for
104*4882a593Smuzhiyun * counting
105*4882a593Smuzhiyun */
106*4882a593Smuzhiyun struct edac_dev_sysfs_block_attribute {
107*4882a593Smuzhiyun struct attribute attr;
108*4882a593Smuzhiyun ssize_t (*show)(struct kobject *, struct attribute *, char *);
109*4882a593Smuzhiyun ssize_t (*store)(struct kobject *, struct attribute *,
110*4882a593Smuzhiyun const char *, size_t);
111*4882a593Smuzhiyun struct edac_device_block *block;
112*4882a593Smuzhiyun
113*4882a593Smuzhiyun unsigned int value;
114*4882a593Smuzhiyun };
115*4882a593Smuzhiyun
116*4882a593Smuzhiyun /* device block control structure */
117*4882a593Smuzhiyun struct edac_device_block {
118*4882a593Smuzhiyun struct edac_device_instance *instance; /* Up Pointer */
119*4882a593Smuzhiyun char name[EDAC_DEVICE_NAME_LEN + 1];
120*4882a593Smuzhiyun
121*4882a593Smuzhiyun struct edac_device_counter counters; /* basic UE and CE counters */
122*4882a593Smuzhiyun
123*4882a593Smuzhiyun int nr_attribs; /* how many attributes */
124*4882a593Smuzhiyun
125*4882a593Smuzhiyun /* this block's attributes, could be NULL */
126*4882a593Smuzhiyun struct edac_dev_sysfs_block_attribute *block_attributes;
127*4882a593Smuzhiyun
128*4882a593Smuzhiyun /* edac sysfs device control */
129*4882a593Smuzhiyun struct kobject kobj;
130*4882a593Smuzhiyun };
131*4882a593Smuzhiyun
132*4882a593Smuzhiyun /* device instance control structure */
133*4882a593Smuzhiyun struct edac_device_instance {
134*4882a593Smuzhiyun struct edac_device_ctl_info *ctl; /* Up pointer */
135*4882a593Smuzhiyun char name[EDAC_DEVICE_NAME_LEN + 4];
136*4882a593Smuzhiyun
137*4882a593Smuzhiyun struct edac_device_counter counters; /* instance counters */
138*4882a593Smuzhiyun
139*4882a593Smuzhiyun u32 nr_blocks; /* how many blocks */
140*4882a593Smuzhiyun struct edac_device_block *blocks; /* block array */
141*4882a593Smuzhiyun
142*4882a593Smuzhiyun /* edac sysfs device control */
143*4882a593Smuzhiyun struct kobject kobj;
144*4882a593Smuzhiyun };
145*4882a593Smuzhiyun
146*4882a593Smuzhiyun
147*4882a593Smuzhiyun /*
148*4882a593Smuzhiyun * Abstract edac_device control info structure
149*4882a593Smuzhiyun *
150*4882a593Smuzhiyun */
151*4882a593Smuzhiyun struct edac_device_ctl_info {
152*4882a593Smuzhiyun /* for global list of edac_device_ctl_info structs */
153*4882a593Smuzhiyun struct list_head link;
154*4882a593Smuzhiyun
155*4882a593Smuzhiyun struct module *owner; /* Module owner of this control struct */
156*4882a593Smuzhiyun
157*4882a593Smuzhiyun int dev_idx;
158*4882a593Smuzhiyun
159*4882a593Smuzhiyun /* Per instance controls for this edac_device */
160*4882a593Smuzhiyun int log_ue; /* boolean for logging UEs */
161*4882a593Smuzhiyun int log_ce; /* boolean for logging CEs */
162*4882a593Smuzhiyun int panic_on_ue; /* boolean for panic'ing on an UE */
163*4882a593Smuzhiyun unsigned poll_msec; /* number of milliseconds to poll interval */
164*4882a593Smuzhiyun unsigned long delay; /* number of jiffies for poll_msec */
165*4882a593Smuzhiyun
166*4882a593Smuzhiyun /* Additional top controller level attributes, but specified
167*4882a593Smuzhiyun * by the low level driver.
168*4882a593Smuzhiyun *
169*4882a593Smuzhiyun * Set by the low level driver to provide attributes at the
170*4882a593Smuzhiyun * controller level, same level as 'ue_count' and 'ce_count' above.
171*4882a593Smuzhiyun * An array of structures, NULL terminated
172*4882a593Smuzhiyun *
173*4882a593Smuzhiyun * If attributes are desired, then set to array of attributes
174*4882a593Smuzhiyun * If no attributes are desired, leave NULL
175*4882a593Smuzhiyun */
176*4882a593Smuzhiyun struct edac_dev_sysfs_attribute *sysfs_attributes;
177*4882a593Smuzhiyun
178*4882a593Smuzhiyun /* pointer to main 'edac' subsys in sysfs */
179*4882a593Smuzhiyun struct bus_type *edac_subsys;
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun /* the internal state of this controller instance */
182*4882a593Smuzhiyun int op_state;
183*4882a593Smuzhiyun /* work struct for this instance */
184*4882a593Smuzhiyun struct delayed_work work;
185*4882a593Smuzhiyun
186*4882a593Smuzhiyun /* pointer to edac polling checking routine:
187*4882a593Smuzhiyun * If NOT NULL: points to polling check routine
188*4882a593Smuzhiyun * If NULL: Then assumes INTERRUPT operation, where
189*4882a593Smuzhiyun * MC driver will receive events
190*4882a593Smuzhiyun */
191*4882a593Smuzhiyun void (*edac_check) (struct edac_device_ctl_info * edac_dev);
192*4882a593Smuzhiyun
193*4882a593Smuzhiyun struct device *dev; /* pointer to device structure */
194*4882a593Smuzhiyun
195*4882a593Smuzhiyun const char *mod_name; /* module name */
196*4882a593Smuzhiyun const char *ctl_name; /* edac controller name */
197*4882a593Smuzhiyun const char *dev_name; /* pci/platform/etc... name */
198*4882a593Smuzhiyun
199*4882a593Smuzhiyun void *pvt_info; /* pointer to 'private driver' info */
200*4882a593Smuzhiyun
201*4882a593Smuzhiyun unsigned long start_time; /* edac_device load start time (jiffies) */
202*4882a593Smuzhiyun
203*4882a593Smuzhiyun struct completion removal_complete;
204*4882a593Smuzhiyun
205*4882a593Smuzhiyun /* sysfs top name under 'edac' directory
206*4882a593Smuzhiyun * and instance name:
207*4882a593Smuzhiyun * cpu/cpu0/...
208*4882a593Smuzhiyun * cpu/cpu1/...
209*4882a593Smuzhiyun * cpu/cpu2/...
210*4882a593Smuzhiyun * ...
211*4882a593Smuzhiyun */
212*4882a593Smuzhiyun char name[EDAC_DEVICE_NAME_LEN + 1];
213*4882a593Smuzhiyun
214*4882a593Smuzhiyun /* Number of instances supported on this control structure
215*4882a593Smuzhiyun * and the array of those instances
216*4882a593Smuzhiyun */
217*4882a593Smuzhiyun u32 nr_instances;
218*4882a593Smuzhiyun struct edac_device_instance *instances;
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun /* Event counters for the this whole EDAC Device */
221*4882a593Smuzhiyun struct edac_device_counter counters;
222*4882a593Smuzhiyun
223*4882a593Smuzhiyun /* edac sysfs device control for the 'name'
224*4882a593Smuzhiyun * device this structure controls
225*4882a593Smuzhiyun */
226*4882a593Smuzhiyun struct kobject kobj;
227*4882a593Smuzhiyun };
228*4882a593Smuzhiyun
/*
 * To get from the instance's wq to the beginning of the ctl structure:
 * given a pointer to the embedded 'work' member, recover the structure
 * that contains it.
 *
 * NOTE(review): struct mem_ctl_info is not declared in this header; it is
 * presumably provided by <linux/edac.h> - confirm.
 */
#define to_edac_mem_ctl_work(w) \
		container_of(w, struct mem_ctl_info, work)

#define to_edac_device_ctl_work(w) \
		container_of(w, struct edac_device_ctl_info, work)
235*4882a593Smuzhiyun
/*
 * The alloc() function for the 'edac_device' control info structure
 * (its free() counterpart is edac_device_free_ctl_info()). A MC driver
 * will allocate one of these for each edac_device it is going to
 * control/register with the EDAC CORE.
 *
 * @offset_value may be BLOCK_OFFSET_VALUE_OFF to indicate that no offset
 * value is used. The exact meaning of the remaining arguments is defined
 * by the implementation, which is not part of this header.
 */
extern struct edac_device_ctl_info *edac_device_alloc_ctl_info(
		unsigned sizeof_private,
		char *edac_device_name, unsigned nr_instances,
		char *edac_block_name, unsigned nr_blocks,
		unsigned offset_value,
		struct edac_dev_sysfs_block_attribute *block_attributes,
		unsigned nr_attribs,
		int device_index);
249*4882a593Smuzhiyun
/*
 * The 'offset_value' argument of edac_device_alloc_ctl_info() can be:
 *	-1	indicating no offset value
 *	0	for zero-based block numbers
 *	1	for 1-based block number
 *	other	for other-based block number
 *
 * The parameter is unsigned, so "-1" is spelled as the all-ones value
 * below.
 */
#define	BLOCK_OFFSET_VALUE_OFF	((unsigned) -1)
257*4882a593Smuzhiyun
/*
 * The free() counterpart of edac_device_alloc_ctl_info() for the
 * 'edac_device' control info structure.
 */
extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
259*4882a593Smuzhiyun
/**
 * edac_device_add_device - Insert the 'edac_dev' structure into the
 *	edac_device global list and create sysfs entries associated with
 *	edac_device structure.
 *
 * @edac_dev: pointer to edac_device structure to be added to the list
 *	'edac_device' structure.
 *
 * Returns:
 *	0 on Success, or an error code on failure
 */
extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
272*4882a593Smuzhiyun
/**
 * edac_device_del_device - Remove sysfs entries for specified edac_device
 *	structure and then remove edac_device structure from global list
 *
 * @dev:
 *	Pointer to struct &device representing the edac device
 *	structure to remove.
 *
 * Returns:
 *	Pointer to removed edac_device structure,
 *	or %NULL if device not found.
 */
extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
287*4882a593Smuzhiyun
/**
 * edac_device_handle_ce_count - Log correctable errors.
 *
 * @edac_dev: pointer to struct &edac_device_ctl_info
 * @count: Number of errors to log.
 * @inst_nr: number of the instance where the CE error happened
 * @block_nr: number of the block where the CE error happened
 * @msg: message to be printed
 */
void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
				 unsigned int count, int inst_nr, int block_nr,
				 const char *msg);
300*4882a593Smuzhiyun
/**
 * edac_device_handle_ue_count - Log uncorrectable errors.
 *
 * @edac_dev: pointer to struct &edac_device_ctl_info
 * @count: Number of errors to log.
 * @inst_nr: number of the instance where the UE error happened
 * @block_nr: number of the block where the UE error happened
 * @msg: message to be printed
 */
void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
				 unsigned int count, int inst_nr, int block_nr,
				 const char *msg);
313*4882a593Smuzhiyun
/**
 * edac_device_handle_ce(): Log a single correctable error
 *
 * @edac_dev: pointer to struct &edac_device_ctl_info
 * @inst_nr: number of the instance where the CE error happened
 * @block_nr: number of the block where the CE error happened
 * @msg: message to be printed
 *
 * Convenience wrapper: forwards to edac_device_handle_ce_count() with a
 * count of 1.
 */
static inline void
edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, int inst_nr,
		      int block_nr, const char *msg)
{
	edac_device_handle_ce_count(edac_dev, 1, inst_nr, block_nr, msg);
}
328*4882a593Smuzhiyun
/**
 * edac_device_handle_ue(): Log a single uncorrectable error
 *
 * @edac_dev: pointer to struct &edac_device_ctl_info
 * @inst_nr: number of the instance where the UE error happened
 * @block_nr: number of the block where the UE error happened
 * @msg: message to be printed
 *
 * Convenience wrapper: forwards to edac_device_handle_ue_count() with a
 * count of 1.
 */
static inline void
edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, int inst_nr,
		      int block_nr, const char *msg)
{
	edac_device_handle_ue_count(edac_dev, 1, inst_nr, block_nr, msg);
}
343*4882a593Smuzhiyun
/**
 * edac_device_alloc_index - Allocate a unique device index number
 *
 * Returns:
 *	allocated index number
 */
extern int edac_device_alloc_index(void);

/* Array of name strings; the definition lives outside this header. */
extern const char *edac_layer_name[];
352*4882a593Smuzhiyun #endif
353