1*4882a593Smuzhiyun /* 2*4882a593Smuzhiyun * Defines, structures, APIs for edac_mc module 3*4882a593Smuzhiyun * 4*4882a593Smuzhiyun * (C) 2007 Linux Networx (http://lnxi.com) 5*4882a593Smuzhiyun * This file may be distributed under the terms of the 6*4882a593Smuzhiyun * GNU General Public License. 7*4882a593Smuzhiyun * 8*4882a593Smuzhiyun * Written by Thayne Harbaugh 9*4882a593Smuzhiyun * Based on work by Dan Hollis <goemon at anime dot net> and others. 10*4882a593Smuzhiyun * http://www.anime.net/~goemon/linux-ecc/ 11*4882a593Smuzhiyun * 12*4882a593Smuzhiyun * NMI handling support added by 13*4882a593Smuzhiyun * Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com> 14*4882a593Smuzhiyun * 15*4882a593Smuzhiyun * Refactored for multi-source files: 16*4882a593Smuzhiyun * Doug Thompson <norsk5@xmission.com> 17*4882a593Smuzhiyun * 18*4882a593Smuzhiyun * Please look at Documentation/driver-api/edac.rst for more info about 19*4882a593Smuzhiyun * EDAC core structs and functions. 20*4882a593Smuzhiyun */ 21*4882a593Smuzhiyun 22*4882a593Smuzhiyun #ifndef _EDAC_MC_H_ 23*4882a593Smuzhiyun #define _EDAC_MC_H_ 24*4882a593Smuzhiyun 25*4882a593Smuzhiyun #include <linux/kernel.h> 26*4882a593Smuzhiyun #include <linux/types.h> 27*4882a593Smuzhiyun #include <linux/module.h> 28*4882a593Smuzhiyun #include <linux/spinlock.h> 29*4882a593Smuzhiyun #include <linux/smp.h> 30*4882a593Smuzhiyun #include <linux/pci.h> 31*4882a593Smuzhiyun #include <linux/time.h> 32*4882a593Smuzhiyun #include <linux/nmi.h> 33*4882a593Smuzhiyun #include <linux/rcupdate.h> 34*4882a593Smuzhiyun #include <linux/completion.h> 35*4882a593Smuzhiyun #include <linux/kobject.h> 36*4882a593Smuzhiyun #include <linux/platform_device.h> 37*4882a593Smuzhiyun #include <linux/workqueue.h> 38*4882a593Smuzhiyun #include <linux/edac.h> 39*4882a593Smuzhiyun 40*4882a593Smuzhiyun #if PAGE_SHIFT < 20 41*4882a593Smuzhiyun #define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT)) 42*4882a593Smuzhiyun #define MiB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) 43*4882a593Smuzhiyun #else /* PAGE_SHIFT > 20 */ 44*4882a593Smuzhiyun #define PAGES_TO_MiB(pages) ((pages) << (PAGE_SHIFT - 20)) 45*4882a593Smuzhiyun #define MiB_TO_PAGES(mb) ((mb) >> (PAGE_SHIFT - 20)) 46*4882a593Smuzhiyun #endif 47*4882a593Smuzhiyun 48*4882a593Smuzhiyun #define edac_printk(level, prefix, fmt, arg...) \ 49*4882a593Smuzhiyun printk(level "EDAC " prefix ": " fmt, ##arg) 50*4882a593Smuzhiyun 51*4882a593Smuzhiyun #define edac_mc_printk(mci, level, fmt, arg...) \ 52*4882a593Smuzhiyun printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg) 53*4882a593Smuzhiyun 54*4882a593Smuzhiyun #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \ 55*4882a593Smuzhiyun printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg) 56*4882a593Smuzhiyun 57*4882a593Smuzhiyun #define edac_device_printk(ctl, level, fmt, arg...) \ 58*4882a593Smuzhiyun printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg) 59*4882a593Smuzhiyun 60*4882a593Smuzhiyun #define edac_pci_printk(ctl, level, fmt, arg...) \ 61*4882a593Smuzhiyun printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg) 62*4882a593Smuzhiyun 63*4882a593Smuzhiyun /* prefixes for edac_printk() and edac_mc_printk() */ 64*4882a593Smuzhiyun #define EDAC_MC "MC" 65*4882a593Smuzhiyun #define EDAC_PCI "PCI" 66*4882a593Smuzhiyun #define EDAC_DEBUG "DEBUG" 67*4882a593Smuzhiyun 68*4882a593Smuzhiyun extern const char * const edac_mem_types[]; 69*4882a593Smuzhiyun 70*4882a593Smuzhiyun #ifdef CONFIG_EDAC_DEBUG 71*4882a593Smuzhiyun extern int edac_debug_level; 72*4882a593Smuzhiyun 73*4882a593Smuzhiyun #define edac_dbg(level, fmt, ...) \ 74*4882a593Smuzhiyun do { \ 75*4882a593Smuzhiyun if (level <= edac_debug_level) \ 76*4882a593Smuzhiyun edac_printk(KERN_DEBUG, EDAC_DEBUG, \ 77*4882a593Smuzhiyun "%s: " fmt, __func__, ##__VA_ARGS__); \ 78*4882a593Smuzhiyun } while (0) 79*4882a593Smuzhiyun 80*4882a593Smuzhiyun #else /* !CONFIG_EDAC_DEBUG */ 81*4882a593Smuzhiyun 82*4882a593Smuzhiyun #define edac_dbg(level, fmt, ...) \ 83*4882a593Smuzhiyun do { \ 84*4882a593Smuzhiyun if (0) \ 85*4882a593Smuzhiyun edac_printk(KERN_DEBUG, EDAC_DEBUG, \ 86*4882a593Smuzhiyun "%s: " fmt, __func__, ##__VA_ARGS__); \ 87*4882a593Smuzhiyun } while (0) 88*4882a593Smuzhiyun 89*4882a593Smuzhiyun #endif /* !CONFIG_EDAC_DEBUG */ 90*4882a593Smuzhiyun 91*4882a593Smuzhiyun #define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \ 92*4882a593Smuzhiyun PCI_DEVICE_ID_ ## vend ## _ ## dev 93*4882a593Smuzhiyun 94*4882a593Smuzhiyun #define edac_dev_name(dev) (dev)->dev_name 95*4882a593Smuzhiyun 96*4882a593Smuzhiyun #define to_mci(k) container_of(k, struct mem_ctl_info, dev) 97*4882a593Smuzhiyun 98*4882a593Smuzhiyun /** 99*4882a593Smuzhiyun * edac_mc_alloc() - Allocate and partially fill a struct &mem_ctl_info. 100*4882a593Smuzhiyun * 101*4882a593Smuzhiyun * @mc_num: Memory controller number 102*4882a593Smuzhiyun * @n_layers: Number of MC hierarchy layers 103*4882a593Smuzhiyun * @layers: Describes each layer as seen by the Memory Controller 104*4882a593Smuzhiyun * @sz_pvt: size of private storage needed 105*4882a593Smuzhiyun * 106*4882a593Smuzhiyun * 107*4882a593Smuzhiyun * Everything is kmalloc'ed as one big chunk - more efficient. 108*4882a593Smuzhiyun * Only can be used if all structures have the same lifetime - otherwise 109*4882a593Smuzhiyun * you have to allocate and initialize your own structures. 110*4882a593Smuzhiyun * 111*4882a593Smuzhiyun * Use edac_mc_free() to free mc structures allocated by this function. 112*4882a593Smuzhiyun * 113*4882a593Smuzhiyun * .. note:: 114*4882a593Smuzhiyun * 115*4882a593Smuzhiyun * drivers handle multi-rank memories in different ways: in some 116*4882a593Smuzhiyun * drivers, one multi-rank memory stick is mapped as one entry, while, in 117*4882a593Smuzhiyun * others, a single multi-rank memory stick would be mapped into several 118*4882a593Smuzhiyun * entries. Currently, this function will allocate multiple struct dimm_info 119*4882a593Smuzhiyun * on such scenarios, as grouping the multiple ranks require drivers change. 120*4882a593Smuzhiyun * 121*4882a593Smuzhiyun * Returns: 122*4882a593Smuzhiyun * On success, return a pointer to struct mem_ctl_info pointer; 123*4882a593Smuzhiyun * %NULL otherwise 124*4882a593Smuzhiyun */ 125*4882a593Smuzhiyun struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, 126*4882a593Smuzhiyun unsigned int n_layers, 127*4882a593Smuzhiyun struct edac_mc_layer *layers, 128*4882a593Smuzhiyun unsigned int sz_pvt); 129*4882a593Smuzhiyun 130*4882a593Smuzhiyun /** 131*4882a593Smuzhiyun * edac_get_owner - Return the owner's mod_name of EDAC MC 132*4882a593Smuzhiyun * 133*4882a593Smuzhiyun * Returns: 134*4882a593Smuzhiyun * Pointer to mod_name string when EDAC MC is owned. NULL otherwise. 135*4882a593Smuzhiyun */ 136*4882a593Smuzhiyun extern const char *edac_get_owner(void); 137*4882a593Smuzhiyun 138*4882a593Smuzhiyun /* 139*4882a593Smuzhiyun * edac_mc_add_mc_with_groups() - Insert the @mci structure into the mci 140*4882a593Smuzhiyun * global list and create sysfs entries associated with @mci structure. 141*4882a593Smuzhiyun * 142*4882a593Smuzhiyun * @mci: pointer to the mci structure to be added to the list 143*4882a593Smuzhiyun * @groups: optional attribute groups for the driver-specific sysfs entries 144*4882a593Smuzhiyun * 145*4882a593Smuzhiyun * Returns: 146*4882a593Smuzhiyun * 0 on Success, or an error code on failure 147*4882a593Smuzhiyun */ 148*4882a593Smuzhiyun extern int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci, 149*4882a593Smuzhiyun const struct attribute_group **groups); 150*4882a593Smuzhiyun #define edac_mc_add_mc(mci) edac_mc_add_mc_with_groups(mci, NULL) 151*4882a593Smuzhiyun 152*4882a593Smuzhiyun /** 153*4882a593Smuzhiyun * edac_mc_free() - Frees a previously allocated @mci structure 154*4882a593Smuzhiyun * 155*4882a593Smuzhiyun * @mci: pointer to a struct mem_ctl_info structure 156*4882a593Smuzhiyun */ 157*4882a593Smuzhiyun extern void edac_mc_free(struct mem_ctl_info *mci); 158*4882a593Smuzhiyun 159*4882a593Smuzhiyun /** 160*4882a593Smuzhiyun * edac_has_mcs() - Check if any MCs have been allocated. 161*4882a593Smuzhiyun * 162*4882a593Smuzhiyun * Returns: 163*4882a593Smuzhiyun * True if MC instances have been registered successfully. 164*4882a593Smuzhiyun * False otherwise. 165*4882a593Smuzhiyun */ 166*4882a593Smuzhiyun extern bool edac_has_mcs(void); 167*4882a593Smuzhiyun 168*4882a593Smuzhiyun /** 169*4882a593Smuzhiyun * edac_mc_find() - Search for a mem_ctl_info structure whose index is @idx. 170*4882a593Smuzhiyun * 171*4882a593Smuzhiyun * @idx: index to be seek 172*4882a593Smuzhiyun * 173*4882a593Smuzhiyun * If found, return a pointer to the structure. 174*4882a593Smuzhiyun * Else return NULL. 175*4882a593Smuzhiyun */ 176*4882a593Smuzhiyun extern struct mem_ctl_info *edac_mc_find(int idx); 177*4882a593Smuzhiyun 178*4882a593Smuzhiyun /** 179*4882a593Smuzhiyun * find_mci_by_dev() - Scan list of controllers looking for the one that 180*4882a593Smuzhiyun * manages the @dev device. 181*4882a593Smuzhiyun * 182*4882a593Smuzhiyun * @dev: pointer to a struct device related with the MCI 183*4882a593Smuzhiyun * 184*4882a593Smuzhiyun * Returns: on success, returns a pointer to struct &mem_ctl_info; 185*4882a593Smuzhiyun * %NULL otherwise. 186*4882a593Smuzhiyun */ 187*4882a593Smuzhiyun extern struct mem_ctl_info *find_mci_by_dev(struct device *dev); 188*4882a593Smuzhiyun 189*4882a593Smuzhiyun /** 190*4882a593Smuzhiyun * edac_mc_del_mc() - Remove sysfs entries for mci structure associated with 191*4882a593Smuzhiyun * @dev and remove mci structure from global list. 192*4882a593Smuzhiyun * 193*4882a593Smuzhiyun * @dev: Pointer to struct &device representing mci structure to remove. 194*4882a593Smuzhiyun * 195*4882a593Smuzhiyun * Returns: pointer to removed mci structure, or %NULL if device not found. 196*4882a593Smuzhiyun */ 197*4882a593Smuzhiyun extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); 198*4882a593Smuzhiyun 199*4882a593Smuzhiyun /** 200*4882a593Smuzhiyun * edac_mc_find_csrow_by_page() - Ancillary routine to identify what csrow 201*4882a593Smuzhiyun * contains a memory page. 202*4882a593Smuzhiyun * 203*4882a593Smuzhiyun * @mci: pointer to a struct mem_ctl_info structure 204*4882a593Smuzhiyun * @page: memory page to find 205*4882a593Smuzhiyun * 206*4882a593Smuzhiyun * Returns: on success, returns the csrow. -1 if not found. 207*4882a593Smuzhiyun */ 208*4882a593Smuzhiyun extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, 209*4882a593Smuzhiyun unsigned long page); 210*4882a593Smuzhiyun 211*4882a593Smuzhiyun /** 212*4882a593Smuzhiyun * edac_raw_mc_handle_error() - Reports a memory event to userspace without 213*4882a593Smuzhiyun * doing anything to discover the error location. 214*4882a593Smuzhiyun * 215*4882a593Smuzhiyun * @e: error description 216*4882a593Smuzhiyun * 217*4882a593Smuzhiyun * This raw function is used internally by edac_mc_handle_error(). It should 218*4882a593Smuzhiyun * only be called directly when the hardware error come directly from BIOS, 219*4882a593Smuzhiyun * like in the case of APEI GHES driver. 220*4882a593Smuzhiyun */ 221*4882a593Smuzhiyun void edac_raw_mc_handle_error(struct edac_raw_error_desc *e); 222*4882a593Smuzhiyun 223*4882a593Smuzhiyun /** 224*4882a593Smuzhiyun * edac_mc_handle_error() - Reports a memory event to userspace. 225*4882a593Smuzhiyun * 226*4882a593Smuzhiyun * @type: severity of the error (CE/UE/Fatal) 227*4882a593Smuzhiyun * @mci: a struct mem_ctl_info pointer 228*4882a593Smuzhiyun * @error_count: Number of errors of the same type 229*4882a593Smuzhiyun * @page_frame_number: mem page where the error occurred 230*4882a593Smuzhiyun * @offset_in_page: offset of the error inside the page 231*4882a593Smuzhiyun * @syndrome: ECC syndrome 232*4882a593Smuzhiyun * @top_layer: Memory layer[0] position 233*4882a593Smuzhiyun * @mid_layer: Memory layer[1] position 234*4882a593Smuzhiyun * @low_layer: Memory layer[2] position 235*4882a593Smuzhiyun * @msg: Message meaningful to the end users that 236*4882a593Smuzhiyun * explains the event 237*4882a593Smuzhiyun * @other_detail: Technical details about the event that 238*4882a593Smuzhiyun * may help hardware manufacturers and 239*4882a593Smuzhiyun * EDAC developers to analyse the event 240*4882a593Smuzhiyun */ 241*4882a593Smuzhiyun void edac_mc_handle_error(const enum hw_event_mc_err_type type, 242*4882a593Smuzhiyun struct mem_ctl_info *mci, 243*4882a593Smuzhiyun const u16 error_count, 244*4882a593Smuzhiyun const unsigned long page_frame_number, 245*4882a593Smuzhiyun const unsigned long offset_in_page, 246*4882a593Smuzhiyun const unsigned long syndrome, 247*4882a593Smuzhiyun const int top_layer, 248*4882a593Smuzhiyun const int mid_layer, 249*4882a593Smuzhiyun const int low_layer, 250*4882a593Smuzhiyun const char *msg, 251*4882a593Smuzhiyun const char *other_detail); 252*4882a593Smuzhiyun 253*4882a593Smuzhiyun /* 254*4882a593Smuzhiyun * edac misc APIs 255*4882a593Smuzhiyun */ 256*4882a593Smuzhiyun extern char *edac_op_state_to_string(int op_state); 257*4882a593Smuzhiyun 258*4882a593Smuzhiyun #endif /* _EDAC_MC_H_ */ 259