diff options
Diffstat (limited to 'drivers/edac/edac_pci_sysfs.c')
-rw-r--r-- | drivers/edac/edac_pci_sysfs.c | 620 |
1 files changed, 620 insertions, 0 deletions
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c new file mode 100644 index 00000000000..fac94cae2c3 --- /dev/null +++ b/drivers/edac/edac_pci_sysfs.c @@ -0,0 +1,620 @@ +/* + * (C) 2005, 2006 Linux Networx (http://lnxi.com) + * This file may be distributed under the terms of the + * GNU General Public License. + * + * Written Doug Thompson <norsk5@xmission.com> + * + */ +#include <linux/module.h> +#include <linux/sysdev.h> +#include <linux/ctype.h> + +#include "edac_core.h" +#include "edac_module.h" + +#ifdef CONFIG_PCI + +#define EDAC_PCI_SYMLINK "device" + +static int check_pci_errors; /* default YES check PCI parity */ +static int edac_pci_panic_on_pe; /* default no panic on PCI Parity */ +static int edac_pci_log_pe = 1; /* log PCI parity errors */ +static int edac_pci_log_npe = 1; /* log PCI non-parity error errors */ +static atomic_t pci_parity_count = ATOMIC_INIT(0); +static atomic_t pci_nonparity_count = ATOMIC_INIT(0); +static int edac_pci_poll_msec = 1000; + +static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */ +static struct completion edac_pci_kobj_complete; +static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0); + +int edac_pci_get_check_errors(void) +{ + return check_pci_errors; +} + +int edac_pci_get_log_pe(void) +{ + return edac_pci_log_pe; +} + +int edac_pci_get_log_npe(void) +{ + return edac_pci_log_npe; +} + +int edac_pci_get_panic_on_pe(void) +{ + return edac_pci_panic_on_pe; +} + +int edac_pci_get_poll_msec(void) +{ + return edac_pci_poll_msec; +} + +/**************************** EDAC PCI sysfs instance *******************/ +static ssize_t instance_pe_count_show(struct edac_pci_ctl_info *pci, char *data) +{ + return sprintf(data, "%u\n", atomic_read(&pci->counters.pe_count)); +} + +static ssize_t instance_npe_count_show(struct edac_pci_ctl_info *pci, + char *data) +{ + return sprintf(data, "%u\n", atomic_read(&pci->counters.npe_count)); +} + +#define to_instance(k) container_of(k, struct edac_pci_ctl_info, kobj) +#define to_instance_attr(a) container_of(a, struct instance_attribute, attr) + +/* DEVICE instance kobject release() function */ +static void edac_pci_instance_release(struct kobject *kobj) +{ + struct edac_pci_ctl_info *pci; + + debugf1("%s()\n", __func__); + + pci = to_instance(kobj); + complete(&pci->kobj_complete); +} + +/* instance specific attribute structure */ +struct instance_attribute { + struct attribute attr; + ssize_t(*show) (struct edac_pci_ctl_info *, char *); + ssize_t(*store) (struct edac_pci_ctl_info *, const char *, size_t); +}; + +/* Function to 'show' fields from the edac_pci 'instance' structure */ +static ssize_t edac_pci_instance_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + struct edac_pci_ctl_info *pci = to_instance(kobj); + struct instance_attribute *instance_attr = to_instance_attr(attr); + + if (instance_attr->show) + return instance_attr->show(pci, buffer); + return -EIO; +} + +/* Function to 'store' fields into the edac_pci 'instance' structure */ +static ssize_t edac_pci_instance_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct edac_pci_ctl_info *pci = to_instance(kobj); + struct instance_attribute *instance_attr = to_instance_attr(attr); + + if (instance_attr->store) + return instance_attr->store(pci, buffer, count); + return -EIO; +} + +static struct sysfs_ops pci_instance_ops = { + .show = edac_pci_instance_show, + .store = edac_pci_instance_store +}; + +#define INSTANCE_ATTR(_name, _mode, _show, _store) \ +static struct instance_attribute attr_instance_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL); +INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL); + +/* pci instance attributes */ +static struct instance_attribute *pci_instance_attr[] = { + &attr_instance_pe_count, + &attr_instance_npe_count, + NULL +}; + +/* the ktype for pci instance */ +static struct kobj_type ktype_pci_instance = { + .release = edac_pci_instance_release, + .sysfs_ops = &pci_instance_ops, + .default_attrs = (struct attribute **)pci_instance_attr, +}; + +static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx) +{ + int err; + + pci->kobj.parent = &edac_pci_kobj; + pci->kobj.ktype = &ktype_pci_instance; + + err = kobject_set_name(&pci->kobj, "pci%d", idx); + if (err) + return err; + + err = kobject_register(&pci->kobj); + if (err != 0) { + debugf2("%s() failed to register instance pci%d\n", + __func__, idx); + return err; + } + + debugf1("%s() Register instance 'pci%d' kobject\n", __func__, idx); + + return 0; +} + +static void +edac_pci_delete_instance_kobj(struct edac_pci_ctl_info *pci, int idx) +{ + init_completion(&pci->kobj_complete); + kobject_unregister(&pci->kobj); + wait_for_completion(&pci->kobj_complete); +} + +/***************************** EDAC PCI sysfs root **********************/ +#define to_edacpci(k) container_of(k, struct edac_pci_ctl_info, kobj) +#define to_edacpci_attr(a) container_of(a, struct edac_pci_attr, attr) + +static ssize_t edac_pci_int_show(void *ptr, char *buffer) +{ + int *value = ptr; + return sprintf(buffer, "%d\n", *value); +} + +static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count) +{ + int *value = ptr; + + if (isdigit(*buffer)) + *value = simple_strtoul(buffer, NULL, 0); + + return count; +} + +struct edac_pci_dev_attribute { + struct attribute attr; + void *value; + ssize_t(*show) (void *, char *); + ssize_t(*store) (void *, const char *, size_t); +}; + +/* Set of show/store abstract level functions for PCI Parity object */ +static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr, + char *buffer) +{ + struct edac_pci_dev_attribute *edac_pci_dev; + edac_pci_dev = (struct edac_pci_dev_attribute *)attr; + + if (edac_pci_dev->show) + return edac_pci_dev->show(edac_pci_dev->value, buffer); + return -EIO; +} + +static ssize_t edac_pci_dev_store(struct kobject *kobj, + struct attribute *attr, const char *buffer, + size_t count) +{ + struct edac_pci_dev_attribute *edac_pci_dev; + edac_pci_dev = (struct edac_pci_dev_attribute *)attr; + + if (edac_pci_dev->show) + return edac_pci_dev->store(edac_pci_dev->value, buffer, count); + return -EIO; +} + +static struct sysfs_ops edac_pci_sysfs_ops = { + .show = edac_pci_dev_show, + .store = edac_pci_dev_store +}; + +#define EDAC_PCI_ATTR(_name,_mode,_show,_store) \ +static struct edac_pci_dev_attribute edac_pci_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .value = &_name, \ + .show = _show, \ + .store = _store, \ +}; + +#define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \ +static struct edac_pci_dev_attribute edac_pci_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .value = _data, \ + .show = _show, \ + .store = _store, \ +}; + +/* PCI Parity control files */ +EDAC_PCI_ATTR(check_pci_errors, S_IRUGO | S_IWUSR, edac_pci_int_show, + edac_pci_int_store); +EDAC_PCI_ATTR(edac_pci_log_pe, S_IRUGO | S_IWUSR, edac_pci_int_show, + edac_pci_int_store); +EDAC_PCI_ATTR(edac_pci_log_npe, S_IRUGO | S_IWUSR, edac_pci_int_show, + edac_pci_int_store); +EDAC_PCI_ATTR(edac_pci_panic_on_pe, S_IRUGO | S_IWUSR, edac_pci_int_show, + edac_pci_int_store); +EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL); +EDAC_PCI_ATTR(pci_nonparity_count, S_IRUGO, edac_pci_int_show, NULL); + +/* Base Attributes of the memory ECC object */ +static struct edac_pci_dev_attribute *edac_pci_attr[] = { + &edac_pci_attr_check_pci_errors, + &edac_pci_attr_edac_pci_log_pe, + &edac_pci_attr_edac_pci_log_npe, + &edac_pci_attr_edac_pci_panic_on_pe, + &edac_pci_attr_pci_parity_count, + &edac_pci_attr_pci_nonparity_count, + NULL, +}; + +/* No memory to release */ +static void edac_pci_release(struct kobject *kobj) +{ + struct edac_pci_ctl_info *pci; + + pci = to_edacpci(kobj); + + debugf1("%s()\n", __func__); + complete(&pci->kobj_complete); +} + +static struct kobj_type ktype_edac_pci = { + .release = edac_pci_release, + .sysfs_ops = &edac_pci_sysfs_ops, + .default_attrs = (struct attribute **)edac_pci_attr, +}; + +/** + * edac_sysfs_pci_setup() + * + * setup the sysfs for EDAC PCI attributes + * assumes edac_class has already been initialized + */ +int edac_pci_register_main_kobj(void) +{ + int err; + struct sysdev_class *edac_class; + + debugf1("%s()\n", __func__); + + edac_class = edac_get_edac_class(); + if (edac_class == NULL) { + debugf1("%s() no edac_class\n", __func__); + return -ENODEV; + } + + edac_pci_kobj.ktype = &ktype_edac_pci; + + edac_pci_kobj.parent = &edac_class->kset.kobj; + + err = kobject_set_name(&edac_pci_kobj, "pci"); + if (err) + return err; + + /* Instanstiate the pci object */ + /* FIXME: maybe new sysdev_create_subdir() */ + err = kobject_register(&edac_pci_kobj); + + if (err) { + debugf1("Failed to register '.../edac/pci'\n"); + return err; + } + + debugf1("Registered '.../edac/pci' kobject\n"); + + return 0; +} + +/* + * edac_pci_unregister_main_kobj() + * + * perform the sysfs teardown for the PCI attributes + */ +void edac_pci_unregister_main_kobj(void) +{ + debugf0("%s()\n", __func__); + init_completion(&edac_pci_kobj_complete); + kobject_unregister(&edac_pci_kobj); + wait_for_completion(&edac_pci_kobj_complete); +} + +int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci) +{ + int err; + struct kobject *edac_kobj = &pci->kobj; + + if (atomic_inc_return(&edac_pci_sysfs_refcount) == 1) { + err = edac_pci_register_main_kobj(); + if (err) { + atomic_dec(&edac_pci_sysfs_refcount); + return err; + } + } + + err = edac_pci_create_instance_kobj(pci, pci->pci_idx); + if (err) { + if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) + edac_pci_unregister_main_kobj(); + } + + debugf0("%s() idx=%d\n", __func__, pci->pci_idx); + + err = sysfs_create_link(edac_kobj, &pci->dev->kobj, EDAC_PCI_SYMLINK); + if (err) { + debugf0("%s() sysfs_create_link() returned err= %d\n", + __func__, err); + return err; + } + + return 0; +} + +void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci) +{ + debugf0("%s()\n", __func__); + + edac_pci_delete_instance_kobj(pci, pci->pci_idx); + + sysfs_remove_link(&pci->kobj, EDAC_PCI_SYMLINK); + + if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) + edac_pci_unregister_main_kobj(); +} + +/************************ PCI error handling *************************/ +static u16 get_pci_parity_status(struct pci_dev *dev, int secondary) +{ + int where; + u16 status; + + where = secondary ? PCI_SEC_STATUS : PCI_STATUS; + pci_read_config_word(dev, where, &status); + + /* If we get back 0xFFFF then we must suspect that the card has been + * pulled but the Linux PCI layer has not yet finished cleaning up. + * We don't want to report on such devices + */ + + if (status == 0xFFFF) { + u32 sanity; + + pci_read_config_dword(dev, 0, &sanity); + + if (sanity == 0xFFFFFFFF) + return 0; + } + + status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR | + PCI_STATUS_PARITY; + + if (status) + /* reset only the bits we are interested in */ + pci_write_config_word(dev, where, status); + + return status; +} + +typedef void (*pci_parity_check_fn_t) (struct pci_dev * dev); + +/* Clear any PCI parity errors logged by this device. */ +static void edac_pci_dev_parity_clear(struct pci_dev *dev) +{ + u8 header_type; + + get_pci_parity_status(dev, 0); + + /* read the device TYPE, looking for bridges */ + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + + if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) + get_pci_parity_status(dev, 1); +} + +/* + * PCI Parity polling + * + */ +static void edac_pci_dev_parity_test(struct pci_dev *dev) +{ + u16 status; + u8 header_type; + + /* read the STATUS register on this device + */ + status = get_pci_parity_status(dev, 0); + + debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id); + + /* check the status reg for errors */ + if (status) { + if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) { + edac_printk(KERN_CRIT, EDAC_PCI, + "Signaled System Error on %s\n", + pci_name(dev)); + atomic_inc(&pci_nonparity_count); + } + + if (status & (PCI_STATUS_PARITY)) { + edac_printk(KERN_CRIT, EDAC_PCI, + "Master Data Parity Error on %s\n", + pci_name(dev)); + + atomic_inc(&pci_parity_count); + } + + if (status & (PCI_STATUS_DETECTED_PARITY)) { + edac_printk(KERN_CRIT, EDAC_PCI, + "Detected Parity Error on %s\n", + pci_name(dev)); + + atomic_inc(&pci_parity_count); + } + } + + /* read the device TYPE, looking for bridges */ + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + + debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id); + + if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { + /* On bridges, need to examine secondary status register */ + status = get_pci_parity_status(dev, 1); + + debugf2("PCI SEC_STATUS= 0x%04x %s\n", status, dev->dev.bus_id); + + /* check the secondary status reg for errors */ + if (status) { + if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) { + edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " + "Signaled System Error on %s\n", + pci_name(dev)); + atomic_inc(&pci_nonparity_count); + } + + if (status & (PCI_STATUS_PARITY)) { + edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " + "Master Data Parity Error on " + "%s\n", pci_name(dev)); + + atomic_inc(&pci_parity_count); + } + + if (status & (PCI_STATUS_DETECTED_PARITY)) { + edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " + "Detected Parity Error on %s\n", + pci_name(dev)); + + atomic_inc(&pci_parity_count); + } + } + } +} + +/* + * pci_dev parity list iterator + * Scan the PCI device list for one iteration, looking for SERRORs + * Master Parity ERRORS or Parity ERRORs on primary or secondary devices + */ +static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) +{ + struct pci_dev *dev = NULL; + + /* request for kernel access to the next PCI device, if any, + * and while we are looking at it have its reference count + * bumped until we are done with it + */ + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + fn(dev); + } +} + +/* + * edac_pci_do_parity_check + * + * performs the actual PCI parity check operation + */ +void edac_pci_do_parity_check(void) +{ + unsigned long flags; + int before_count; + + debugf3("%s()\n", __func__); + + if (!check_pci_errors) + return; + + before_count = atomic_read(&pci_parity_count); + + /* scan all PCI devices looking for a Parity Error on devices and + * bridges + */ + local_irq_save(flags); + edac_pci_dev_parity_iterator(edac_pci_dev_parity_test); + local_irq_restore(flags); + + /* Only if operator has selected panic on PCI Error */ + if (edac_pci_get_panic_on_pe()) { + /* If the count is different 'after' from 'before' */ + if (before_count != atomic_read(&pci_parity_count)) + panic("EDAC: PCI Parity Error"); + } +} + +void edac_pci_clear_parity_errors(void) +{ + /* Clear any PCI bus parity errors that devices initially have logged + * in their registers. + */ + edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear); +} +void edac_pci_handle_pe(struct edac_pci_ctl_info *pci, const char *msg) +{ + + /* global PE counter incremented by edac_pci_do_parity_check() */ + atomic_inc(&pci->counters.pe_count); + + if (edac_pci_get_log_pe()) + edac_pci_printk(pci, KERN_WARNING, + "Parity Error ctl: %s %d: %s\n", + pci->ctl_name, pci->pci_idx, msg); + + /* + * poke all PCI devices and see which one is the troublemaker + * panic() is called if set + */ + edac_pci_do_parity_check(); +} + +EXPORT_SYMBOL_GPL(edac_pci_handle_pe); + +void edac_pci_handle_npe(struct edac_pci_ctl_info *pci, const char *msg) +{ + + /* global NPE counter incremented by edac_pci_do_parity_check() */ + atomic_inc(&pci->counters.npe_count); + + if (edac_pci_get_log_npe()) + edac_pci_printk(pci, KERN_WARNING, + "Non-Parity Error ctl: %s %d: %s\n", + pci->ctl_name, pci->pci_idx, msg); + + /* + * poke all PCI devices and see which one is the troublemaker + * panic() is called if set + */ + edac_pci_do_parity_check(); +} + +EXPORT_SYMBOL_GPL(edac_pci_handle_npe); + +/* + * Define the PCI parameter to the module + */ +module_param(check_pci_errors, int, 0644); +MODULE_PARM_DESC(check_pci_errors, + "Check for PCI bus parity errors: 0=off 1=on"); +module_param(edac_pci_panic_on_pe, int, 0644); +MODULE_PARM_DESC(edac_pci_panic_on_pe, + "Panic on PCI Bus Parity error: 0=off 1=on"); + +#endif /* CONFIG_PCI */ |