aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinas Vepstas <linas@linas.org>2005-11-03 18:50:04 -0600
committerPaul Mackerras <paulus@samba.org>2005-11-10 11:38:05 +1100
commit172ca9261800bacbbc7d320d9924d9b482dff8de (patch)
tree7abd6ddf1e6b9a147a0826c374f0d1bca80806d3 /include
parent7f79da7accd63a6adb84f4602f66779f6a701e7b (diff)
[PATCH] ppc64: PCI error event dispatcher
12-eeh-event-dispatcher.patch ppc64: EEH Recovery dispatcher thread This patch adds a mechanism to create recovery threads when an EEH event is received. Since an EEH freeze state may be detected within an interrupt context, we need to get out of the interrupt context before starting recovery. This dispatcher does this in two steps: first, it uses a workqueue to get out, and then lanuches a kernel thread, so that the recovery routine can sleep for exteded periods without upseting the keventd. A kernel thread is created with each EEH event, rather than having one long-running daemon started at boot time. This is because it is anticipated that EEH events will be very rare (very very rare, ideally) and so its pointless to cluter the process tables with a daemon that will almost never run. Signed-off-by: Linas Vepstas <linas@austin.ibm.com> Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'include')
-rw-r--r--include/asm-powerpc/eeh_event.h52
-rw-r--r--include/asm-ppc64/eeh.h46
2 files changed, 65 insertions, 33 deletions
diff --git a/include/asm-powerpc/eeh_event.h b/include/asm-powerpc/eeh_event.h
new file mode 100644
index 00000000000..d168a30b386
--- /dev/null
+++ b/include/asm-powerpc/eeh_event.h
@@ -0,0 +1,52 @@
+/*
+ * eeh_event.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
+ */
+
+#ifndef ASM_PPC64_EEH_EVENT_H
+#define ASM_PPC64_EEH_EVENT_H
+
+/** EEH event -- structure holding pci controller data that describes
+ * a change in the isolation status of a PCI slot. A pointer
+ * to this struct is passed as the data pointer in a notify callback.
+ */
+struct eeh_event {
+ struct list_head list;
+ struct device_node *dn; /* struct device node */
+ struct pci_dev *dev; /* affected device */
+ int state;
+ int time_unavail; /* milliseconds until device might be available */
+};
+
+/**
+ * eeh_send_failure_event - generate a PCI error event
+ * @dev pci device
+ *
+ * This routine builds a PCI error event which will be delivered
+ * to all listeners on the peh_notifier_chain.
+ *
+ * This routine can be called within an interrupt context;
+ * the actual event will be delivered in a normal context
+ * (from a workqueue).
+ */
+int eeh_send_failure_event (struct device_node *dn,
+ struct pci_dev *dev,
+ int reset_state,
+ int time_unavail);
+
+#endif /* ASM_PPC64_EEH_EVENT_H */
diff --git a/include/asm-ppc64/eeh.h b/include/asm-ppc64/eeh.h
index 40c8eb57493..89f26ab3190 100644
--- a/include/asm-ppc64/eeh.h
+++ b/include/asm-ppc64/eeh.h
@@ -1,4 +1,4 @@
-/*
+/*
* eeh.h
* Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation.
*
@@ -6,12 +6,12 @@
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
- *
+ *
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@@ -27,8 +27,6 @@
struct pci_dev;
struct device_node;
-struct device_node;
-struct notifier_block;
#ifdef CONFIG_EEH
@@ -37,6 +35,10 @@ struct notifier_block;
#define EEH_MODE_NOCHECK (1<<1)
#define EEH_MODE_ISOLATED (1<<2)
+/* Max number of EEH freezes allowed before we consider the device
+ * to be permanently disabled. */
+#define EEH_MAX_ALLOWED_FREEZES 5
+
void __init eeh_init(void);
unsigned long eeh_check_failure(const volatile void __iomem *token,
unsigned long val);
@@ -59,36 +61,14 @@ void eeh_add_device_late(struct pci_dev *);
* eeh_remove_device - undo EEH setup for the indicated pci device
* @dev: pci device to be removed
*
- * This routine should be when a device is removed from a running
- * system (e.g. by hotplug or dlpar).
+ * This routine should be called when a device is removed from
+ * a running system (e.g. by hotplug or dlpar). It unregisters
+ * the PCI device from the EEH subsystem. I/O errors affecting
+ * this device will no longer be detected after this call; thus,
+ * i/o errors affecting this slot may leave this device unusable.
*/
void eeh_remove_device(struct pci_dev *);
-#define EEH_DISABLE 0
-#define EEH_ENABLE 1
-#define EEH_RELEASE_LOADSTORE 2
-#define EEH_RELEASE_DMA 3
-
-/**
- * Notifier event flags.
- */
-#define EEH_NOTIFY_FREEZE 1
-
-/** EEH event -- structure holding pci slot data that describes
- * a change in the isolation status of a PCI slot. A pointer
- * to this struct is passed as the data pointer in a notify callback.
- */
-struct eeh_event {
- struct list_head list;
- struct pci_dev *dev;
- struct device_node *dn;
- int reset_state;
-};
-
-/** Register to find out about EEH events. */
-int eeh_register_notifier(struct notifier_block *nb);
-int eeh_unregister_notifier(struct notifier_block *nb);
-
/**
* EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
*
@@ -129,7 +109,7 @@ static inline void eeh_remove_device(struct pci_dev *dev) { }
#define EEH_IO_ERROR_VALUE(size) (-1UL)
#endif /* CONFIG_EEH */
-/*
+/*
* MMIO read/write operations with EEH support.
*/
static inline u8 eeh_readb(const volatile void __iomem *addr)