From fd761fd876e4d1c0d07b6d93bc45c999fa596cb0 Mon Sep 17 00:00:00 2001
From: Linas Vepstas <linas@linas.org>
Date: Thu, 3 Nov 2005 18:49:23 -0600
Subject: [PATCH] ppc64: serialize reports of PCI errors

07-eeh-report-race.patch

When a PCI slot is isolated, all PCI functions under that slot are affected.
If hese functions have separate device drivers, the EEH isolation event
might be reported multiple times. This patch adds a lock to prevent the
racing of such multiple reports. It also marks every device under the slot
as having experienced an EEH event, so that multiple reports may be
recognized more easily.

Signed-off-by: Linas Vepstas <linas@linas.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/ppc64/kernel/eeh.c | 98 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 85 insertions(+), 13 deletions(-)

diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c
index 0060934dffd..e7522f6da69 100644
--- a/arch/ppc64/kernel/eeh.c
+++ b/arch/ppc64/kernel/eeh.c
@@ -96,6 +96,9 @@ static int ibm_slot_error_detail;
 
 static int eeh_subsystem_enabled;
 
+/* Lock to avoid races due to multiple reports of an error */
+static DEFINE_SPINLOCK(confirm_error_lock);
+
 /* Buffer for reporting slot-error-detail rtas calls */
 static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
 static DEFINE_SPINLOCK(slot_errbuf_lock);
@@ -544,6 +547,55 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 	return pa | (token & (PAGE_SIZE-1));
 }
 
+/** 
+ * Return the "partitionable endpoint" (pe) under which this device lies
+ */
+static struct device_node * find_device_pe(struct device_node *dn)
+{
+	while ((dn->parent) && PCI_DN(dn->parent) &&
+	      (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
+		dn = dn->parent;
+	}
+	return dn;
+}
+
+/** Mark all devices that are peers of this device as failed.
+ *  Mark the device driver too, so that it can see the failure
+ *  immediately; this is critical, since some drivers poll
+ *  status registers in interrupts ... If a driver is polling,
+ *  and the slot is frozen, then the driver can deadlock in
+ *  an interrupt context, which is bad.
+ */
+
+static inline void __eeh_mark_slot (struct device_node *dn)
+{
+	while (dn) {
+		PCI_DN(dn)->eeh_mode |= EEH_MODE_ISOLATED;
+
+		if (dn->child)
+			__eeh_mark_slot (dn->child);
+		dn = dn->sibling;
+	}
+}
+
+static inline void __eeh_clear_slot (struct device_node *dn)
+{
+	while (dn) {
+		PCI_DN(dn)->eeh_mode &= ~EEH_MODE_ISOLATED;
+		if (dn->child)
+			__eeh_clear_slot (dn->child);
+		dn = dn->sibling;
+	}
+}
+
+static inline void eeh_clear_slot (struct device_node *dn)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&confirm_error_lock, flags);
+	__eeh_clear_slot (dn);
+	spin_unlock_irqrestore(&confirm_error_lock, flags);
+}
+
 /**
  * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
  * @dn device node
@@ -567,6 +619,8 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	int reset_state;
 	struct eeh_event  *event;
 	struct pci_dn *pdn;
+	struct device_node *pe_dn;
+	int rc = 0;
 
 	__get_cpu_var(total_mmio_ffs)++;
 
@@ -594,10 +648,14 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 		return 0;
 	}
 
-	/*
-	 * If we already have a pending isolation event for this
-	 * slot, we know it's bad already, we don't need to check...
+	/* If we already have a pending isolation event for this
+	 * slot, we know it's bad already, we don't need to check.
+	 * Do this checking under a lock; as multiple PCI devices
+	 * in one slot might report errors simultaneously, and we
+	 * only want one error recovery routine running.
 	 */
+	spin_lock_irqsave(&confirm_error_lock, flags);
+	rc = 1;
 	if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
 		atomic_inc(&eeh_fail_count);
 		if (atomic_read(&eeh_fail_count) >= EEH_MAX_FAILS) {
@@ -606,7 +664,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 				rets[0] = -1;	/* reset state unknown */
 			eeh_panic(dev, rets[0]);
 		}
-		return 0;
+		goto dn_unlock;
 	}
 
 	/*
@@ -623,7 +681,8 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 		printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
 		       ret, dn->full_name);
 		__get_cpu_var(false_positives)++;
-		return 0;
+		rc = 0;
+		goto dn_unlock;
 	}
 
 	/* If EEH is not supported on this device, punt. */
@@ -631,25 +690,33 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 		printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
 		       ret, dn->full_name);
 		__get_cpu_var(false_positives)++;
-		return 0;
+		rc = 0;
+		goto dn_unlock;
 	}
 
 	/* If not the kind of error we know about, punt. */
 	if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
 		__get_cpu_var(false_positives)++;
-		return 0;
+		rc = 0;
+		goto dn_unlock;
 	}
 
 	/* Note that config-io to empty slots may fail;
 	 * we recognize empty because they don't have children. */
 	if ((rets[0] == 5) && (dn->child == NULL)) {
 		__get_cpu_var(false_positives)++;
-		return 0;
+		rc = 0;
+		goto dn_unlock;
 	}
 
-	/* prevent repeated reports of this failure */
-	pdn->eeh_mode |= EEH_MODE_ISOLATED;
-	 __get_cpu_var(slot_resets)++;
+	__get_cpu_var(slot_resets)++;
+ 
+	/* Avoid repeated reports of this failure, including problems
+	 * with other functions on this device, and functions under
+	 * bridges. */
+	pe_dn = find_device_pe (dn);
+	__eeh_mark_slot (pe_dn);
+	spin_unlock_irqrestore(&confirm_error_lock, flags);
 
 	reset_state = rets[0];
 
@@ -678,10 +745,14 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	if (rets[0] != 5) dump_stack();
 	schedule_work(&eeh_event_wq);
 
-	return 0;
+	return 1;
+
+dn_unlock:
+	spin_unlock_irqrestore(&confirm_error_lock, flags);
+	return rc;
 }
 
-EXPORT_SYMBOL(eeh_dn_check_failure);
+EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
 
 /**
  * eeh_check_failure - check if all 1's data is due to EEH slot freeze
@@ -820,6 +891,7 @@ void __init eeh_init(void)
 	struct device_node *phb, *np;
 	struct eeh_early_enable_info info;
 
+	spin_lock_init(&confirm_error_lock);
 	spin_lock_init(&slot_errbuf_lock);
 
 	np = of_find_node_by_path("/rtas");
-- 
cgit v1.2.3