From 7ce14a315db866605e6ac0b17fb33d10d7e49ab4 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 8 Jan 2009 02:19:49 +0000 Subject: powerpc/pseries: Remove write only variable in PCI DLPAR Since we never hotplug add an isa bus, we never need to set primary. Delete this write-only variable. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/pci_dlpar.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 5e1ed3d60ee..ad152a0e394 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -137,11 +137,9 @@ EXPORT_SYMBOL_GPL(pcibios_add_pci_devices); struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) { struct pci_controller *phb; - int primary; pr_debug("PCI: Initializing new hotplug PHB %s\n", dn->full_name); - primary = list_empty(&hose_list); phb = pcibios_alloc_controller(dn); if (!phb) return NULL; -- cgit v1.2.3 From e27ed698b88b3387d326e84c0bbe9f83e19c747b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 22 Jan 2009 20:54:31 +0000 Subject: powerpc/pseries: Fix MSI-X interrupt querying We need to increment i in the loop that queries what interrupts firmware gave us, otherwise we'll incorrectly use the first value over and over. 
Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index f15222bbe13..4af7aa3e2e0 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -199,7 +199,7 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) i = 0; list_for_each_entry(entry, &pdev->msi_list, list) { - hwirq = rtas_query_irq_number(pdn, i); + hwirq = rtas_query_irq_number(pdn, i++); if (hwirq < 0) { pr_debug("rtas_msi: error (%d) getting hwirq\n", rc); return hwirq; -- cgit v1.2.3 From 3a51c0cbea947dc9194e18f11661eaa4dbfc5c13 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 22 Jan 2009 20:54:31 +0000 Subject: powerpc/pseries: Add support for ibm,req#msi-x Firmware encodes the number of MSI-X requested by a device in a different property than for MSI. Pull the property name out as a parameter and share the logic for both cases. 
Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/msi.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 4af7aa3e2e0..acf1070d65c 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -132,7 +132,7 @@ static void rtas_teardown_msi_irqs(struct pci_dev *pdev) rtas_disable_msi(pdev); } -static int check_req_msi(struct pci_dev *pdev, int nvec) +static int check_req(struct pci_dev *pdev, int nvec, char *prop_name) { struct device_node *dn; struct pci_dn *pdn; @@ -144,24 +144,34 @@ static int check_req_msi(struct pci_dev *pdev, int nvec) dn = pdn->node; - req_msi = of_get_property(dn, "ibm,req#msi", NULL); + req_msi = of_get_property(dn, prop_name, NULL); if (!req_msi) { - pr_debug("rtas_msi: No ibm,req#msi on %s\n", dn->full_name); + pr_debug("rtas_msi: No %s on %s\n", prop_name, dn->full_name); return -ENOENT; } if (*req_msi < nvec) { - pr_debug("rtas_msi: ibm,req#msi requests < %d MSIs\n", nvec); + pr_debug("rtas_msi: %s requests < %d MSIs\n", prop_name, nvec); return -ENOSPC; } return 0; } +static int check_req_msi(struct pci_dev *pdev, int nvec) +{ + return check_req(pdev, nvec, "ibm,req#msi"); +} + +static int check_req_msix(struct pci_dev *pdev, int nvec) +{ + return check_req(pdev, nvec, "ibm,req#msi-x"); +} + static int rtas_msi_check_device(struct pci_dev *pdev, int nvec, int type) { if (type == PCI_CAP_ID_MSIX) - pr_debug("rtas_msi: MSI-X untested, trying anyway.\n"); + return check_req_msix(pdev, nvec); return check_req_msi(pdev, nvec); } -- cgit v1.2.3 From 649781f82782d142443d895b98edbd8be4e75c56 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 22 Jan 2009 20:54:32 +0000 Subject: powerpc/pseries: Check for MSI-X also in rtas_msi_pci_irq_fixup() We also need to check that the device isn't 
using MSI-X in the irq fixup routine, otherwise we might leave MSI-Xs configured at boot. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/msi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index acf1070d65c..e6c80ac0769 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -244,8 +244,8 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) } /* No MSI -> MSIs can't have been assigned by fw, leave LSI */ - if (check_req_msi(pdev, 1)) { - dev_dbg(&pdev->dev, "rtas_msi: no req#msi, nothing to do.\n"); + if (check_req_msi(pdev, 1) && check_req_msix(pdev, 1)) { + dev_dbg(&pdev->dev, "rtas_msi: no req#msi/x, nothing to do.\n"); return; } -- cgit v1.2.3 From 6071ed0487c6ea8dcfadd9844b9b90944cd9de1e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 22 Jan 2009 20:54:33 +0000 Subject: powerpc/pseries: Return the number of MSIs we could allocate If we can't allocate the requested number of MSIs, we can still tell the generic code how many we were able to allocate. That can then be passed onto the driver, allowing it to request that many in future, and probably succeed. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/msi.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index e6c80ac0769..073b518338a 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -71,11 +71,13 @@ static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs) } while (rtas_busy_delay(rc)); /* - * If the RTAS call succeeded, check the number of irqs is actually - * what we asked for. 
If not, return an error. + * If the RTAS call succeeded, return the number of irqs allocated. + * If not, make sure we return a negative error code. */ - if (rc == 0 && rtas_ret[0] != num_irqs) - rc = -ENOSPC; + if (rc == 0) + rc = rtas_ret[0]; + else if (rc > 0) + rc = -rc; pr_debug("rtas_msi: ibm,change_msi(func=%d,num=%d), got %d rc = %d\n", func, num_irqs, rtas_ret[0], rc); @@ -91,7 +93,7 @@ static void rtas_disable_msi(struct pci_dev *pdev) if (!pdn) return; - if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0)) + if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) != 0) pr_debug("rtas_msi: Setting MSIs to 0 failed!\n"); } @@ -195,14 +197,14 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) if (type == PCI_CAP_ID_MSI) { rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec); - if (rc) { + if (rc < 0) { pr_debug("rtas_msi: trying the old firmware call.\n"); rc = rtas_change_msi(pdn, RTAS_CHANGE_FN, nvec); } } else rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec); - if (rc) { + if (rc != nvec) { pr_debug("rtas_msi: rtas_change_msi() failed\n"); return rc; } -- cgit v1.2.3 From 8535ef05a6904429ce72671c3035dbf05e6d5edf Mon Sep 17 00:00:00 2001 From: Mike Mason Date: Tue, 10 Feb 2009 11:12:21 +0000 Subject: powerpc/eeh: Only disable/enable LSI interrupts in EEH The EEH code disables and enables interrupts during the device recovery process. This is unnecessary for MSI and MSI-X interrupts because they are effectively disabled by the DMA Stopped state when an EEH error occurs. The current code is also incorrect for MSI-X interrupts. It doesn't take into account that MSI-X interrupts are tracked in a different way than LSI/MSI interrupts. This patch ensures only LSI interrupts are disabled/enabled. 
Signed-off-by: Mike Mason Acked-by: Linas Vepstas Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/eeh_driver.c | 68 +++++++++++++++++++---------- 1 file changed, 45 insertions(+), 23 deletions(-) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 0ad56ff7b4a..380420f8c40 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -79,6 +79,40 @@ static int irq_in_use(unsigned int irq) return rc; } +/** + * eeh_disable_irq - disable interrupt for the recovering device + */ +static void eeh_disable_irq(struct pci_dev *dev) +{ + struct device_node *dn = pci_device_to_OF_node(dev); + + /* Don't disable MSI and MSI-X interrupts. They are + * effectively disabled by the DMA Stopped state + * when an EEH error occurs. + */ + if (dev->msi_enabled || dev->msix_enabled) + return; + + if (!irq_in_use(dev->irq)) + return; + + PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; + disable_irq_nosync(dev->irq); +} + +/** + * eeh_enable_irq - enable interrupt for the recovering device + */ +static void eeh_enable_irq(struct pci_dev *dev) +{ + struct device_node *dn = pci_device_to_OF_node(dev); + + if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) { + PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED; + enable_irq(dev->irq); + } +} + /* ------------------------------------------------------- */ /** * eeh_report_error - report pci error to each device driver @@ -98,11 +132,8 @@ static void eeh_report_error(struct pci_dev *dev, void *userdata) if (!driver) return; - if (irq_in_use (dev->irq)) { - struct device_node *dn = pci_device_to_OF_node(dev); - PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; - disable_irq_nosync(dev->irq); - } + eeh_disable_irq(dev); + if (!driver->err_handler || !driver->err_handler->error_detected) return; @@ -147,15 +178,12 @@ static void eeh_report_reset(struct pci_dev *dev, void 
*userdata) { enum pci_ers_result rc, *res = userdata; struct pci_driver *driver = dev->driver; - struct device_node *dn = pci_device_to_OF_node(dev); if (!driver) return; - if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) { - PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED; - enable_irq(dev->irq); - } + eeh_enable_irq(dev); + if (!driver->err_handler || !driver->err_handler->slot_reset) return; @@ -174,17 +202,14 @@ static void eeh_report_reset(struct pci_dev *dev, void *userdata) static void eeh_report_resume(struct pci_dev *dev, void *userdata) { struct pci_driver *driver = dev->driver; - struct device_node *dn = pci_device_to_OF_node(dev); dev->error_state = pci_channel_io_normal; if (!driver) return; - if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) { - PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED; - enable_irq(dev->irq); - } + eeh_enable_irq(dev); + if (!driver->err_handler || !driver->err_handler->resume) return; @@ -208,15 +233,12 @@ static void eeh_report_failure(struct pci_dev *dev, void *userdata) if (!driver) return; - if (irq_in_use (dev->irq)) { - struct device_node *dn = pci_device_to_OF_node(dev); - PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; - disable_irq_nosync(dev->irq); - } - if (!driver->err_handler) - return; - if (!driver->err_handler->error_detected) + eeh_disable_irq(dev); + + if (!driver->err_handler || + !driver->err_handler->error_detected) return; + driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); } -- cgit v1.2.3 From d523cc379da57f1c39f5db9c47bdaa94f74727ff Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 17 Feb 2009 00:18:49 +0000 Subject: powerpc/pseries: Return req#msi(-x) if request is larger If a driver asks for more MSIs than the devices "req#msi(-x)" property, we currently return -ENOSPC. This doesn't give the driver any chance to make a new request with a number that might work. So if "req#msi(-x)" is less than the request, return its value. 
To be 100% safe, make sure we return an error if req_msi == 0. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/msi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 073b518338a..081af6d7fa0 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -154,7 +154,11 @@ static int check_req(struct pci_dev *pdev, int nvec, char *prop_name) if (*req_msi < nvec) { pr_debug("rtas_msi: %s requests < %d MSIs\n", prop_name, nvec); - return -ENOSPC; + + if (*req_msi == 0) /* Be paranoid */ + return -ENOSPC; + + return *req_msi; } return 0; -- cgit v1.2.3 From 448e2ca0e32a5c437650d634b6032ab732662338 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 17 Feb 2009 00:21:56 +0000 Subject: powerpc/pseries: Implement a quota system for MSIs There are hardware limitations on the number of available MSIs, which firmware expresses using a property named "ibm,pe-total-#msi". This property tells us how many MSIs are available for devices below the point in the PCI tree where we find the property. For old firmwares which don't have the property, we assume there are 8 MSIs available per "partitionable endpoint" (PE). The PE can be found using existing EEH code, which uses the methods described in PAPR. For our purposes we want the parent of the node that's identified using this method. When a driver requests n MSIs for a device, we first establish where the "ibm,pe-total-#msi" property above that device is, or we find the PE if the property is not found. In both cases we call this node the "pe_dn". We then count all non-bridge devices below the pe_dn, to establish how many devices in total may need MSIs. 
The quota is then simply the total available divided by the number of devices, if the request is less than or equal to the quota, the request is fine and we're done. If the request is greater than the quota, we try to determine if there are any "spare" MSIs which we can give to this device. Spare MSIs are found by looking for other devices which can never use their full quota, because their "req#msi(-x)" property is less than the quota. If we find any spare, we divide the spares by the number of devices that could request more than their quota. This ensures the spare MSIs are spread evenly amongst all over-quota requestors. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/msi.c | 178 ++++++++++++++++++++++++++++++++++- 1 file changed, 176 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 081af6d7fa0..3e0d6ef3eca 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -174,12 +174,186 @@ static int check_req_msix(struct pci_dev *pdev, int nvec) return check_req(pdev, nvec, "ibm,req#msi-x"); } +/* Quota calculation */ + +static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) +{ + struct device_node *dn; + const u32 *p; + + dn = of_node_get(pci_device_to_OF_node(dev)); + while (dn) { + p = of_get_property(dn, "ibm,pe-total-#msi", NULL); + if (p) { + pr_debug("rtas_msi: found prop on dn %s\n", + dn->full_name); + *total = *p; + return dn; + } + + dn = of_get_next_parent(dn); + } + + return NULL; +} + +static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) +{ + struct device_node *dn; + + /* Found our PE and assume 8 at that point. 
*/ + + dn = pci_device_to_OF_node(dev); + if (!dn) + return NULL; + + dn = find_device_pe(dn); + if (!dn) + return NULL; + + /* We actually want the parent */ + dn = of_get_parent(dn); + if (!dn) + return NULL; + + /* Hardcode of 8 for old firmwares */ + *total = 8; + pr_debug("rtas_msi: using PE dn %s\n", dn->full_name); + + return dn; +} + +struct msi_counts { + struct device_node *requestor; + int num_devices; + int request; + int quota; + int spare; + int over_quota; +}; + +static void *count_non_bridge_devices(struct device_node *dn, void *data) +{ + struct msi_counts *counts = data; + const u32 *p; + u32 class; + + pr_debug("rtas_msi: counting %s\n", dn->full_name); + + p = of_get_property(dn, "class-code", NULL); + class = p ? *p : 0; + + if ((class >> 8) != PCI_CLASS_BRIDGE_PCI) + counts->num_devices++; + + return NULL; +} + +static void *count_spare_msis(struct device_node *dn, void *data) +{ + struct msi_counts *counts = data; + const u32 *p; + int req; + + if (dn == counts->requestor) + req = counts->request; + else { + /* We don't know if a driver will try to use MSI or MSI-X, + * so we just have to punt and use the larger of the two. 
*/ + req = 0; + p = of_get_property(dn, "ibm,req#msi", NULL); + if (p) + req = *p; + + p = of_get_property(dn, "ibm,req#msi-x", NULL); + if (p) + req = max(req, (int)*p); + } + + if (req < counts->quota) + counts->spare += counts->quota - req; + else if (req > counts->quota) + counts->over_quota++; + + return NULL; +} + +static int msi_quota_for_device(struct pci_dev *dev, int request) +{ + struct device_node *pe_dn; + struct msi_counts counts; + int total; + + pr_debug("rtas_msi: calc quota for %s, request %d\n", pci_name(dev), + request); + + pe_dn = find_pe_total_msi(dev, &total); + if (!pe_dn) + pe_dn = find_pe_dn(dev, &total); + + if (!pe_dn) { + pr_err("rtas_msi: couldn't find PE for %s\n", pci_name(dev)); + goto out; + } + + pr_debug("rtas_msi: found PE %s\n", pe_dn->full_name); + + memset(&counts, 0, sizeof(struct msi_counts)); + + /* Work out how many devices we have below this PE */ + traverse_pci_devices(pe_dn, count_non_bridge_devices, &counts); + + if (counts.num_devices == 0) { + pr_err("rtas_msi: found 0 devices under PE for %s\n", + pci_name(dev)); + goto out; + } + + counts.quota = total / counts.num_devices; + if (request <= counts.quota) + goto out; + + /* else, we have some more calculating to do */ + counts.requestor = pci_device_to_OF_node(dev); + counts.request = request; + traverse_pci_devices(pe_dn, count_spare_msis, &counts); + + /* If the quota isn't an integer multiple of the total, we can + * use the remainder as spare MSIs for anyone that wants them. 
*/ + counts.spare += total % counts.num_devices; + + /* Divide any spare by the number of over-quota requestors */ + if (counts.over_quota) + counts.quota += counts.spare / counts.over_quota; + + /* And finally clamp the request to the possibly adjusted quota */ + request = min(counts.quota, request); + + pr_debug("rtas_msi: request clamped to quota %d\n", request); +out: + of_node_put(pe_dn); + + return request; +} + static int rtas_msi_check_device(struct pci_dev *pdev, int nvec, int type) { + int quota, rc; + if (type == PCI_CAP_ID_MSIX) - return check_req_msix(pdev, nvec); + rc = check_req_msix(pdev, nvec); + else + rc = check_req_msi(pdev, nvec); + + if (rc) + return rc; - return check_req_msi(pdev, nvec); + quota = msi_quota_for_device(pdev, nvec); + + if (quota && quota < nvec) + return quota; + + return 0; } static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -- cgit v1.2.3 From 94afa5a5f54235c4612198768b6a2fa2e2366f44 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 5 Mar 2009 14:44:26 +0000 Subject: powerpc/pseries: Reject discontiguous/non-zero based MSI-X requests There's no way for us to express to firmware that we want a discontiguous, or non-zero based, range of MSI-X entries. So we must reject such requests. 
Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/msi.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 3e0d6ef3eca..bf2e1ac4130 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -356,6 +356,27 @@ static int rtas_msi_check_device(struct pci_dev *pdev, int nvec, int type) return 0; } +static int check_msix_entries(struct pci_dev *pdev) +{ + struct msi_desc *entry; + int expected; + + /* There's no way for us to express to firmware that we want + * a discontiguous, or non-zero based, range of MSI-X entries. + * So we must reject such requests. */ + + expected = 0; + list_for_each_entry(entry, &pdev->msi_list, list) { + if (entry->msi_attrib.entry_nr != expected) { + pr_debug("rtas_msi: bad MSI-X entries.\n"); + return -EINVAL; + } + expected++; + } + + return 0; +} + static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct pci_dn *pdn; @@ -367,6 +388,9 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) if (!pdn) return -ENODEV; + if (type == PCI_CAP_ID_MSIX && check_msix_entries(pdev)) + return -EINVAL; + /* * Try the new more explicit firmware interface, if that fails fall * back to the old interface. 
The old interface is known to never -- cgit v1.2.3 From 1bac0221554d2e9153e6aff2272ee833b5bff980 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 5 Mar 2009 17:36:39 +0000 Subject: powerpc/pseries: The pseries MSI code depends on EEH Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/Kconfig | 5 +++++ arch/powerpc/platforms/pseries/Makefile | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index ddc2a307cd5..095ff6f846b 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -25,6 +25,11 @@ config EEH depends on PPC_PSERIES && PCI default y if !EMBEDDED +config PSERIES_MSI + bool + depends on PCI_MSI && EEH + default y + config SCANLOG tristate "Scanlog dump interface" depends on RTAS_PROC && PPC_PSERIES diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index dfe574af2dc..0ce691df595 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_SCANLOG) += scanlog.o obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_PCI) += pci.o pci_dlpar.o -obj-$(CONFIG_PCI_MSI) += msi.o +obj-$(CONFIG_PSERIES_MSI) += msi.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o -- cgit v1.2.3 From 28794d34ecb6815a3fa0a4256027c9b081a17c5f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 10 Mar 2009 17:53:27 +0000 Subject: powerpc/kconfig: Kill PPC_MULTIPLATFORM CONFIG_PPC_MULTIPLATFORM is a remain of the pre-powerpc days and isn't really meaningful anymore. It was basically equivalent to PPC64 || 6xx. 
This removes it along with the following changes: - 32-bit platforms that relied on PPC32 && PPC_MULTIPLATFORM now rely on 6xx which is what they want anyway. - A new symbol, PPC_BOOK3S, is defined that represents compliance with the "Server" variant of the architecture. This is set when either 6xx or PPC64 is set and opens the door for future BOOK3E 64-bit. - 64-bit platforms that relied on PPC64 && PPC_MULTIPLATFORM now use PPC64 && PPC_BOOK3S - A separate and selectable CONFIG_PPC_OF_BOOT_TRAMPOLINE option is now used to control the use of prom_init.c Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 095ff6f846b..c0e6ec240f4 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -1,5 +1,5 @@ config PPC_PSERIES - depends on PPC_MULTIPLATFORM && PPC64 + depends on PPC64 && PPC_BOOK3S bool "IBM pSeries & new (POWER5-based) iSeries" select MPIC select PPC_I8259 -- cgit v1.2.3 From c5785f9e1c1c07c791fdc471f5c7fda4a5855b0c Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Mon, 9 Mar 2009 00:00:00 +0000 Subject: powerpc/pseries: Failed reconfig notifier chain call cleanup The return code from invoking the notifier chain when updating the ibm,dynamic-memory property is not handled properly. In failure cases (rc == NOTIFY_BAD) we should be restoring the original value of the property. In success (rc == NOTIFY_OK) we should be returning zero from the calling routine. 
Signed-off-by: Nathan Fontenot Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/reconfig.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index c591a25b0b0..b6f1b137d42 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -468,9 +468,13 @@ static int do_update_property(char *buf, size_t bufsize) rc = blocking_notifier_call_chain(&pSeries_reconfig_chain, action, value); + if (rc == NOTIFY_BAD) { + rc = prom_update_property(np, oldprop, newprop); + return -ENOMEM; + } } - return rc; + return 0; } /** -- cgit v1.2.3 From fc59a3fc8eed3a2c811e64ec65015d7eb1459ace Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 11 Mar 2009 17:55:52 +0000 Subject: powerpc: Add virtual processor dispatch trace log pseries SPLPAR machines are able to retrieve a log of dispatch and preempt events from the hypervisor. With this information, we can see when and why each dispatch & preempt is occurring. This change adds a set of debugfs files allowing userspace to read this dispatch log. Based on initial patches from Nishanth Aravamudan . 
Signed-off-by: Jeremy Kerr Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/Kconfig | 10 + arch/powerpc/platforms/pseries/Makefile | 1 + arch/powerpc/platforms/pseries/dtl.c | 274 ++++++++++++++++++++++++ arch/powerpc/platforms/pseries/plpar_wrappers.h | 10 + 4 files changed, 295 insertions(+) create mode 100644 arch/powerpc/platforms/pseries/dtl.c (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index c0e6ec240f4..f0e6f28427b 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -68,3 +68,13 @@ config CMM makes sense for a system running in an LPAR where the unused pages will be reused for other LPARs. The interface allows firmware to balance memory across many LPARs. + +config DTL + bool "Dispatch Trace Log" + depends on PPC_SPLPAR && DEBUG_FS + help + SPLPAR machines can log hypervisor preempt & dispatch events to a + kernel buffer. Saying Y here will enable logging these events, + which are accessible through a debugfs file. + + Say N if you are unsure. 
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 0ce691df595..790c0b872d4 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -25,3 +25,4 @@ obj-$(CONFIG_HVCS) += hvcserver.o obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o obj-$(CONFIG_CMM) += cmm.o +obj-$(CONFIG_DTL) += dtl.o diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c new file mode 100644 index 00000000000..dc9b0f81e60 --- /dev/null +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -0,0 +1,274 @@ +/* + * Virtual Processor Dispatch Trace Log + * + * (C) Copyright IBM Corporation 2009 + * + * Author: Jeremy Kerr + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include + +#include "plpar_wrappers.h" + +/* + * Layout of entries in the hypervisor's DTL buffer. Although we don't + * actually access the internals of an entry (we only need to know the size), + * we might as well define it here for reference. 
+ */ +struct dtl_entry { + u8 dispatch_reason; + u8 preempt_reason; + u16 processor_id; + u32 enqueue_to_dispatch_time; + u32 ready_to_enqueue_time; + u32 waiting_to_ready_time; + u64 timebase; + u64 fault_addr; + u64 srr0; + u64 srr1; +}; + +struct dtl { + struct dtl_entry *buf; + struct dentry *file; + int cpu; + int buf_entries; + u64 last_idx; +}; +static DEFINE_PER_CPU(struct dtl, dtl); + +/* + * Dispatch trace log event mask: + * 0x7: 0x1: voluntary virtual processor waits + * 0x2: time-slice preempts + * 0x4: virtual partition memory page faults + */ +static u8 dtl_event_mask = 0x7; + + +/* + * Size of per-cpu log buffers. Default is just under 16 pages worth. + */ +static int dtl_buf_entries = (16 * 85); + + +static int dtl_enable(struct dtl *dtl) +{ + unsigned long addr; + int ret, hwcpu; + + /* only allow one reader */ + if (dtl->buf) + return -EBUSY; + + /* we need to store the original allocation size for use during read */ + dtl->buf_entries = dtl_buf_entries; + + dtl->buf = kmalloc_node(dtl->buf_entries * sizeof(struct dtl_entry), + GFP_KERNEL, cpu_to_node(dtl->cpu)); + if (!dtl->buf) { + printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", + __func__, dtl->cpu); + return -ENOMEM; + } + + /* Register our dtl buffer with the hypervisor. 
The HV expects the + * buffer size to be passed in the second word of the buffer */ + ((u32 *)dtl->buf)[1] = dtl->buf_entries * sizeof(struct dtl_entry); + + hwcpu = get_hard_smp_processor_id(dtl->cpu); + addr = __pa(dtl->buf); + ret = register_dtl(hwcpu, addr); + if (ret) { + printk(KERN_WARNING "%s: DTL registration for cpu %d (hw %d) " + "failed with %d\n", __func__, dtl->cpu, hwcpu, ret); + kfree(dtl->buf); + return -EIO; + } + + /* set our initial buffer indices */ + dtl->last_idx = lppaca[dtl->cpu].dtl_idx = 0; + + /* enable event logging */ + lppaca[dtl->cpu].dtl_enable_mask = dtl_event_mask; + + return 0; +} +/* Stop logging for dtl->cpu: clear its lppaca dtl_enable_mask, call unregister_dtl() on the buffer, then free the buffer and reset buf and buf_entries. */ +static void dtl_disable(struct dtl *dtl) +{ + int hwcpu = get_hard_smp_processor_id(dtl->cpu); + + lppaca[dtl->cpu].dtl_enable_mask = 0x0; + + unregister_dtl(hwcpu, __pa(dtl->buf)); + + kfree(dtl->buf); + dtl->buf = NULL; + dtl->buf_entries = 0; +} + +/* file interface */ +/* open: take the struct dtl from inode->i_private and call dtl_enable(); on failure return its error code, on success store the dtl in filp->private_data, which dtl_file_read() uses. */ +static int dtl_file_open(struct inode *inode, struct file *filp) +{ + struct dtl *dtl = inode->i_private; + int rc; + + rc = dtl_enable(dtl); + if (rc) + return rc; + + filp->private_data = dtl; + return 0; +} +/* release: call dtl_disable() on the dtl from inode->i_private; always returns 0. */ +static int dtl_file_release(struct inode *inode, struct file *filp) +{ + struct dtl *dtl = inode->i_private; + dtl_disable(dtl); + return 0; +} +/* read: copy whole struct dtl_entry records lying between dtl->last_idx and the lppaca dtl_idx from dtl->buf to userspace. len must be a multiple of sizeof(struct dtl_entry), otherwise -EINVAL is returned. Returns the number of bytes copied, or -EFAULT when copy_to_user() fails. pos is not used. */ +static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len, + loff_t *pos) +{ + int rc, cur_idx, last_idx, n_read, n_req, read_size; + struct dtl *dtl; + + if ((len % sizeof(struct dtl_entry)) != 0) + return -EINVAL; + + dtl = filp->private_data; + + /* requested number of entries to read */ + n_req = len / sizeof(struct dtl_entry); + + /* actual number of entries read */ + n_read = 0; + + cur_idx = lppaca[dtl->cpu].dtl_idx; + last_idx = dtl->last_idx; + /* NOTE(review): an index gap larger than buf_entries is only reported via pr_debug(); last_idx is not moved forward here - confirm this is intended. */ + if (cur_idx - last_idx > dtl->buf_entries) { + pr_debug("%s: hv buffer overflow for cpu %d, samples lost\n", + __func__, dtl->cpu); + } + /* reduce both running indices to positions inside the buf_entries-sized buffer */ + cur_idx %= dtl->buf_entries; + last_idx %= dtl->buf_entries; + + /* read the tail of the buffer if we've wrapped */ + 
if (last_idx > cur_idx) { + read_size = min(n_req, dtl->buf_entries - last_idx); + + rc = copy_to_user(buf, &dtl->buf[last_idx], + read_size * sizeof(struct dtl_entry)); + if (rc) + return -EFAULT; + + last_idx = 0; + n_req -= read_size; + n_read += read_size; + buf += read_size * sizeof(struct dtl_entry); + } + + /* .. and now the head */ + read_size = min(n_req, cur_idx - last_idx); + rc = copy_to_user(buf, &dtl->buf[last_idx], + read_size * sizeof(struct dtl_entry)); + if (rc) + return -EFAULT; + + n_read += read_size; + dtl->last_idx += n_read; + /* n_read counts entries; the return value is in bytes */ + return n_read * sizeof(struct dtl_entry); +} +/* file operations passed to debugfs_create_file() in dtl_setup_file(): open, release and read are the handlers above, llseek is no_llseek. */ +static struct file_operations dtl_fops = { + .open = dtl_file_open, + .release = dtl_file_release, + .read = dtl_file_read, + .llseek = no_llseek, +}; +/* debugfs directory "dtl", created in dtl_init(); parent of every debugfs entry created below */ +static struct dentry *dtl_dir; +/* Create the debugfs file "cpu-<n>" (mode 0400) for dtl->cpu under dtl_dir, passing the dtl as the data argument and dtl_fops as the operations. Returns 0, or -ENOMEM if debugfs_create_file() returns NULL. */ +static int dtl_setup_file(struct dtl *dtl) +{ + char name[10]; + /* NOTE(review): 10 bytes hold "cpu-" plus at most 5 digits and the NUL, and sprintf() is unbounded - confirm cpu numbers cannot need more. */ + sprintf(name, "cpu-%d", dtl->cpu); + + dtl->file = debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops); + if (!dtl->file) + return -ENOMEM; + + return 0; +} +/* Initcall: returns -ENODEV without FW_FEATURE_SPLPAR. Otherwise creates the "dtl" debugfs directory, the dtl_event_mask and dtl_buf_entries files, and one file per possible cpu via dtl_setup_file(). On failure returns rc, removing the directory tree first if it had been created. */ +static int dtl_init(void) +{ + struct dentry *event_mask_file, *buf_entries_file; + int rc, i; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return -ENODEV; + + /* set up common debugfs structure */ + /* preset rc so the NULL-return failures below report -ENOMEM */ + rc = -ENOMEM; + dtl_dir = debugfs_create_dir("dtl", powerpc_debugfs_root); + if (!dtl_dir) { + printk(KERN_WARNING "%s: can't create dtl root dir\n", + __func__); + goto err; + } + + event_mask_file = debugfs_create_x8("dtl_event_mask", 0600, + dtl_dir, &dtl_event_mask); + buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0600, + dtl_dir, &dtl_buf_entries); + + if (!event_mask_file || !buf_entries_file) { + printk(KERN_WARNING "%s: can't create dtl files\n", __func__); + goto err_remove_dir; + } + + /* set up the per-cpu log structures */ + for_each_possible_cpu(i) { + struct dtl *dtl = &per_cpu(dtl, i); + dtl->cpu = i; + + rc = dtl_setup_file(dtl); + if (rc) + goto err_remove_dir; + } + + return 0; + +err_remove_dir: + 
debugfs_remove_recursive(dtl_dir); +err: + return rc; +} +arch_initcall(dtl_init); diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h index d967c1893ab..a24a6b2333b 100644 --- a/arch/powerpc/platforms/pseries/plpar_wrappers.h +++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h @@ -43,6 +43,16 @@ static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa) return vpa_call(0x3, cpu, vpa); } +static inline long unregister_dtl(unsigned long cpu, unsigned long vpa) +{ + return vpa_call(0x6, cpu, vpa); +} +/* same wrapper shape as unregister_dtl() above, but passes 0x2 instead of 0x6 as the first vpa_call() argument */ +static inline long register_dtl(unsigned long cpu, unsigned long vpa) +{ + return vpa_call(0x2, cpu, vpa); +} + static inline long plpar_page_set_loaned(unsigned long vpa) { unsigned long cmo_page_sz = cmo_get_page_size(); -- cgit v1.2.3 From 82631f5dd114e52239fb3d1e270a49d37c088b46 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 23 Mar 2009 16:55:08 +0000 Subject: powerpc: Add write barrier before enabling DTL flags Currently, we don't enforce any ordering for updates to the lppaca when enabling dtl logging, so we may end up enabling logging before the index fields have been established. This change adds a smp_wmb() before doing the actual enable. 
Signed-off-by: Jeremy Kerr Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/dtl.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/platforms/pseries') diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index dc9b0f81e60..fafcaa0e81e 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -107,6 +107,10 @@ static int dtl_enable(struct dtl *dtl) /* set our initial buffer indices */ dtl->last_idx = lppaca[dtl->cpu].dtl_idx = 0; + /* ensure that our updates to the lppaca fields have occurred before + * we actually enable the logging */ + smp_wmb(); + /* enable event logging */ lppaca[dtl->cpu].dtl_enable_mask = dtl_event_mask; -- cgit v1.2.3