diff options
Diffstat (limited to 'arch/powerpc/platforms')
25 files changed, 1982 insertions, 735 deletions
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 33545d352e9..932538a93c2 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -272,4 +272,14 @@ config CPM2 you wish to build a kernel for a machine with a CPM2 coprocessor on it (826x, 827x, 8560). +config AXON_RAM + tristate "Axon DDR2 memory device driver" + depends on PPC_IBM_CELL_BLADE + default m + help + It registers one block device per Axon's DDR2 memory bank found + on a system. Block devices are called axonram?, their major and + minor numbers are available in /proc/devices, /proc/partitions or + in /sys/block/axonram?/dev. + endmenu diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 9b2b386ccf4..ac8032034fb 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -73,4 +73,14 @@ config CBE_CPUFREQ For details, take a look at <file:Documentation/cpu-freq/>. If you don't have such processor, say N +config CBE_CPUFREQ_PMI + tristate "CBE frequency scaling using PMI interface" + depends on CBE_CPUFREQ && PPC_PMI && EXPERIMENTAL + default n + help + Select this, if you want to use the PMI interface + to switch frequencies. Using PMI, the + processor will not only be able to run at lower speed, + but also at lower core voltage. 
+ endmenu diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index 869af89df6f..f88a7c76f29 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -4,7 +4,9 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += interrupt.o iommu.o setup.o \ obj-$(CONFIG_CBE_RAS) += ras.o obj-$(CONFIG_CBE_THERM) += cbe_thermal.o -obj-$(CONFIG_CBE_CPUFREQ) += cbe_cpufreq.o +obj-$(CONFIG_CBE_CPUFREQ_PMI) += cbe_cpufreq_pmi.o +obj-$(CONFIG_CBE_CPUFREQ) += cbe-cpufreq.o +cbe-cpufreq-y += cbe_cpufreq_pervasive.o cbe_cpufreq.o ifeq ($(CONFIG_SMP),y) obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o @@ -23,3 +25,5 @@ obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ $(spu-priv1-y) \ $(spu-manage-y) \ spufs/ + +obj-$(CONFIG_PCI_MSI) += axon_msi.o diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c new file mode 100644 index 00000000000..4c9ab5b70ba --- /dev/null +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -0,0 +1,445 @@ +/* + * Copyright 2007, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/msi.h> +#include <linux/reboot.h> + +#include <asm/dcr.h> +#include <asm/machdep.h> +#include <asm/prom.h> + + +/* + * MSIC registers, specified as offsets from dcr_base + */ +#define MSIC_CTRL_REG 0x0 + +/* Base Address registers specify FIFO location in BE memory */ +#define MSIC_BASE_ADDR_HI_REG 0x3 +#define MSIC_BASE_ADDR_LO_REG 0x4 + +/* Hold the read/write offsets into the FIFO */ +#define MSIC_READ_OFFSET_REG 0x5 +#define MSIC_WRITE_OFFSET_REG 0x6 + + +/* MSIC control register flags */ +#define MSIC_CTRL_ENABLE 0x0001 +#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002 +#define MSIC_CTRL_IRQ_ENABLE 0x0008 +#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010 + +/* + * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB. + * Currently we're using a 64KB FIFO size. + */ +#define MSIC_FIFO_SIZE_SHIFT 16 +#define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT) + +/* + * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits + * 8-9 of the MSIC control reg. + */ +#define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300) + +/* + * We need to mask the read/write offsets to make sure they stay within + * the bounds of the FIFO. Also they should always be 16-byte aligned. 
+ */ +#define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu) + +/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */ +#define MSIC_FIFO_ENTRY_SIZE 0x10 + + +struct axon_msic { + struct device_node *dn; + struct irq_host *irq_host; + __le32 *fifo; + dcr_host_t dcr_host; + struct list_head list; + u32 read_offset; + u32 dcr_base; +}; + +static LIST_HEAD(axon_msic_list); + +static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) +{ + pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n); + + dcr_write(msic->dcr_host, msic->dcr_base + dcr_n, val); +} + +static u32 msic_dcr_read(struct axon_msic *msic, unsigned int dcr_n) +{ + return dcr_read(msic->dcr_host, msic->dcr_base + dcr_n); +} + +static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct axon_msic *msic = get_irq_data(irq); + u32 write_offset, msi; + int idx; + + write_offset = msic_dcr_read(msic, MSIC_WRITE_OFFSET_REG); + pr_debug("axon_msi: original write_offset 0x%x\n", write_offset); + + /* write_offset doesn't wrap properly, so we have to mask it */ + write_offset &= MSIC_FIFO_SIZE_MASK; + + while (msic->read_offset != write_offset) { + idx = msic->read_offset / sizeof(__le32); + msi = le32_to_cpu(msic->fifo[idx]); + msi &= 0xFFFF; + + pr_debug("axon_msi: woff %x roff %x msi %x\n", + write_offset, msic->read_offset, msi); + + msic->read_offset += MSIC_FIFO_ENTRY_SIZE; + msic->read_offset &= MSIC_FIFO_SIZE_MASK; + + if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) + generic_handle_irq(msi); + else + pr_debug("axon_msi: invalid irq 0x%x!\n", msi); + } + + desc->chip->eoi(irq); +} + +static struct axon_msic *find_msi_translator(struct pci_dev *dev) +{ + struct irq_host *irq_host; + struct device_node *dn, *tmp; + const phandle *ph; + struct axon_msic *msic = NULL; + + dn = of_node_get(pci_device_to_OF_node(dev)); /* hold a reference: this node is of_node_put() by the parent walk or at out_error */ + if (!dn) { + dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); + return NULL; + } + + for (; dn; tmp = 
of_get_parent(dn), of_node_put(dn), dn = tmp) { + ph = of_get_property(dn, "msi-translator", NULL); + if (ph) + break; + } + + if (!ph) { + dev_dbg(&dev->dev, + "axon_msi: no msi-translator property found\n"); + goto out_error; + } + + tmp = dn; + dn = of_find_node_by_phandle(*ph); + if (!dn) { + dev_dbg(&dev->dev, + "axon_msi: msi-translator doesn't point to a node\n"); + goto out_error; + } + + irq_host = irq_find_host(dn); + if (!irq_host) { + dev_dbg(&dev->dev, "axon_msi: no irq_host found for node %s\n", + dn->full_name); + goto out_error; + } + + msic = irq_host->host_data; + +out_error: + of_node_put(dn); + of_node_put(tmp); + + return msic; +} + +static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type) +{ + if (!find_msi_translator(dev)) + return -ENODEV; + + return 0; +} + +static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) +{ + struct device_node *dn, *tmp; + struct msi_desc *entry; + int len; + const u32 *prop; + + dn = pci_device_to_OF_node(dev); + if (!dn) { + dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); + return -ENODEV; + } + + entry = list_first_entry(&dev->msi_list, struct msi_desc, list); + + for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { + if (entry->msi_attrib.is_64) { + prop = of_get_property(dn, "msi-address-64", &len); + if (prop) + break; + } + + prop = of_get_property(dn, "msi-address-32", &len); + if (prop) + break; + } + + if (!prop) { + dev_dbg(&dev->dev, + "axon_msi: no msi-address-(32|64) properties found\n"); + return -ENOENT; + } + + switch (len) { + case 8: + msg->address_hi = prop[0]; + msg->address_lo = prop[1]; + break; + case 4: + msg->address_hi = 0; + msg->address_lo = prop[0]; + break; + default: + dev_dbg(&dev->dev, + "axon_msi: malformed msi-address-(32|64) property\n"); + of_node_put(dn); + return -EINVAL; + } + + of_node_put(dn); + + return 0; +} + +static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + unsigned int virq, rc; + 
struct msi_desc *entry; + struct msi_msg msg; + struct axon_msic *msic; + + msic = find_msi_translator(dev); + if (!msic) + return -ENODEV; + + rc = setup_msi_msg_address(dev, &msg); + if (rc) + return rc; + + /* We rely on being able to stash a virq in a u16 */ + BUILD_BUG_ON(NR_IRQS > 65536); + + list_for_each_entry(entry, &dev->msi_list, list) { + virq = irq_create_direct_mapping(msic->irq_host); + if (virq == NO_IRQ) { + dev_warn(&dev->dev, + "axon_msi: virq allocation failed!\n"); + return -1; + } + dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq); + + set_irq_msi(virq, entry); + msg.data = virq; + write_msi_msg(virq, &msg); + } + + return 0; +} + +static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *entry; + + dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n"); + + list_for_each_entry(entry, &dev->msi_list, list) { + if (entry->irq == NO_IRQ) + continue; + + set_irq_msi(entry->irq, NULL); + irq_dispose_mapping(entry->irq); + } +} + +static struct irq_chip msic_irq_chip = { + .mask = mask_msi_irq, + .unmask = unmask_msi_irq, + .shutdown = mask_msi_irq, /* shutdown must leave the MSI masked, not re-enable it */ + .typename = "AXON-MSI", +}; + +static int msic_host_map(struct irq_host *h, unsigned int virq, + irq_hw_number_t hw) +{ + set_irq_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq); + + return 0; +} + +static int msic_host_match(struct irq_host *host, struct device_node *dn) +{ + struct axon_msic *msic = host->host_data; + + return msic->dn == dn; +} + +static struct irq_host_ops msic_host_ops = { + .match = msic_host_match, + .map = msic_host_map, +}; + +static int axon_msi_notify_reboot(struct notifier_block *nb, + unsigned long code, void *data) +{ + struct axon_msic *msic; + u32 tmp; + + list_for_each_entry(msic, &axon_msic_list, list) { + pr_debug("axon_msi: disabling %s\n", msic->dn->full_name); + tmp = msic_dcr_read(msic, MSIC_CTRL_REG); + tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE; + msic_dcr_write(msic, MSIC_CTRL_REG, tmp); + } + + return 0; 
+} + +static struct notifier_block axon_msi_reboot_notifier = { + .notifier_call = axon_msi_notify_reboot +}; + +static int axon_msi_setup_one(struct device_node *dn) +{ + struct page *page; + struct axon_msic *msic; + unsigned int virq; + int dcr_len; + + pr_debug("axon_msi: setting up dn %s\n", dn->full_name); + + msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL); + if (!msic) { + printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n", + dn->full_name); + goto out; + } + + msic->dcr_base = dcr_resource_start(dn, 0); + dcr_len = dcr_resource_len(dn, 0); + + if (msic->dcr_base == 0 || dcr_len == 0) { + printk(KERN_ERR + "axon_msi: couldn't parse dcr properties on %s\n", + dn->full_name); + goto out_free_msic; /* msic is already allocated here: free it instead of leaking it */ + } + + msic->dcr_host = dcr_map(dn, msic->dcr_base, dcr_len); + if (!DCR_MAP_OK(msic->dcr_host)) { + printk(KERN_ERR "axon_msi: dcr_map failed for %s\n", + dn->full_name); + goto out_free_msic; + } + + page = alloc_pages_node(of_node_to_nid(dn), GFP_KERNEL, + get_order(MSIC_FIFO_SIZE_BYTES)); + if (!page) { + printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n", + dn->full_name); + goto out_free_msic; + } + + msic->fifo = page_address(page); + + msic->irq_host = irq_alloc_host(IRQ_HOST_MAP_NOMAP, NR_IRQS, + &msic_host_ops, 0); + if (!msic->irq_host) { + printk(KERN_ERR "axon_msi: couldn't allocate irq_host for %s\n", + dn->full_name); + goto out_free_fifo; + } + + msic->irq_host->host_data = msic; + + virq = irq_of_parse_and_map(dn, 0); + if (virq == NO_IRQ) { + printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n", + dn->full_name); + goto out_free_host; + } + + msic->dn = of_node_get(dn); + + set_irq_data(virq, msic); + set_irq_chained_handler(virq, axon_msi_cascade); + pr_debug("axon_msi: irq 0x%x setup for axon_msi\n", virq); + + /* Enable the MSIC hardware */ + msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, (u64)msic->fifo >> 32); + msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG, + (u64)msic->fifo & 0xFFFFFFFF); + msic_dcr_write(msic, 
MSIC_CTRL_REG, + MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE | + MSIC_CTRL_FIFO_SIZE); + + list_add(&msic->list, &axon_msic_list); + + printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name); + + return 0; + +out_free_host: + kfree(msic->irq_host); +out_free_fifo: + __free_pages(virt_to_page(msic->fifo), get_order(MSIC_FIFO_SIZE_BYTES)); +out_free_msic: + kfree(msic); +out: + + return -1; +} + +static int axon_msi_init(void) +{ + struct device_node *dn; + int found = 0; + + pr_debug("axon_msi: initialising ...\n"); + + for_each_compatible_node(dn, NULL, "ibm,axon-msic") { + if (axon_msi_setup_one(dn) == 0) + found++; + } + + if (found) { + ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; + ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs; + ppc_md.msi_check_device = axon_msi_check_device; + + register_reboot_notifier(&axon_msi_reboot_notifier); + + pr_debug("axon_msi: registered callbacks!\n"); + } + + return 0; +} +arch_initcall(axon_msi_init); diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c index ab511d5b65a..0b6e8ee85ab 100644 --- a/arch/powerpc/platforms/cell/cbe_cpufreq.c +++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c @@ -1,7 +1,7 @@ /* * cpufreq driver for the cell processor * - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 * * Author: Christian Krafft <krafft@de.ibm.com> * @@ -21,18 +21,11 @@ */ #include <linux/cpufreq.h> -#include <linux/timer.h> - -#include <asm/hw_irq.h> -#include <asm/io.h> #include <asm/machdep.h> -#include <asm/processor.h> -#include <asm/prom.h> -#include <asm/time.h> -#include <asm/pmi.h> #include <asm/of_platform.h> - +#include <asm/prom.h> #include "cbe_regs.h" +#include "cbe_cpufreq.h" static DEFINE_MUTEX(cbe_switch_mutex); @@ -50,159 +43,24 @@ static struct cpufreq_frequency_table cbe_freqs[] = { {0, CPUFREQ_TABLE_END}, }; -/* to write to MIC register */ -static u64 MIC_Slow_Fast_Timer_table[] = { - 
[0 ... 7] = 0x007fc00000000000ull, -}; - -/* more values for the MIC */ -static u64 MIC_Slow_Next_Timer_table[] = { - 0x0000240000000000ull, - 0x0000268000000000ull, - 0x000029C000000000ull, - 0x00002D0000000000ull, - 0x0000300000000000ull, - 0x0000334000000000ull, - 0x000039C000000000ull, - 0x00003FC000000000ull, -}; - -static unsigned int pmi_frequency_limit = 0; /* * hardware specific functions */ -static struct of_device *pmi_dev; - -#ifdef CONFIG_PPC_PMI -static int set_pmode_pmi(int cpu, unsigned int pmode) -{ - int ret; - pmi_message_t pmi_msg; -#ifdef DEBUG - u64 time; -#endif - - pmi_msg.type = PMI_TYPE_FREQ_CHANGE; - pmi_msg.data1 = cbe_cpu_to_node(cpu); - pmi_msg.data2 = pmode; - -#ifdef DEBUG - time = (u64) get_cycles(); -#endif - - pmi_send_message(pmi_dev, pmi_msg); - ret = pmi_msg.data2; - - pr_debug("PMI returned slow mode %d\n", ret); - -#ifdef DEBUG - time = (u64) get_cycles() - time; /* actual cycles (not cpu cycles!) */ - time = 1000000000 * time / CLOCK_TICK_RATE; /* time in ns (10^-9) */ - pr_debug("had to wait %lu ns for a transition\n", time); -#endif - return ret; -} -#endif - -static int get_pmode(int cpu) +static int set_pmode(unsigned int cpu, unsigned int slow_mode) { - int ret; - struct cbe_pmd_regs __iomem *pmd_regs; - - pmd_regs = cbe_get_cpu_pmd_regs(cpu); - ret = in_be64(&pmd_regs->pmsr) & 0x07; - - return ret; -} - -static int set_pmode_reg(int cpu, unsigned int pmode) -{ - struct cbe_pmd_regs __iomem *pmd_regs; - struct cbe_mic_tm_regs __iomem *mic_tm_regs; - u64 flags; - u64 value; - - local_irq_save(flags); - - mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu); - pmd_regs = cbe_get_cpu_pmd_regs(cpu); - - pr_debug("pm register is mapped at %p\n", &pmd_regs->pmcr); - pr_debug("mic register is mapped at %p\n", &mic_tm_regs->slow_fast_timer_0); - - out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]); - out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]); - - 
out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]); - out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]); - - value = in_be64(&pmd_regs->pmcr); - /* set bits to zero */ - value &= 0xFFFFFFFFFFFFFFF8ull; - /* set bits to next pmode */ - value |= pmode; - - out_be64(&pmd_regs->pmcr, value); - - /* wait until new pmode appears in status register */ - value = in_be64(&pmd_regs->pmsr) & 0x07; - while(value != pmode) { - cpu_relax(); - value = in_be64(&pmd_regs->pmsr) & 0x07; - } - - local_irq_restore(flags); - - return 0; -} + int rc; -static int set_pmode(int cpu, unsigned int slow_mode) { -#ifdef CONFIG_PPC_PMI - if (pmi_dev) - return set_pmode_pmi(cpu, slow_mode); + if (cbe_cpufreq_has_pmi) + rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode); else -#endif - return set_pmode_reg(cpu, slow_mode); -} - -static void cbe_cpufreq_handle_pmi(struct of_device *dev, pmi_message_t pmi_msg) -{ - u8 cpu; - u8 cbe_pmode_new; - - BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE); + rc = cbe_cpufreq_set_pmode(cpu, slow_mode); - cpu = cbe_node_to_cpu(pmi_msg.data1); - cbe_pmode_new = pmi_msg.data2; + pr_debug("register contains slow mode %d\n", cbe_cpufreq_get_pmode(cpu)); - pmi_frequency_limit = cbe_freqs[cbe_pmode_new].frequency; - - pr_debug("cbe_handle_pmi: max freq=%d\n", pmi_frequency_limit); -} - -static int pmi_notifier(struct notifier_block *nb, - unsigned long event, void *data) -{ - struct cpufreq_policy *policy = data; - - if (event != CPUFREQ_INCOMPATIBLE) - return 0; - - cpufreq_verify_within_limits(policy, 0, pmi_frequency_limit); - return 0; + return rc; } -static struct notifier_block pmi_notifier_block = { - .notifier_call = pmi_notifier, -}; - -static struct pmi_handler cbe_pmi_handler = { - .type = PMI_TYPE_FREQ_CHANGE, - .handle_pmi_message = cbe_cpufreq_handle_pmi, -}; - - /* * cpufreq functions */ @@ -221,8 +79,19 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) pr_debug("init cpufreq on CPU %d\n", 
policy->cpu); + /* + * Let's check we can actually get to the CELL regs + */ + if (!cbe_get_cpu_pmd_regs(policy->cpu) || + !cbe_get_cpu_mic_tm_regs(policy->cpu)) { + pr_info("invalid CBE regs pointers for cpufreq\n"); + return -EINVAL; + } + max_freqp = of_get_property(cpu, "clock-frequency", NULL); + of_node_put(cpu); + if (!max_freqp) return -EINVAL; @@ -239,10 +108,12 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) } policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - /* if DEBUG is enabled set_pmode() measures the correct latency of a transition */ + + /* if DEBUG is enabled set_pmode() measures the latency + * of a transition */ policy->cpuinfo.transition_latency = 25000; - cur_pmode = get_pmode(policy->cpu); + cur_pmode = cbe_cpufreq_get_pmode(policy->cpu); pr_debug("current pmode is at %d\n",cur_pmode); policy->cur = cbe_freqs[cur_pmode].frequency; @@ -253,21 +124,13 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy) cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu); - if (pmi_dev) { - /* frequency might get limited later, initialize limit with max_freq */ - pmi_frequency_limit = max_freq; - cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); - } - - /* this ensures that policy->cpuinfo_min and policy->cpuinfo_max are set correctly */ + /* this ensures that policy->cpuinfo_min + * and policy->cpuinfo_max are set correctly */ return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs); } static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy) { - if (pmi_dev) - cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); - cpufreq_frequency_table_put_attr(policy->cpu); return 0; } @@ -277,13 +140,13 @@ static int cbe_cpufreq_verify(struct cpufreq_policy *policy) return cpufreq_frequency_table_verify(policy, cbe_freqs); } - -static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, - unsigned int relation) +static int cbe_cpufreq_target(struct cpufreq_policy 
*policy, + unsigned int target_freq, + unsigned int relation) { int rc; struct cpufreq_freqs freqs; - int cbe_pmode_new; + unsigned int cbe_pmode_new; cpufreq_frequency_table_target(policy, cbe_freqs, @@ -298,12 +161,14 @@ static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target mutex_lock(&cbe_switch_mutex); cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n", + pr_debug("setting frequency for cpu %d to %d kHz, " \ + "1/%d of max frequency\n", policy->cpu, cbe_freqs[cbe_pmode_new].frequency, cbe_freqs[cbe_pmode_new].index); rc = set_pmode(policy->cpu, cbe_pmode_new); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); mutex_unlock(&cbe_switch_mutex); @@ -326,28 +191,14 @@ static struct cpufreq_driver cbe_cpufreq_driver = { static int __init cbe_cpufreq_init(void) { -#ifdef CONFIG_PPC_PMI - struct device_node *np; -#endif if (!machine_is(cell)) return -ENODEV; -#ifdef CONFIG_PPC_PMI - np = of_find_node_by_type(NULL, "ibm,pmi"); - - pmi_dev = of_find_device_by_node(np); - if (pmi_dev) - pmi_register_handler(pmi_dev, &cbe_pmi_handler); -#endif return cpufreq_register_driver(&cbe_cpufreq_driver); } static void __exit cbe_cpufreq_exit(void) { -#ifdef CONFIG_PPC_PMI - if (pmi_dev) - pmi_unregister_handler(pmi_dev, &cbe_pmi_handler); -#endif cpufreq_unregister_driver(&cbe_cpufreq_driver); } diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.h b/arch/powerpc/platforms/cell/cbe_cpufreq.h new file mode 100644 index 00000000000..c1d86bfa92f --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_cpufreq.h @@ -0,0 +1,24 @@ +/* + * cbe_cpufreq.h + * + * This file contains the definitions used by the cbe_cpufreq driver. 
+ * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 + * + * Author: Christian Krafft <krafft@de.ibm.com> + * + */ + +#include <linux/cpufreq.h> +#include <linux/types.h> + +int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode); +int cbe_cpufreq_get_pmode(int cpu); + +int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode); + +#if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE) +extern bool cbe_cpufreq_has_pmi; +#else +#define cbe_cpufreq_has_pmi (0) +#endif diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c new file mode 100644 index 00000000000..163263b3e1c --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c @@ -0,0 +1,115 @@ +/* + * pervasive backend for the cbe_cpufreq driver + * + * This driver makes use of the pervasive unit to + * engage the desired frequency. + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 + * + * Author: Christian Krafft <krafft@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/time.h> +#include <asm/machdep.h> +#include <asm/hw_irq.h> + +#include "cbe_regs.h" +#include "cbe_cpufreq.h" + +/* to write to MIC register */ +static u64 MIC_Slow_Fast_Timer_table[] = { + [0 ... 
7] = 0x007fc00000000000ull, +}; + +/* more values for the MIC */ +static u64 MIC_Slow_Next_Timer_table[] = { + 0x0000240000000000ull, + 0x0000268000000000ull, + 0x000029C000000000ull, + 0x00002D0000000000ull, + 0x0000300000000000ull, + 0x0000334000000000ull, + 0x000039C000000000ull, + 0x00003FC000000000ull, +}; + + +int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode) +{ + struct cbe_pmd_regs __iomem *pmd_regs; + struct cbe_mic_tm_regs __iomem *mic_tm_regs; + u64 flags; + u64 value; +#ifdef DEBUG + long time; +#endif + + local_irq_save(flags); + + mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu); + pmd_regs = cbe_get_cpu_pmd_regs(cpu); + +#ifdef DEBUG + time = jiffies; +#endif + + out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]); + out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]); + + out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]); + out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]); + + value = in_be64(&pmd_regs->pmcr); + /* set bits to zero */ + value &= 0xFFFFFFFFFFFFFFF8ull; + /* set bits to next pmode */ + value |= pmode; + + out_be64(&pmd_regs->pmcr, value); + +#ifdef DEBUG + /* wait until new pmode appears in status register */ + value = in_be64(&pmd_regs->pmsr) & 0x07; + while (value != pmode) { + cpu_relax(); + value = in_be64(&pmd_regs->pmsr) & 0x07; + } + + time = jiffies - time; + time = jiffies_to_msecs(time); + pr_debug("had to wait %lu ms for a transition using " \ + "pervasive unit\n", time); +#endif + local_irq_restore(flags); + + return 0; +} + + +int cbe_cpufreq_get_pmode(int cpu) +{ + int ret; + struct cbe_pmd_regs __iomem *pmd_regs; + + pmd_regs = cbe_get_cpu_pmd_regs(cpu); + ret = in_be64(&pmd_regs->pmsr) & 0x07; + + return ret; +} + diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c new file mode 100644 index 00000000000..fc6f38982ff --- /dev/null +++ 
b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c @@ -0,0 +1,148 @@ +/* + * pmi backend for the cbe_cpufreq driver + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007 + * + * Author: Christian Krafft <krafft@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <asm/of_platform.h> +#include <asm/processor.h> +#include <asm/prom.h> +#include <asm/pmi.h> + +#ifdef DEBUG +#include <asm/time.h> +#endif + +#include "cbe_regs.h" +#include "cbe_cpufreq.h" + +static u8 pmi_slow_mode_limit[MAX_CBE]; + +bool cbe_cpufreq_has_pmi = false; +EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi); + +/* + * hardware specific functions + */ + +int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode) +{ + int ret; + pmi_message_t pmi_msg; +#ifdef DEBUG + long time; +#endif + pmi_msg.type = PMI_TYPE_FREQ_CHANGE; + pmi_msg.data1 = cbe_cpu_to_node(cpu); + pmi_msg.data2 = pmode; + +#ifdef DEBUG + time = jiffies; +#endif + pmi_send_message(pmi_msg); + +#ifdef DEBUG + time = jiffies - time; + time = jiffies_to_msecs(time); + pr_debug("had to wait %lu ms for a transition using " \ + "PMI\n", time); +#endif + ret = pmi_msg.data2; + pr_debug("PMI returned slow mode %d\n", ret); + + return ret; +} +EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi); + + +static 
void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg) +{ + u8 node, slow_mode; + + BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE); + + node = pmi_msg.data1; + slow_mode = pmi_msg.data2; + + pmi_slow_mode_limit[node] = slow_mode; + + pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode); +} + +static int pmi_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct cpufreq_policy *policy = data; + struct cpufreq_frequency_table *cbe_freqs; + u8 node; + + cbe_freqs = cpufreq_frequency_get_table(policy->cpu); + node = cbe_cpu_to_node(policy->cpu); + + pr_debug("got notified, event=%lu, node=%u\n", event, node); + + if (pmi_slow_mode_limit[node] != 0) { + pr_debug("limiting node %d to slow mode %d\n", + node, pmi_slow_mode_limit[node]); + + cpufreq_verify_within_limits(policy, 0, + + cbe_freqs[pmi_slow_mode_limit[node]].frequency); + } + + return 0; +} + +static struct notifier_block pmi_notifier_block = { + .notifier_call = pmi_notifier, +}; + +static struct pmi_handler cbe_pmi_handler = { + .type = PMI_TYPE_FREQ_CHANGE, + .handle_pmi_message = cbe_cpufreq_handle_pmi, +}; + + + +static int __init cbe_cpufreq_pmi_init(void) +{ + cbe_cpufreq_has_pmi = pmi_register_handler(&cbe_pmi_handler) == 0; + + if (!cbe_cpufreq_has_pmi) + return -ENODEV; + + cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); + + return 0; +} + +static void __exit cbe_cpufreq_pmi_exit(void) +{ + cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER); + pmi_unregister_handler(&cbe_pmi_handler); +} + +module_init(cbe_cpufreq_pmi_init); +module_exit(cbe_cpufreq_pmi_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index 12c9674b4b1..c8f7f000742 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -174,6 +174,13 @@ static struct device_node 
*cbe_get_be_node(int cpu_id) cpu_handle = of_get_property(np, "cpus", &len); + /* + * the CAB SLOF tree is non compliant, so we just assume + * there is only one node + */ + if (WARN_ON_ONCE(!cpu_handle)) + return np; + for (i=0; i<len; i++) if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL)) return np; diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index f370f0fa6f4..e4132f8f51b 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -292,7 +292,7 @@ static struct attribute_group ppe_attribute_group = { /* * initialize throttling with default values */ -static void __init init_default_values(void) +static int __init init_default_values(void) { int cpu; struct cbe_pmd_regs __iomem *pmd_regs; @@ -339,25 +339,40 @@ static void __init init_default_values(void) for_each_possible_cpu (cpu) { pr_debug("processing cpu %d\n", cpu); sysdev = get_cpu_sysdev(cpu); + + if (!sysdev) { + pr_info("invalid sysdev pointer for cbe_thermal\n"); + return -EINVAL; + } + pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id); + if (!pmd_regs) { + pr_info("invalid CBE regs pointer for cbe_thermal\n"); + return -EINVAL; + } + out_be64(&pmd_regs->tm_str2, str2); out_be64(&pmd_regs->tm_str1.val, str1.val); out_be64(&pmd_regs->tm_tpr.val, tpr.val); out_be64(&pmd_regs->tm_cr1.val, cr1.val); out_be64(&pmd_regs->tm_cr2, cr2); } + + return 0; } static int __init thermal_init(void) { - init_default_values(); + int rc = init_default_values(); - spu_add_sysdev_attr_group(&spu_attribute_group); - cpu_add_sysdev_attr_group(&ppe_attribute_group); + if (rc == 0) { + spu_add_sysdev_attr_group(&spu_attribute_group); + cpu_add_sysdev_attr_group(&ppe_attribute_group); + } - return 0; + return rc; } module_init(thermal_init); diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 96a8f609690..90124228b8f 100644 --- 
a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -35,18 +35,37 @@ #include <asm/spu.h> #include <asm/spu_priv1.h> #include <asm/xmon.h> +#include <asm/prom.h> +#include "spu_priv1_mmio.h" const struct spu_management_ops *spu_management_ops; EXPORT_SYMBOL_GPL(spu_management_ops); const struct spu_priv1_ops *spu_priv1_ops; +EXPORT_SYMBOL_GPL(spu_priv1_ops); -static struct list_head spu_list[MAX_NUMNODES]; -static LIST_HEAD(spu_full_list); -static DEFINE_MUTEX(spu_mutex); -static DEFINE_SPINLOCK(spu_list_lock); +struct cbe_spu_info cbe_spu_info[MAX_NUMNODES]; +EXPORT_SYMBOL_GPL(cbe_spu_info); -EXPORT_SYMBOL_GPL(spu_priv1_ops); +/* + * Protects cbe_spu_info and spu->number. + */ +static DEFINE_SPINLOCK(spu_lock); + +/* + * List of all spus in the system. + * + * This list is iterated by callers from irq context and callers that + * want to sleep. Thus modifications need to be done with both + * spu_full_list_lock and spu_full_list_mutex held, while iterating + * through it requires either of these locks. + * + * In addition spu_full_list_lock protects all assignments to + * spu->mm. + */ +static LIST_HEAD(spu_full_list); +static DEFINE_SPINLOCK(spu_full_list_lock); +static DEFINE_MUTEX(spu_full_list_mutex); void spu_invalidate_slbs(struct spu *spu) { @@ -65,12 +84,12 @@ void spu_flush_all_slbs(struct mm_struct *mm) struct spu *spu; unsigned long flags; - spin_lock_irqsave(&spu_list_lock, flags); + spin_lock_irqsave(&spu_full_list_lock, flags); list_for_each_entry(spu, &spu_full_list, full_list) { if (spu->mm == mm) spu_invalidate_slbs(spu); } - spin_unlock_irqrestore(&spu_list_lock, flags); + spin_unlock_irqrestore(&spu_full_list_lock, flags); } /* The hack below stinks... 
try to do something better one of @@ -88,9 +107,9 @@ void spu_associate_mm(struct spu *spu, struct mm_struct *mm) { unsigned long flags; - spin_lock_irqsave(&spu_list_lock, flags); + spin_lock_irqsave(&spu_full_list_lock, flags); spu->mm = mm; - spin_unlock_irqrestore(&spu_list_lock, flags); + spin_unlock_irqrestore(&spu_full_list_lock, flags); if (mm) mm_needs_global_tlbie(mm); } @@ -390,7 +409,7 @@ static void spu_free_irqs(struct spu *spu) free_irq(spu->irqs[2], spu); } -static void spu_init_channels(struct spu *spu) +void spu_init_channels(struct spu *spu) { static const struct { unsigned channel; @@ -423,46 +442,7 @@ static void spu_init_channels(struct spu *spu) out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); } } - -struct spu *spu_alloc_node(int node) -{ - struct spu *spu = NULL; - - mutex_lock(&spu_mutex); - if (!list_empty(&spu_list[node])) { - spu = list_entry(spu_list[node].next, struct spu, list); - list_del_init(&spu->list); - pr_debug("Got SPU %d %d\n", spu->number, spu->node); - } - mutex_unlock(&spu_mutex); - - if (spu) - spu_init_channels(spu); - return spu; -} -EXPORT_SYMBOL_GPL(spu_alloc_node); - -struct spu *spu_alloc(void) -{ - struct spu *spu = NULL; - int node; - - for (node = 0; node < MAX_NUMNODES; node++) { - spu = spu_alloc_node(node); - if (spu) - break; - } - - return spu; -} - -void spu_free(struct spu *spu) -{ - mutex_lock(&spu_mutex); - list_add_tail(&spu->list, &spu_list[spu->node]); - mutex_unlock(&spu_mutex); -} -EXPORT_SYMBOL_GPL(spu_free); +EXPORT_SYMBOL_GPL(spu_init_channels); static int spu_shutdown(struct sys_device *sysdev) { @@ -481,12 +461,12 @@ struct sysdev_class spu_sysdev_class = { int spu_add_sysdev_attr(struct sysdev_attribute *attr) { struct spu *spu; - mutex_lock(&spu_mutex); + mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) sysdev_create_file(&spu->sysdev, attr); + mutex_unlock(&spu_full_list_mutex); - mutex_unlock(&spu_mutex); return 0; } 
EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); @@ -494,12 +474,12 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); int spu_add_sysdev_attr_group(struct attribute_group *attrs) { struct spu *spu; - mutex_lock(&spu_mutex); + mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) sysfs_create_group(&spu->sysdev.kobj, attrs); + mutex_unlock(&spu_full_list_mutex); - mutex_unlock(&spu_mutex); return 0; } EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); @@ -508,24 +488,22 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); void spu_remove_sysdev_attr(struct sysdev_attribute *attr) { struct spu *spu; - mutex_lock(&spu_mutex); + mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) sysdev_remove_file(&spu->sysdev, attr); - - mutex_unlock(&spu_mutex); + mutex_unlock(&spu_full_list_mutex); } EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr); void spu_remove_sysdev_attr_group(struct attribute_group *attrs) { struct spu *spu; - mutex_lock(&spu_mutex); + mutex_lock(&spu_full_list_mutex); list_for_each_entry(spu, &spu_full_list, full_list) sysfs_remove_group(&spu->sysdev.kobj, attrs); - - mutex_unlock(&spu_mutex); + mutex_unlock(&spu_full_list_mutex); } EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group); @@ -553,16 +531,19 @@ static int __init create_spu(void *data) int ret; static int number; unsigned long flags; + struct timespec ts; ret = -ENOMEM; spu = kzalloc(sizeof (*spu), GFP_KERNEL); if (!spu) goto out; + spu->alloc_state = SPU_FREE; + spin_lock_init(&spu->register_lock); - mutex_lock(&spu_mutex); + spin_lock(&spu_lock); spu->number = number++; - mutex_unlock(&spu_mutex); + spin_unlock(&spu_lock); ret = spu_create_spu(spu, data); @@ -579,15 +560,22 @@ static int __init create_spu(void *data) if (ret) goto out_free_irqs; - mutex_lock(&spu_mutex); - spin_lock_irqsave(&spu_list_lock, flags); - list_add(&spu->list, &spu_list[spu->node]); + mutex_lock(&cbe_spu_info[spu->node].list_mutex); + list_add(&spu->cbe_list, 
&cbe_spu_info[spu->node].spus); + cbe_spu_info[spu->node].n_spus++; + mutex_unlock(&cbe_spu_info[spu->node].list_mutex); + + mutex_lock(&spu_full_list_mutex); + spin_lock_irqsave(&spu_full_list_lock, flags); list_add(&spu->full_list, &spu_full_list); - spin_unlock_irqrestore(&spu_list_lock, flags); - mutex_unlock(&spu_mutex); + spin_unlock_irqrestore(&spu_full_list_lock, flags); + mutex_unlock(&spu_full_list_mutex); + + spu->stats.util_state = SPU_UTIL_IDLE_LOADED; + ktime_get_ts(&ts); + spu->stats.tstamp = timespec_to_ns(&ts); - spu->stats.utilization_state = SPU_UTIL_IDLE; - spu->stats.tstamp = jiffies; + INIT_LIST_HEAD(&spu->aff_list); goto out; @@ -608,12 +596,20 @@ static const char *spu_state_names[] = { static unsigned long long spu_acct_time(struct spu *spu, enum spu_utilization_state state) { + struct timespec ts; unsigned long long time = spu->stats.times[state]; - if (spu->stats.utilization_state == state) - time += jiffies - spu->stats.tstamp; + /* + * If the spu is idle or the context is stopped, utilization + * statistics are not updated. Apply the time delta from the + * last recorded state of the spu. 
+ */ + if (spu->stats.util_state == state) { + ktime_get_ts(&ts); + time += timespec_to_ns(&ts) - spu->stats.tstamp; + } - return jiffies_to_msecs(time); + return time / NSEC_PER_MSEC; } @@ -623,11 +619,11 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf) return sprintf(buf, "%s %llu %llu %llu %llu " "%llu %llu %llu %llu %llu %llu %llu %llu\n", - spu_state_names[spu->stats.utilization_state], + spu_state_names[spu->stats.util_state], spu_acct_time(spu, SPU_UTIL_USER), spu_acct_time(spu, SPU_UTIL_SYSTEM), spu_acct_time(spu, SPU_UTIL_IOWAIT), - spu_acct_time(spu, SPU_UTIL_IDLE), + spu_acct_time(spu, SPU_UTIL_IDLE_LOADED), spu->stats.vol_ctx_switch, spu->stats.invol_ctx_switch, spu->stats.slb_flt, @@ -640,12 +636,146 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf) static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL); +/* Hardcoded affinity idxs for QS20 */ +#define SPES_PER_BE 8 +static int QS20_reg_idxs[SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; +static int QS20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; + +static struct spu *spu_lookup_reg(int node, u32 reg) +{ + struct spu *spu; + + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (*(u32 *)get_property(spu_devnode(spu), "reg", NULL) == reg) + return spu; + } + return NULL; +} + +static void init_aff_QS20_harcoded(void) +{ + int node, i; + struct spu *last_spu, *spu; + u32 reg; + + for (node = 0; node < MAX_NUMNODES; node++) { + last_spu = NULL; + for (i = 0; i < SPES_PER_BE; i++) { + reg = QS20_reg_idxs[i]; + spu = spu_lookup_reg(node, reg); + if (!spu) + continue; + spu->has_mem_affinity = QS20_reg_memory[reg]; + if (last_spu) + list_add_tail(&spu->aff_list, + &last_spu->aff_list); + last_spu = spu; + } + } +} + +static int of_has_vicinity(void) +{ + struct spu* spu; + + spu = list_entry(cbe_spu_info[0].spus.next, struct spu, cbe_list); + return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL; +} + +static struct spu 
*aff_devnode_spu(int cbe, struct device_node *dn) +{ + struct spu *spu; + + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) + if (spu_devnode(spu) == dn) + return spu; + return NULL; +} + +static struct spu * +aff_node_next_to(int cbe, struct device_node *target, struct device_node *avoid) +{ + struct spu *spu; + const phandle *vic_handles; + int lenp, i; + + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) { + if (spu_devnode(spu) == avoid) + continue; + vic_handles = get_property(spu_devnode(spu), "vicinity", &lenp); + for (i=0; i < (lenp / sizeof(phandle)); i++) { + if (vic_handles[i] == target->linux_phandle) + return spu; + } + } + return NULL; +} + +static void init_aff_fw_vicinity_node(int cbe) +{ + struct spu *spu, *last_spu; + struct device_node *vic_dn, *last_spu_dn; + phandle avoid_ph; + const phandle *vic_handles; + const char *name; + int lenp, i, added, mem_aff; + + last_spu = list_entry(cbe_spu_info[cbe].spus.next, struct spu, cbe_list); + avoid_ph = 0; + for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) { + last_spu_dn = spu_devnode(last_spu); + vic_handles = get_property(last_spu_dn, "vicinity", &lenp); + + for (i = 0; i < (lenp / sizeof(phandle)); i++) { + if (vic_handles[i] == avoid_ph) + continue; + + vic_dn = of_find_node_by_phandle(vic_handles[i]); + if (!vic_dn) + continue; + + name = get_property(vic_dn, "name", NULL); + if (strcmp(name, "spe") == 0) { + spu = aff_devnode_spu(cbe, vic_dn); + avoid_ph = last_spu_dn->linux_phandle; + } + else { + mem_aff = strcmp(name, "mic-tm") == 0; + spu = aff_node_next_to(cbe, vic_dn, last_spu_dn); + if (!spu) + continue; + if (mem_aff) { + last_spu->has_mem_affinity = 1; + spu->has_mem_affinity = 1; + } + avoid_ph = vic_dn->linux_phandle; + } + list_add_tail(&spu->aff_list, &last_spu->aff_list); + last_spu = spu; + break; + } + } +} + +static void init_aff_fw_vicinity(void) +{ + int cbe; + + /* sets has_mem_affinity for each spu, as well as the + * spu->aff_list list, 
linking each spu to its neighbors + */ + for (cbe = 0; cbe < MAX_NUMNODES; cbe++) + init_aff_fw_vicinity_node(cbe); +} + static int __init init_spu_base(void) { int i, ret = 0; - for (i = 0; i < MAX_NUMNODES; i++) - INIT_LIST_HEAD(&spu_list[i]); + for (i = 0; i < MAX_NUMNODES; i++) { + mutex_init(&cbe_spu_info[i].list_mutex); + INIT_LIST_HEAD(&cbe_spu_info[i].spus); + } if (!spu_management_ops) goto out; @@ -675,16 +805,25 @@ static int __init init_spu_base(void) fb_append_extra_logo(&logo_spe_clut224, ret); } + mutex_lock(&spu_full_list_mutex); xmon_register_spus(&spu_full_list); - + crash_register_spus(&spu_full_list); + mutex_unlock(&spu_full_list_mutex); spu_add_sysdev_attr(&attr_stat); + if (of_has_vicinity()) { + init_aff_fw_vicinity(); + } else { + long root = of_get_flat_dt_root(); + if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) + init_aff_QS20_harcoded(); + } + return 0; out_unregister_sysdev_class: sysdev_class_unregister(&spu_sysdev_class); out: - return ret; } module_init(init_spu_base); diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index 261b507a901..dd2c6688c8a 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -34,14 +34,27 @@ struct spufs_calls spufs_calls = { * this file is not used and the syscalls directly enter the fs code */ asmlinkage long sys_spu_create(const char __user *name, - unsigned int flags, mode_t mode) + unsigned int flags, mode_t mode, int neighbor_fd) { long ret; struct module *owner = spufs_calls.owner; + struct file *neighbor; + int fput_needed; ret = -ENOSYS; if (owner && try_module_get(owner)) { - ret = spufs_calls.create_thread(name, flags, mode); + if (flags & SPU_CREATE_AFFINITY_SPU) { + neighbor = fget_light(neighbor_fd, &fput_needed); + if (neighbor) { + ret = spufs_calls.create_thread(name, flags, + mode, neighbor); + fput_light(neighbor, fput_needed); + } + } + else { + ret = 
spufs_calls.create_thread(name, flags, + mode, NULL); + } module_put(owner); } return ret; diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 6d7bd60f538..6694f86d700 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -22,6 +22,7 @@ #include <linux/fs.h> #include <linux/mm.h> +#include <linux/module.h> #include <linux/slab.h> #include <asm/atomic.h> #include <asm/spu.h> @@ -55,12 +56,12 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) ctx->ops = &spu_backing_ops; ctx->owner = get_task_mm(current); INIT_LIST_HEAD(&ctx->rq); + INIT_LIST_HEAD(&ctx->aff_list); if (gang) spu_gang_add_ctx(gang, ctx); ctx->cpus_allowed = current->cpus_allowed; spu_set_timeslice(ctx); - ctx->stats.execution_state = SPUCTX_UTIL_USER; - ctx->stats.tstamp = jiffies; + ctx->stats.util_state = SPU_UTIL_IDLE_LOADED; atomic_inc(&nr_spu_contexts); goto out; @@ -81,6 +82,8 @@ void destroy_spu_context(struct kref *kref) spu_fini_csa(&ctx->csa); if (ctx->gang) spu_gang_remove_ctx(ctx->gang, ctx); + if (ctx->prof_priv_kref) + kref_put(ctx->prof_priv_kref, ctx->prof_priv_release); BUG_ON(!list_empty(&ctx->rq)); atomic_dec(&nr_spu_contexts); kfree(ctx); @@ -166,6 +169,39 @@ int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags) void spu_acquire_saved(struct spu_context *ctx) { spu_acquire(ctx); - if (ctx->state != SPU_STATE_SAVED) + if (ctx->state != SPU_STATE_SAVED) { + set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags); spu_deactivate(ctx); + } +} + +/** + * spu_release_saved - unlock spu context and return it to the runqueue + * @ctx: context to unlock + */ +void spu_release_saved(struct spu_context *ctx) +{ + BUG_ON(ctx->state != SPU_STATE_SAVED); + + if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags)) + spu_activate(ctx, 0); + + spu_release(ctx); } + +void spu_set_profile_private_kref(struct spu_context *ctx, + struct kref *prof_info_kref, + 
void ( * prof_info_release) (struct kref *kref)) +{ + ctx->prof_priv_kref = prof_info_kref; + ctx->prof_priv_release = prof_info_release; +} +EXPORT_SYMBOL_GPL(spu_set_profile_private_kref); + +void *spu_get_profile_private_kref(struct spu_context *ctx) +{ + return ctx->prof_priv_kref; +} +EXPORT_SYMBOL_GPL(spu_get_profile_private_kref); + + diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 5d9ad5a0307..5e31799b1e3 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -226,7 +226,7 @@ static void spufs_arch_write_notes(struct file *file) spu_acquire_saved(ctx_info->ctx); for (j = 0; j < spufs_coredump_num_notes; j++) spufs_arch_write_note(ctx_info, j, file); - spu_release(ctx_info->ctx); + spu_release_saved(ctx_info->ctx); list_del(&ctx_info->list); kfree(ctx_info); } diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index f53a0743747..917eab4be48 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -179,16 +179,14 @@ int spufs_handle_class1(struct spu_context *ctx) if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) return 0; - spuctx_switch_state(ctx, SPUCTX_UTIL_IOWAIT); + spuctx_switch_state(ctx, SPU_UTIL_IOWAIT); pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea, dsisr, ctx->state); ctx->stats.hash_flt++; - if (ctx->state == SPU_STATE_RUNNABLE) { + if (ctx->state == SPU_STATE_RUNNABLE) ctx->spu->stats.hash_flt++; - spu_switch_state(ctx->spu, SPU_UTIL_IOWAIT); - } /* we must not hold the lock when entering spu_handle_mm_fault */ spu_release(ctx); @@ -226,7 +224,7 @@ int spufs_handle_class1(struct spu_context *ctx) } else spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); - spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); return ret; } 
EXPORT_SYMBOL_GPL(spufs_handle_class1); diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index c2814ea96af..7de4e919687 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -370,7 +370,7 @@ spufs_regs_read(struct file *file, char __user *buffer, spu_acquire_saved(ctx); ret = __spufs_regs_read(ctx, buffer, size, pos); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -392,7 +392,7 @@ spufs_regs_write(struct file *file, const char __user *buffer, ret = copy_from_user(lscsa->gprs + *pos - size, buffer, size) ? -EFAULT : size; - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -421,7 +421,7 @@ spufs_fpcr_read(struct file *file, char __user * buffer, spu_acquire_saved(ctx); ret = __spufs_fpcr_read(ctx, buffer, size, pos); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -443,7 +443,7 @@ spufs_fpcr_write(struct file *file, const char __user * buffer, ret = copy_from_user((char *)&lscsa->fpcr + *pos - size, buffer, size) ? 
-EFAULT : size; - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -868,7 +868,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf, spu_acquire_saved(ctx); ret = __spufs_signal1_read(ctx, buf, len, pos); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -934,6 +934,13 @@ static const struct file_operations spufs_signal1_fops = { .mmap = spufs_signal1_mmap, }; +static const struct file_operations spufs_signal1_nosched_fops = { + .open = spufs_signal1_open, + .release = spufs_signal1_release, + .write = spufs_signal1_write, + .mmap = spufs_signal1_mmap, +}; + static int spufs_signal2_open(struct inode *inode, struct file *file) { struct spufs_inode_info *i = SPUFS_I(inode); @@ -992,7 +999,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf, spu_acquire_saved(ctx); ret = __spufs_signal2_read(ctx, buf, len, pos); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -1062,6 +1069,13 @@ static const struct file_operations spufs_signal2_fops = { .mmap = spufs_signal2_mmap, }; +static const struct file_operations spufs_signal2_nosched_fops = { + .open = spufs_signal2_open, + .release = spufs_signal2_release, + .write = spufs_signal2_write, + .mmap = spufs_signal2_mmap, +}; + static void spufs_signal1_type_set(void *data, u64 val) { struct spu_context *ctx = data; @@ -1612,7 +1626,7 @@ static void spufs_decr_set(void *data, u64 val) struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); lscsa->decr.slot[0] = (u32) val; - spu_release(ctx); + spu_release_saved(ctx); } static u64 __spufs_decr_get(void *data) @@ -1628,7 +1642,7 @@ static u64 spufs_decr_get(void *data) u64 ret; spu_acquire_saved(ctx); ret = __spufs_decr_get(data); - spu_release(ctx); + spu_release_saved(ctx); return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, @@ -1637,17 +1651,21 @@ DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, static void 
spufs_decr_status_set(void *data, u64 val) { struct spu_context *ctx = data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); - lscsa->decr_status.slot[0] = (u32) val; - spu_release(ctx); + if (val) + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; + else + ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; + spu_release_saved(ctx); } static u64 __spufs_decr_status_get(void *data) { struct spu_context *ctx = data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; - return lscsa->decr_status.slot[0]; + if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) + return SPU_DECR_STATUS_RUNNING; + else + return 0; } static u64 spufs_decr_status_get(void *data) @@ -1656,7 +1674,7 @@ static u64 spufs_decr_status_get(void *data) u64 ret; spu_acquire_saved(ctx); ret = __spufs_decr_status_get(data); - spu_release(ctx); + spu_release_saved(ctx); return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, @@ -1668,7 +1686,7 @@ static void spufs_event_mask_set(void *data, u64 val) struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); lscsa->event_mask.slot[0] = (u32) val; - spu_release(ctx); + spu_release_saved(ctx); } static u64 __spufs_event_mask_get(void *data) @@ -1684,7 +1702,7 @@ static u64 spufs_event_mask_get(void *data) u64 ret; spu_acquire_saved(ctx); ret = __spufs_event_mask_get(data); - spu_release(ctx); + spu_release_saved(ctx); return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, @@ -1708,7 +1726,7 @@ static u64 spufs_event_status_get(void *data) spu_acquire_saved(ctx); ret = __spufs_event_status_get(data); - spu_release(ctx); + spu_release_saved(ctx); return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, @@ -1720,7 +1738,7 @@ static void spufs_srr0_set(void *data, u64 val) struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); lscsa->srr0.slot[0] = (u32) val; - spu_release(ctx); + spu_release_saved(ctx); } 
static u64 spufs_srr0_get(void *data) @@ -1730,7 +1748,7 @@ static u64 spufs_srr0_get(void *data) u64 ret; spu_acquire_saved(ctx); ret = lscsa->srr0.slot[0]; - spu_release(ctx); + spu_release_saved(ctx); return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, @@ -1786,7 +1804,7 @@ static u64 spufs_lslr_get(void *data) spu_acquire_saved(ctx); ret = __spufs_lslr_get(data); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -1850,7 +1868,7 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_mbox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -1888,7 +1906,7 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_ibox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -1929,7 +1947,7 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_wbox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -1979,7 +1997,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_dma_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -2030,7 +2048,7 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, spin_lock(&ctx->csa.register_lock); ret = __spufs_proxydma_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); - spu_release(ctx); + spu_release_saved(ctx); return ret; } @@ -2065,14 +2083,26 @@ static const char *ctx_state_names[] = { }; static unsigned long long spufs_acct_time(struct spu_context *ctx, 
- enum spuctx_execution_state state) + enum spu_utilization_state state) { - unsigned long time = ctx->stats.times[state]; + struct timespec ts; + unsigned long long time = ctx->stats.times[state]; - if (ctx->stats.execution_state == state) - time += jiffies - ctx->stats.tstamp; + /* + * In general, utilization statistics are updated by the controlling + * thread as the spu context moves through various well defined + * state transitions, but if the context is lazily loaded its + * utilization statistics are not updated as the controlling thread + * is not tightly coupled with the execution of the spu context. We + * calculate and apply the time delta from the last recorded state + * of the spu context. + */ + if (ctx->spu && ctx->stats.util_state == state) { + ktime_get_ts(&ts); + time += timespec_to_ns(&ts) - ctx->stats.tstamp; + } - return jiffies_to_msecs(time); + return time / NSEC_PER_MSEC; } static unsigned long long spufs_slb_flts(struct spu_context *ctx) @@ -2107,11 +2137,11 @@ static int spufs_show_stat(struct seq_file *s, void *private) spu_acquire(ctx); seq_printf(s, "%s %llu %llu %llu %llu " "%llu %llu %llu %llu %llu %llu %llu %llu\n", - ctx_state_names[ctx->stats.execution_state], - spufs_acct_time(ctx, SPUCTX_UTIL_USER), - spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM), - spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT), - spufs_acct_time(ctx, SPUCTX_UTIL_LOADED), + ctx_state_names[ctx->stats.util_state], + spufs_acct_time(ctx, SPU_UTIL_USER), + spufs_acct_time(ctx, SPU_UTIL_SYSTEM), + spufs_acct_time(ctx, SPU_UTIL_IOWAIT), + spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED), ctx->stats.vol_ctx_switch, ctx->stats.invol_ctx_switch, spufs_slb_flts(ctx), @@ -2184,8 +2214,8 @@ struct tree_descr spufs_dir_nosched_contents[] = { { "mbox_stat", &spufs_mbox_stat_fops, 0444, }, { "ibox_stat", &spufs_ibox_stat_fops, 0444, }, { "wbox_stat", &spufs_wbox_stat_fops, 0444, }, - { "signal1", &spufs_signal1_fops, 0666, }, - { "signal2", &spufs_signal2_fops, 0666, }, + { "signal1", 
&spufs_signal1_nosched_fops, 0222, }, + { "signal2", &spufs_signal2_nosched_fops, 0222, }, { "signal1_type", &spufs_signal1_type, 0666, }, { "signal2_type", &spufs_signal2_type, 0666, }, { "mss", &spufs_mss_fops, 0666, }, diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c index 212ea78f905..71a44325302 100644 --- a/arch/powerpc/platforms/cell/spufs/gang.c +++ b/arch/powerpc/platforms/cell/spufs/gang.c @@ -35,7 +35,9 @@ struct spu_gang *alloc_spu_gang(void) kref_init(&gang->kref); mutex_init(&gang->mutex); + mutex_init(&gang->aff_mutex); INIT_LIST_HEAD(&gang->list); + INIT_LIST_HEAD(&gang->aff_list_head); out: return gang; @@ -73,6 +75,10 @@ void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx) { mutex_lock(&gang->mutex); WARN_ON(ctx->gang != gang); + if (!list_empty(&ctx->aff_list)) { + list_del_init(&ctx->aff_list); + gang->aff_flags &= ~AFF_OFFSETS_SET; + } list_del_init(&ctx->gang_list); gang->contexts--; mutex_unlock(&gang->mutex); diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 7eb4d6cbcb7..b3d0dd118dd 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -316,11 +316,107 @@ out: return ret; } -static int spufs_create_context(struct inode *inode, - struct dentry *dentry, - struct vfsmount *mnt, int flags, int mode) +static struct spu_context * +spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, + struct file *filp) +{ + struct spu_context *tmp, *neighbor; + int count, node; + int aff_supp; + + aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next, + struct spu, cbe_list))->aff_list); + + if (!aff_supp) + return ERR_PTR(-EINVAL); + + if (flags & SPU_CREATE_GANG) + return ERR_PTR(-EINVAL); + + if (flags & SPU_CREATE_AFFINITY_MEM && + gang->aff_ref_ctx && + gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM) + return ERR_PTR(-EEXIST); + + if (gang->aff_flags & 
AFF_MERGED) + return ERR_PTR(-EBUSY); + + neighbor = NULL; + if (flags & SPU_CREATE_AFFINITY_SPU) { + if (!filp || filp->f_op != &spufs_context_fops) + return ERR_PTR(-EINVAL); + + neighbor = get_spu_context( + SPUFS_I(filp->f_dentry->d_inode)->i_ctx); + + if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) && + !list_is_last(&neighbor->aff_list, &gang->aff_list_head) && + !list_entry(neighbor->aff_list.next, struct spu_context, + aff_list)->aff_head) + return ERR_PTR(-EEXIST); + + if (gang != neighbor->gang) + return ERR_PTR(-EINVAL); + + count = 1; + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + count++; + if (list_empty(&neighbor->aff_list)) + count++; + + for (node = 0; node < MAX_NUMNODES; node++) { + if ((cbe_spu_info[node].n_spus - atomic_read( + &cbe_spu_info[node].reserved_spus)) >= count) + break; + } + + if (node == MAX_NUMNODES) + return ERR_PTR(-EEXIST); + } + + return neighbor; +} + +static void +spufs_set_affinity(unsigned int flags, struct spu_context *ctx, + struct spu_context *neighbor) +{ + if (flags & SPU_CREATE_AFFINITY_MEM) + ctx->gang->aff_ref_ctx = ctx; + + if (flags & SPU_CREATE_AFFINITY_SPU) { + if (list_empty(&neighbor->aff_list)) { + list_add_tail(&neighbor->aff_list, + &ctx->gang->aff_list_head); + neighbor->aff_head = 1; + } + + if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head) + || list_entry(neighbor->aff_list.next, struct spu_context, + aff_list)->aff_head) { + list_add(&ctx->aff_list, &neighbor->aff_list); + } else { + list_add_tail(&ctx->aff_list, &neighbor->aff_list); + if (neighbor->aff_head) { + neighbor->aff_head = 0; + ctx->aff_head = 1; + } + } + + if (!ctx->gang->aff_ref_ctx) + ctx->gang->aff_ref_ctx = ctx; + } +} + +static int +spufs_create_context(struct inode *inode, struct dentry *dentry, + struct vfsmount *mnt, int flags, int mode, + struct file *aff_filp) { int ret; + int affinity; + struct spu_gang *gang; + struct spu_context *neighbor; ret = -EPERM; if ((flags & 
SPU_CREATE_NOSCHED) && @@ -336,9 +432,29 @@ static int spufs_create_context(struct inode *inode, if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) goto out_unlock; + gang = NULL; + neighbor = NULL; + affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); + if (affinity) { + gang = SPUFS_I(inode)->i_gang; + ret = -EINVAL; + if (!gang) + goto out_unlock; + mutex_lock(&gang->aff_mutex); + neighbor = spufs_assert_affinity(flags, gang, aff_filp); + if (IS_ERR(neighbor)) { + ret = PTR_ERR(neighbor); + goto out_aff_unlock; + } + } + ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); if (ret) - goto out_unlock; + goto out_aff_unlock; + + if (affinity) + spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx, + neighbor); /* * get references for dget and mntget, will be released @@ -352,6 +468,9 @@ static int spufs_create_context(struct inode *inode, goto out; } +out_aff_unlock: + if (affinity) + mutex_unlock(&gang->aff_mutex); out_unlock: mutex_unlock(&inode->i_mutex); out: @@ -450,7 +569,8 @@ out: static struct file_system_type spufs_type; -long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode) +long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode, + struct file *filp) { struct dentry *dentry; int ret; @@ -487,7 +607,7 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode) dentry, nd->mnt, mode); else return spufs_create_context(nd->dentry->d_inode, - dentry, nd->mnt, flags, mode); + dentry, nd->mnt, flags, mode, filp); out_dput: dput(dentry); diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 58ae13b7de8..0b50fa5cb39 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -18,15 +18,17 @@ void spufs_stop_callback(struct spu *spu) wake_up_all(&ctx->stop_wq); } -static inline int spu_stopped(struct spu_context *ctx, u32 * stat) +static inline int spu_stopped(struct spu_context *ctx, u32 
*stat) { struct spu *spu; u64 pte_fault; *stat = ctx->ops->status_read(ctx); - if (ctx->state != SPU_STATE_RUNNABLE) - return 1; + spu = ctx->spu; + if (ctx->state != SPU_STATE_RUNNABLE || + test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) + return 1; pte_fault = spu->dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ? @@ -124,8 +126,10 @@ out: return ret; } -static int spu_run_init(struct spu_context *ctx, u32 * npc) +static int spu_run_init(struct spu_context *ctx, u32 *npc) { + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + if (ctx->flags & SPU_CREATE_ISOLATE) { unsigned long runcntl; @@ -151,16 +155,20 @@ static int spu_run_init(struct spu_context *ctx, u32 * npc) ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); } + spuctx_switch_state(ctx, SPU_UTIL_USER); + return 0; } -static int spu_run_fini(struct spu_context *ctx, u32 * npc, - u32 * status) +static int spu_run_fini(struct spu_context *ctx, u32 *npc, + u32 *status) { int ret = 0; *status = ctx->ops->status_read(ctx); *npc = ctx->ops->npc_read(ctx); + + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); spu_release(ctx); if (signal_pending(current)) @@ -289,10 +297,10 @@ static inline int spu_process_events(struct spu_context *ctx) return ret; } -long spufs_run_spu(struct file *file, struct spu_context *ctx, - u32 *npc, u32 *event) +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) { int ret; + struct spu *spu; u32 status; if (mutex_lock_interruptible(&ctx->run_mutex)) @@ -328,6 +336,17 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); if (unlikely(ret)) break; + spu = ctx->spu; + if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags))) { + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { + spu_switch_notify(spu, ctx); + continue; + } + } + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + if ((status & 
SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { ret = spu_process_callback(ctx); @@ -356,6 +375,7 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, (ctx->state == SPU_STATE_RUNNABLE)) ctx->stats.libassist++; + ctx->ops->master_stop(ctx); ret = spu_run_fini(ctx, npc, &status); spu_yield(ctx); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index e5b4dd1db28..227968b4779 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -51,9 +51,6 @@ struct spu_prio_array { DECLARE_BITMAP(bitmap, MAX_PRIO); struct list_head runq[MAX_PRIO]; spinlock_t runq_lock; - struct list_head active_list[MAX_NUMNODES]; - struct mutex active_mutex[MAX_NUMNODES]; - int nr_active[MAX_NUMNODES]; int nr_waiting; }; @@ -127,7 +124,7 @@ void __spu_update_sched_info(struct spu_context *ctx) ctx->policy = current->policy; /* - * A lot of places that don't hold active_mutex poke into + * A lot of places that don't hold list_mutex poke into * cpus_allowed, including grab_runnable_context which * already holds the runq_lock. So abuse runq_lock * to protect this field aswell. 
@@ -141,9 +138,9 @@ void spu_update_sched_info(struct spu_context *ctx) { int node = ctx->spu->node; - mutex_lock(&spu_prio->active_mutex[node]); + mutex_lock(&cbe_spu_info[node].list_mutex); __spu_update_sched_info(ctx); - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_unlock(&cbe_spu_info[node].list_mutex); } static int __node_allowed(struct spu_context *ctx, int node) @@ -169,56 +166,56 @@ static int node_allowed(struct spu_context *ctx, int node) return rval; } -/** - * spu_add_to_active_list - add spu to active list - * @spu: spu to add to the active list - */ -static void spu_add_to_active_list(struct spu *spu) -{ - int node = spu->node; - - mutex_lock(&spu_prio->active_mutex[node]); - spu_prio->nr_active[node]++; - list_add_tail(&spu->list, &spu_prio->active_list[node]); - mutex_unlock(&spu_prio->active_mutex[node]); -} +static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); -static void __spu_remove_from_active_list(struct spu *spu) +void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { - list_del_init(&spu->list); - spu_prio->nr_active[spu->node]--; + blocking_notifier_call_chain(&spu_switch_notifier, + ctx ? ctx->object_id : 0, spu); } -/** - * spu_remove_from_active_list - remove spu from active list - * @spu: spu to remove from the active list - */ -static void spu_remove_from_active_list(struct spu *spu) +static void notify_spus_active(void) { - int node = spu->node; - - mutex_lock(&spu_prio->active_mutex[node]); - __spu_remove_from_active_list(spu); - mutex_unlock(&spu_prio->active_mutex[node]); -} + int node; -static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); + /* + * Wake up the active spu_contexts. + * + * When the awakened processes see their "notify_active" flag is set, + * they will call spu_switch_notify(); + */ + for_each_online_node(node) { + struct spu *spu; -static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) -{ - blocking_notifier_call_chain(&spu_switch_notifier, - ctx ? 
ctx->object_id : 0, spu); + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state != SPU_FREE) { + struct spu_context *ctx = spu->ctx; + set_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags); + mb(); + wake_up_all(&ctx->stop_wq); + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } } int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(&spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(&spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(&spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); /** * spu_bind_context - bind spu context to physical spu @@ -229,6 +226,12 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) { pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, spu->number, spu->node); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if (ctx->flags & SPU_CREATE_NOSCHED) + atomic_inc(&cbe_spu_info[spu->node].reserved_spus); + if (!list_empty(&ctx->aff_list)) + atomic_inc(&ctx->gang->aff_sched_count); ctx->stats.slb_flt_base = spu->stats.slb_flt; ctx->stats.class2_intr_base = spu->stats.class2_intr; @@ -238,6 +241,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) ctx->spu = spu; ctx->ops = &spu_hw_ops; spu->pid = current->pid; + spu->tgid = current->tgid; spu_associate_mm(spu, ctx->owner); spu->ibox_callback = spufs_ibox_callback; spu->wbox_callback = spufs_wbox_callback; @@ -251,7 +255,153 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) spu_cpu_affinity_set(spu, raw_smp_processor_id()); spu_switch_notify(spu, ctx); ctx->state = SPU_STATE_RUNNABLE; - spu_switch_state(spu, SPU_UTIL_SYSTEM); + + spuctx_switch_state(ctx, 
SPU_UTIL_IDLE_LOADED); +} + +/* + * Must be used with the list_mutex held. + */ +static inline int sched_spu(struct spu *spu) +{ + BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); + + return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); +} + +static void aff_merge_remaining_ctxs(struct spu_gang *gang) +{ + struct spu_context *ctx; + + list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { + if (list_empty(&ctx->aff_list)) + list_add(&ctx->aff_list, &gang->aff_list_head); + } + gang->aff_flags |= AFF_MERGED; +} + +static void aff_set_offsets(struct spu_gang *gang) +{ + struct spu_context *ctx; + int offset; + + offset = -1; + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset--; + } + + offset = 0; + list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset++; + } + + gang->aff_flags |= AFF_OFFSETS_SET; +} + +static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, + int group_size, int lowest_offset) +{ + struct spu *spu; + int node, n; + + /* + * TODO: A better algorithm could be used to find a good spu to be + * used as reference location for the ctxs chain. + */ + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + node = (node < MAX_NUMNODES) ? 
node : 0; + if (!node_allowed(ctx, node)) + continue; + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if ((!mem_aff || spu->has_mem_affinity) && + sched_spu(spu)) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + return spu; + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } + return NULL; +} + +static void aff_set_ref_point_location(struct spu_gang *gang) +{ + int mem_aff, gs, lowest_offset; + struct spu_context *ctx; + struct spu *tmp; + + mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; + lowest_offset = 0; + gs = 0; + + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + gs++; + + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + lowest_offset = ctx->aff_offset; + } + + gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset); +} + +static struct spu *ctx_location(struct spu *ref, int offset, int node) +{ + struct spu *spu; + + spu = NULL; + if (offset >= 0) { + list_for_each_entry(spu, ref->aff_list.prev, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset--; + } + } else { + list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset++; + } + } + + return spu; +} + +/* + * affinity_check is called each time a context is going to be scheduled. + * It returns the spu ptr on which the context must run. 
+ */ +static int has_affinity(struct spu_context *ctx) +{ + struct spu_gang *gang = ctx->gang; + + if (list_empty(&ctx->aff_list)) + return 0; + + mutex_lock(&gang->aff_mutex); + if (!gang->aff_ref_spu) { + if (!(gang->aff_flags & AFF_MERGED)) + aff_merge_remaining_ctxs(gang); + if (!(gang->aff_flags & AFF_OFFSETS_SET)) + aff_set_offsets(gang); + aff_set_ref_point_location(gang); + } + mutex_unlock(&gang->aff_mutex); + + return gang->aff_ref_spu != NULL; } /** @@ -263,9 +413,13 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) { pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, spu->pid, spu->number, spu->node); + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); - spu_switch_state(spu, SPU_UTIL_IDLE); - + if (spu->ctx->flags & SPU_CREATE_NOSCHED) + atomic_dec(&cbe_spu_info[spu->node].reserved_spus); + if (!list_empty(&ctx->aff_list)) + if (atomic_dec_and_test(&ctx->gang->aff_sched_count)) + ctx->gang->aff_ref_spu = NULL; spu_switch_notify(spu, NULL); spu_unmap_mappings(ctx); spu_save(&ctx->csa, spu); @@ -278,8 +432,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) spu->dma_callback = NULL; spu_associate_mm(spu, NULL); spu->pid = 0; + spu->tgid = 0; ctx->ops = &spu_backing_ops; - ctx->spu = NULL; spu->flags = 0; spu->ctx = NULL; @@ -287,6 +441,10 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) (spu->stats.slb_flt - ctx->stats.slb_flt_base); ctx->stats.class2_intr += (spu->stats.class2_intr - ctx->stats.class2_intr_base); + + /* This maps the underlying spu state to idle */ + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + ctx->spu = NULL; } /** @@ -352,18 +510,41 @@ static void spu_prio_wait(struct spu_context *ctx) static struct spu *spu_get_idle(struct spu_context *ctx) { - struct spu *spu = NULL; - int node = cpu_to_node(raw_smp_processor_id()); - int n; + struct spu *spu; + int node, n; + + if (has_affinity(ctx)) { + node = ctx->gang->aff_ref_spu->node; + 
mutex_lock(&cbe_spu_info[node].list_mutex); + spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node); + if (spu && spu->alloc_state == SPU_FREE) + goto found; + mutex_unlock(&cbe_spu_info[node].list_mutex); + return NULL; + } + + node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? node : 0; if (!node_allowed(ctx, node)) continue; - spu = spu_alloc_node(node); - if (spu) - break; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state == SPU_FREE) + goto found; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); } + + return NULL; + + found: + spu->alloc_state = SPU_USED; + mutex_unlock(&cbe_spu_info[node].list_mutex); + pr_debug("Got SPU %d %d\n", spu->number, spu->node); + spu_init_channels(spu); return spu; } @@ -393,15 +574,15 @@ static struct spu *find_victim(struct spu_context *ctx) if (!node_allowed(ctx, node)) continue; - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry(spu, &spu_prio->active_list[node], list) { + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { struct spu_context *tmp = spu->ctx; if (tmp->prio > ctx->prio && (!victim || tmp->prio > victim->prio)) victim = spu->ctx; } - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_unlock(&cbe_spu_info[node].list_mutex); if (victim) { /* @@ -426,7 +607,11 @@ static struct spu *find_victim(struct spu_context *ctx) victim = NULL; goto restart; } - spu_remove_from_active_list(spu); + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; + mutex_unlock(&cbe_spu_info[node].list_mutex); + spu_unbind_context(spu, victim); victim->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; @@ -455,8 +640,6 @@ static struct spu *find_victim(struct spu_context *ctx) */ int spu_activate(struct spu_context *ctx, unsigned long flags) { - 
spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); - do { struct spu *spu; @@ -477,8 +660,12 @@ int spu_activate(struct spu_context *ctx, unsigned long flags) if (!spu && rt_prio(ctx->prio)) spu = find_victim(ctx); if (spu) { + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); spu_bind_context(spu, ctx); - spu_add_to_active_list(spu); + cbe_spu_info[node].nr_active++; + mutex_unlock(&cbe_spu_info[node].list_mutex); return 0; } @@ -500,7 +687,7 @@ static struct spu_context *grab_runnable_context(int prio, int node) int best; spin_lock(&spu_prio->runq_lock); - best = sched_find_first_bit(spu_prio->bitmap); + best = find_first_bit(spu_prio->bitmap, prio); while (best < prio) { struct list_head *rq = &spu_prio->runq[best]; @@ -527,11 +714,17 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) if (spu) { new = grab_runnable_context(max_prio, spu->node); if (new || force) { - spu_remove_from_active_list(spu); + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); spu_unbind_context(spu, ctx); + spu->alloc_state = SPU_FREE; + cbe_spu_info[node].nr_active--; + mutex_unlock(&cbe_spu_info[node].list_mutex); + ctx->stats.vol_ctx_switch++; spu->stats.vol_ctx_switch++; - spu_free(spu); + if (new) wake_up(&new->stop_wq); } @@ -550,21 +743,11 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) */ void spu_deactivate(struct spu_context *ctx) { - /* - * We must never reach this for a nosched context, - * but handle the case gracefull instead of panicing. 
- */ - if (ctx->flags & SPU_CREATE_NOSCHED) { - WARN_ON(1); - return; - } - __spu_deactivate(ctx, 1, MAX_PRIO); - spuctx_switch_state(ctx, SPUCTX_UTIL_USER); } /** - * spu_yield - yield a physical spu if others are waiting + * spu_yield - yield a physical spu if others are waiting * @ctx: spu context to yield * * Check if there is a higher priority context waiting and if yes @@ -575,17 +758,12 @@ void spu_yield(struct spu_context *ctx) { if (!(ctx->flags & SPU_CREATE_NOSCHED)) { mutex_lock(&ctx->state_mutex); - if (__spu_deactivate(ctx, 0, MAX_PRIO)) - spuctx_switch_state(ctx, SPUCTX_UTIL_USER); - else { - spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED); - spu_switch_state(ctx->spu, SPU_UTIL_USER); - } + __spu_deactivate(ctx, 0, MAX_PRIO); mutex_unlock(&ctx->state_mutex); } } -static void spusched_tick(struct spu_context *ctx) +static noinline void spusched_tick(struct spu_context *ctx) { if (ctx->flags & SPU_CREATE_NOSCHED) return; @@ -596,7 +774,7 @@ static void spusched_tick(struct spu_context *ctx) return; /* - * Unfortunately active_mutex ranks outside of state_mutex, so + * Unfortunately list_mutex ranks outside of state_mutex, so * we have to trylock here. If we fail give the context another * tick and try again. */ @@ -606,12 +784,11 @@ static void spusched_tick(struct spu_context *ctx) new = grab_runnable_context(ctx->prio + 1, spu->node); if (new) { - - __spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); ctx->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; - spu_free(spu); + spu->alloc_state = SPU_FREE; + cbe_spu_info[spu->node].nr_active--; wake_up(&new->stop_wq); /* * We need to break out of the wait loop in @@ -632,7 +809,7 @@ static void spusched_tick(struct spu_context *ctx) * * Return the number of tasks currently running or waiting to run. * - * Note that we don't take runq_lock / active_mutex here. Reading + * Note that we don't take runq_lock / list_mutex here. 
Reading * a single 32bit value is atomic on powerpc, and we don't care * about memory ordering issues here. */ @@ -641,7 +818,7 @@ static unsigned long count_active_contexts(void) int nr_active = 0, node; for (node = 0; node < MAX_NUMNODES; node++) - nr_active += spu_prio->nr_active[node]; + nr_active += cbe_spu_info[node].nr_active; nr_active += spu_prio->nr_waiting; return nr_active; @@ -681,19 +858,18 @@ static void spusched_wake(unsigned long data) static int spusched_thread(void *unused) { - struct spu *spu, *next; + struct spu *spu; int node; while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); schedule(); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry_safe(spu, next, - &spu_prio->active_list[node], - list) - spusched_tick(spu->ctx); - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->ctx) + spusched_tick(spu->ctx); + mutex_unlock(&cbe_spu_info[node].list_mutex); } } @@ -751,10 +927,9 @@ int __init spu_sched_init(void) INIT_LIST_HEAD(&spu_prio->runq[i]); __clear_bit(i, spu_prio->bitmap); } - __set_bit(MAX_PRIO, spu_prio->bitmap); for (i = 0; i < MAX_NUMNODES; i++) { - mutex_init(&spu_prio->active_mutex[i]); - INIT_LIST_HEAD(&spu_prio->active_list[i]); + mutex_init(&cbe_spu_info[i].list_mutex); + INIT_LIST_HEAD(&cbe_spu_info[i].spus); } spin_lock_init(&spu_prio->runq_lock); @@ -783,9 +958,9 @@ int __init spu_sched_init(void) return err; } -void __exit spu_sched_exit(void) +void spu_sched_exit(void) { - struct spu *spu, *tmp; + struct spu *spu; int node; remove_proc_entry("spu_loadavg", NULL); @@ -794,13 +969,11 @@ void __exit spu_sched_exit(void) kthread_stop(spusched_task); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node], - list) { - 
list_del_init(&spu->list); - spu_free(spu); - } - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->alloc_state != SPU_FREE) + spu->alloc_state = SPU_FREE; + mutex_unlock(&cbe_spu_info[node].list_mutex); } kfree(spu_prio); } diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c index 4e19ed7a075..21a9c952d88 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_restore.c +++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c @@ -84,13 +84,13 @@ static inline void restore_decr(void) unsigned int decr_running; unsigned int decr; - /* Restore, Step 6: + /* Restore, Step 6(moved): * If the LSCSA "decrementer running" flag is set * then write the SPU_WrDec channel with the * decrementer value from LSCSA. */ offset = LSCSA_QW_OFFSET(decr_status); - decr_running = regs_spill[offset].slot[0]; + decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING; if (decr_running) { offset = LSCSA_QW_OFFSET(decr); decr = regs_spill[offset].slot[0]; @@ -318,10 +318,10 @@ int main() build_dma_list(lscsa_ea); /* Step 3. */ restore_upper_240kb(lscsa_ea); /* Step 4. */ /* Step 5: done by 'exit'. */ - restore_decr(); /* Step 6. */ enqueue_putllc(lscsa_ea); /* Step 7. */ set_tag_update(); /* Step 8. */ read_tag_status(); /* Step 9. */ + restore_decr(); /* moved Step 6. */ read_llar_status(); /* Step 10. */ write_ppu_mb(); /* Step 11. */ write_ppuint_mb(); /* Step 12. 
*/ diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped index 15183d209b5..f383b027e8b 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped +++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped @@ -10,7 +10,7 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x24fd8081, 0x1cd80081, 0x33001180, -0x42030003, +0x42034003, 0x33800284, 0x1c010204, 0x40200000, @@ -24,22 +24,22 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x23fffd84, 0x1c100183, 0x217ffa85, -0x3080a000, -0x3080a201, -0x3080a402, -0x3080a603, -0x3080a804, -0x3080aa05, -0x3080ac06, -0x3080ae07, -0x3080b008, -0x3080b209, -0x3080b40a, -0x3080b60b, -0x3080b80c, -0x3080ba0d, -0x3080bc0e, -0x3080be0f, +0x3080b000, +0x3080b201, +0x3080b402, +0x3080b603, +0x3080b804, +0x3080ba05, +0x3080bc06, +0x3080be07, +0x3080c008, +0x3080c209, +0x3080c40a, +0x3080c60b, +0x3080c80c, +0x3080ca0d, +0x3080cc0e, +0x3080ce0f, 0x00003ffc, 0x00000000, 0x00000000, @@ -48,19 +48,18 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x3ec00083, 0xb0a14103, 0x01a00204, -0x3ec10082, -0x4202800e, -0x04000703, -0xb0a14202, -0x21a00803, -0x3fbf028d, -0x3f20068d, -0x3fbe0682, +0x3ec10083, +0x4202c002, +0xb0a14203, +0x21a00802, +0x3fbf028a, +0x3f20050a, +0x3fbe0502, 0x3fe30102, 0x21a00882, -0x3f82028f, -0x3fe3078f, -0x3fbf0784, +0x3f82028b, +0x3fe3058b, +0x3fbf0584, 0x3f200204, 0x3fbe0204, 0x3fe30204, @@ -75,252 +74,285 @@ static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { 0x21a00083, 0x40800082, 0x21a00b02, -0x10002818, -0x42a00002, -0x32800007, -0x4207000c, -0x18008208, -0x40a0000b, -0x4080020a, -0x40800709, -0x00200000, -0x42070002, -0x3ac30384, +0x10002612, +0x42a00003, +0x42074006, +0x1800c204, +0x40a00008, +0x40800789, +0x1c010305, +0x34000302, 0x1cffc489, -0x00200000, -0x18008383, -0x38830382, 
-0x4cffc486, -0x3ac28185, -0xb0408584, -0x28830382, -0x1c020387, -0x38828182, -0xb0408405, -0x1802c408, -0x28828182, -0x217ff886, -0x04000583, -0x21a00803, -0x3fbe0682, -0x3fe30102, -0x04000106, -0x21a00886, -0x04000603, -0x21a00903, -0x40803c02, -0x21a00982, -0x40800003, -0x04000184, -0x21a00a04, +0x3ec00303, +0x3ec00287, +0xb0408403, +0x24000302, +0x34000282, +0x1c020306, +0xb0408207, +0x18020204, +0x24000282, +0x217ffa09, +0x04000402, +0x21a00802, +0x3fbe0504, +0x3fe30204, +0x21a00884, +0x42074002, +0x21a00902, +0x40803c03, +0x21a00983, +0x04000485, +0x21a00a05, 0x40802202, 0x21a00a82, -0x42028005, -0x34208702, -0x21002282, -0x21a00804, -0x21a00886, -0x3fbf0782, +0x21a00805, +0x21a00884, +0x3fbf0582, 0x3f200102, 0x3fbe0102, 0x3fe30102, 0x21a00902, 0x40804003, 0x21a00983, -0x21a00a04, +0x21a00a05, 0x40805a02, 0x21a00a82, 0x40800083, 0x21a00b83, 0x01a00c02, -0x01a00d83, -0x3420c282, +0x30809c03, +0x34000182, +0x14004102, +0x21002082, +0x01a00d82, +0x3080a003, +0x34000182, 0x21a00e02, -0x34210283, -0x21a00f03, -0x34200284, -0x77400200, -0x3421c282, +0x3080a203, +0x34000182, +0x21a00f02, +0x3080a403, +0x34000182, +0x77400100, +0x3080a603, +0x34000182, 0x21a00702, -0x34218283, -0x21a00083, -0x34214282, +0x3080a803, +0x34000182, +0x21a00082, +0x3080aa03, +0x34000182, 0x21a00b02, -0x4200480c, -0x00200000, -0x1c010286, -0x34220284, -0x34220302, -0x0f608203, -0x5c024204, -0x3b81810b, -0x42013c02, -0x00200000, -0x18008185, -0x38808183, -0x3b814182, -0x21004e84, +0x4020007f, +0x3080ae02, +0x42004805, +0x3080ac04, +0x34000103, +0x34000202, +0x1cffc183, +0x3b810106, +0x0f608184, +0x42013802, +0x5c020183, +0x38810102, +0x3b810102, +0x21000e83, 0x4020007f, 0x35000100, -0x000004e0, -0x000002a0, -0x000002e8, -0x00000428, +0x00000470, +0x000002f8, +0x00000430, 0x00000360, -0x000002e8, -0x000004a0, -0x00000468, +0x000002f8, 0x000003c8, +0x000004a8, +0x00000298, 0x00000360, +0x00200000, 0x409ffe02, 0x30801203, -0x40800204, -0x3ec40085, -0x10009c09, -0x3ac10606, -0xb060c105, 
-0x4020007f, -0x4020007f, +0x40800208, +0x3ec40084, +0x40800407, +0x3ac20289, +0xb060c104, +0x3ac1c284, 0x20801203, -0x38810602, -0xb0408586, -0x28810602, -0x32004180, -0x34204702, +0x38820282, +0x41004003, +0xb0408189, +0x28820282, +0x3881c282, +0xb0408304, +0x2881c282, +0x00400000, +0x40800003, +0x35000000, +0x30809e03, +0x34000182, 0x21a00382, 0x4020007f, -0x327fdc80, +0x327fde00, 0x409ffe02, 0x30801203, -0x40800204, -0x3ec40087, -0x40800405, -0x00200000, -0x40800606, -0x3ac10608, -0x3ac14609, -0x3ac1860a, -0xb060c107, +0x40800206, +0x3ec40084, +0x40800407, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, 0x20801203, +0x38818282, 0x41004003, -0x38810602, -0x4020007f, -0xb0408188, -0x4020007f, -0x28810602, -0x41201002, -0x38814603, -0x10009c09, -0xb060c109, -0x4020007f, -0x28814603, +0xb040818a, +0x10005b0b, +0x41201003, +0x28818282, +0x3881c282, +0xb0408184, 0x41193f83, -0x38818602, 0x60ffc003, -0xb040818a, -0x28818602, -0x32003080, +0x2881c282, +0x38820282, +0xb0408189, +0x28820282, +0x327fef80, 0x409ffe02, 0x30801203, -0x40800204, -0x3ec40087, -0x41201008, -0x10009c14, -0x40800405, -0x3ac10609, -0x40800606, -0x3ac1460a, -0xb060c107, -0x3ac1860b, +0x40800207, +0x3ec40086, +0x4120100b, +0x10005b14, +0x40800404, +0x3ac1c289, +0x40800608, +0xb060c106, +0x3ac10286, +0x3ac2028a, 0x20801203, -0x38810602, -0xb0408409, -0x28810602, -0x38814603, -0xb060c40a, -0x4020007f, -0x28814603, +0x3881c282, 0x41193f83, -0x38818602, 0x60ffc003, -0xb040818b, -0x28818602, -0x32002380, -0x409ffe02, -0x30801204, -0x40800205, -0x3ec40083, -0x40800406, -0x3ac14607, -0x3ac18608, -0xb0810103, -0x41004002, -0x20801204, -0x4020007f, -0x38814603, -0x10009c0b, -0xb060c107, -0x4020007f, -0x4020007f, -0x28814603, -0x38818602, -0x4020007f, +0xb0408589, +0x2881c282, +0x38810282, +0xb0408586, +0x28810282, +0x38820282, +0xb040818a, +0x28820282, 0x4020007f, -0xb0408588, -0x28818602, +0x327fe280, +0x409ffe02, +0x30801203, +0x40800207, +0x3ec40084, +0x40800408, +0x10005b14, +0x40800609, 
+0x3ac1c28a, +0x3ac2028b, +0xb060c104, +0x3ac24284, +0x20801203, +0x41201003, +0x3881c282, +0xb040830a, +0x2881c282, +0x38820282, +0xb040818b, +0x41193f83, +0x60ffc003, +0x28820282, +0x38824282, +0xb0408184, +0x28824282, 0x4020007f, -0x32001780, +0x327fd580, 0x409ffe02, -0x1000640e, -0x40800204, +0x1000658e, +0x40800206, 0x30801203, -0x40800405, -0x3ec40087, -0x40800606, -0x3ac10608, -0x3ac14609, -0x3ac1860a, -0xb060c107, +0x40800407, +0x3ec40084, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, 0x20801203, 0x413d8003, -0x38810602, +0x38818282, 0x4020007f, -0x327fd780, -0x409ffe02, -0x10007f0c, -0x40800205, -0x30801204, -0x40800406, -0x3ec40083, -0x3ac14607, -0x3ac18608, -0xb0810103, -0x413d8002, -0x20801204, -0x38814603, +0x327fd800, +0x409ffe03, +0x30801202, +0x40800207, +0x3ec40084, +0x10005b09, +0x3ac1c288, +0xb0408184, 0x4020007f, -0x327feb80, +0x4020007f, +0x20801202, +0x3881c282, +0xb0408308, +0x2881c282, +0x327fc680, 0x409ffe02, +0x1000588b, +0x40800208, 0x30801203, -0x40800204, -0x3ec40087, -0x40800405, -0x1000650a, -0x40800606, -0x3ac10608, -0x3ac14609, -0x3ac1860a, -0xb060c107, +0x40800407, +0x3ec40084, +0x3ac20289, +0xb060c104, +0x3ac1c284, 0x20801203, -0x38810602, -0xb0408588, -0x4020007f, -0x327fc980, -0x00400000, -0x40800003, -0x4020007f, -0x35000000, +0x413d8003, +0x38820282, +0x327fbd80, +0x00200000, +0x00000da0, +0x00000000, +0x00000000, +0x00000000, +0x00000d90, +0x00000000, +0x00000000, +0x00000000, +0x00000db0, +0x00000000, +0x00000000, +0x00000000, +0x00000dc0, +0x00000000, +0x00000000, +0x00000000, +0x00000d80, +0x00000000, +0x00000000, +0x00000000, +0x00000df0, +0x00000000, +0x00000000, +0x00000000, +0x00000de0, +0x00000000, +0x00000000, +0x00000000, +0x00000dd0, +0x00000000, +0x00000000, +0x00000000, +0x00000e04, +0x00000000, +0x00000000, 0x00000000, +0x00000e00, 0x00000000, 0x00000000, 0x00000000, diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 
08b3530288a..8b20c0c1556 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -40,17 +40,13 @@ enum { struct spu_context_ops; struct spu_gang; -/* - * This is the state for spu utilization reporting to userspace. - * Because this state is visible to userspace it must never change and needs - * to be kept strictly separate from any internal state kept by the kernel. - */ -enum spuctx_execution_state { - SPUCTX_UTIL_USER = 0, - SPUCTX_UTIL_SYSTEM, - SPUCTX_UTIL_IOWAIT, - SPUCTX_UTIL_LOADED, - SPUCTX_UTIL_MAX +enum { + SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */ +}; + +/* ctx->sched_flags */ +enum { + SPU_SCHED_NOTIFY_ACTIVE, }; struct spu_context { @@ -89,6 +85,8 @@ struct spu_context { struct list_head gang_list; struct spu_gang *gang; + struct kref *prof_priv_kref; + void ( * prof_priv_release) (struct kref *kref); /* owner thread */ pid_t tid; @@ -104,9 +102,9 @@ struct spu_context { /* statistics */ struct { /* updates protected by ctx->state_mutex */ - enum spuctx_execution_state execution_state; - unsigned long tstamp; /* time of last ctx switch */ - unsigned long times[SPUCTX_UTIL_MAX]; + enum spu_utilization_state util_state; + unsigned long long tstamp; /* time of last state switch */ + unsigned long long times[SPU_UTIL_MAX]; unsigned long long vol_ctx_switch; unsigned long long invol_ctx_switch; unsigned long long min_flt; @@ -118,6 +116,10 @@ struct spu_context { unsigned long long class2_intr_base; /* # at last ctx switch */ unsigned long long libassist; } stats; + + struct list_head aff_list; + int aff_head; + int aff_offset; }; struct spu_gang { @@ -125,8 +127,19 @@ struct spu_gang { struct mutex mutex; struct kref kref; int contexts; + + struct spu_context *aff_ref_ctx; + struct list_head aff_list_head; + struct mutex aff_mutex; + int aff_flags; + struct spu *aff_ref_spu; + atomic_t aff_sched_count; }; +/* Flag bits for spu_gang aff_flags */ +#define AFF_OFFSETS_SET 1 +#define 
AFF_MERGED 2 + struct mfc_dma_command { int32_t pad; /* reserved */ uint32_t lsa; /* local storage address */ @@ -190,10 +203,9 @@ extern struct tree_descr spufs_dir_contents[]; extern struct tree_descr spufs_dir_nosched_contents[]; /* system call implementation */ -long spufs_run_spu(struct file *file, - struct spu_context *ctx, u32 *npc, u32 *status); -long spufs_create(struct nameidata *nd, - unsigned int flags, mode_t mode); +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status); +long spufs_create(struct nameidata *nd, unsigned int flags, + mode_t mode, struct file *filp); extern const struct file_operations spufs_context_fops; /* gang management */ @@ -206,6 +218,9 @@ void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx); /* fault handling */ int spufs_handle_class1(struct spu_context *ctx); +/* affinity */ +struct spu *affinity_check(struct spu_context *ctx); + /* context management */ extern atomic_t nr_spu_contexts; static inline void spu_acquire(struct spu_context *ctx) @@ -227,15 +242,17 @@ void spu_unmap_mappings(struct spu_context *ctx); void spu_forget(struct spu_context *ctx); int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags); void spu_acquire_saved(struct spu_context *ctx); +void spu_release_saved(struct spu_context *ctx); int spu_activate(struct spu_context *ctx, unsigned long flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); +void spu_switch_notify(struct spu *spu, struct spu_context *ctx); void spu_set_timeslice(struct spu_context *ctx); void spu_update_sched_info(struct spu_context *ctx); void __spu_update_sched_info(struct spu_context *ctx); int __init spu_sched_init(void); -void __exit spu_sched_exit(void); +void spu_sched_exit(void); extern char *isolated_loader; @@ -293,30 +310,34 @@ extern int spufs_coredump_num_notes; * line. 
*/ static inline void spuctx_switch_state(struct spu_context *ctx, - enum spuctx_execution_state new_state) + enum spu_utilization_state new_state) { - WARN_ON(!mutex_is_locked(&ctx->state_mutex)); - - if (ctx->stats.execution_state != new_state) { - unsigned long curtime = jiffies; - - ctx->stats.times[ctx->stats.execution_state] += - curtime - ctx->stats.tstamp; - ctx->stats.tstamp = curtime; - ctx->stats.execution_state = new_state; - } -} + unsigned long long curtime; + signed long long delta; + struct timespec ts; + struct spu *spu; + enum spu_utilization_state old_state; -static inline void spu_switch_state(struct spu *spu, - enum spuctx_execution_state new_state) -{ - if (spu->stats.utilization_state != new_state) { - unsigned long curtime = jiffies; + ktime_get_ts(&ts); + curtime = timespec_to_ns(&ts); + delta = curtime - ctx->stats.tstamp; - spu->stats.times[spu->stats.utilization_state] += - curtime - spu->stats.tstamp; + WARN_ON(!mutex_is_locked(&ctx->state_mutex)); + WARN_ON(delta < 0); + + spu = ctx->spu; + old_state = ctx->stats.util_state; + ctx->stats.util_state = new_state; + ctx->stats.tstamp = curtime; + + /* + * Update the physical SPU utilization statistics. 
+ */ + if (spu) { + ctx->stats.times[old_state] += delta; + spu->stats.times[old_state] += delta; + spu->stats.util_state = new_state; spu->stats.tstamp = curtime; - spu->stats.utilization_state = new_state; } } diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 9c506ba08cd..27ffdae98e5 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -180,7 +180,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) case MFC_CNTL_SUSPEND_COMPLETE: if (csa) { csa->priv2.mfc_control_RW = - in_be64(&priv2->mfc_control_RW) | + MFC_CNTL_SUSPEND_MASK | MFC_CNTL_SUSPEND_DMA_QUEUE; } break; @@ -190,9 +190,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == MFC_CNTL_SUSPEND_COMPLETE); if (csa) { - csa->priv2.mfc_control_RW = - in_be64(&priv2->mfc_control_RW) & - ~MFC_CNTL_SUSPEND_DMA_QUEUE; + csa->priv2.mfc_control_RW = 0; } break; } @@ -251,16 +249,8 @@ static inline void save_mfc_decr(struct spu_state *csa, struct spu *spu) * Read MFC_CNTL[Ds]. Update saved copy of * CSA.MFC_CNTL[Ds]. */ - if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) { - csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; - csa->suspend_time = get_cycles(); - out_be64(&priv2->spu_chnlcntptr_RW, 7ULL); - eieio(); - csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW); - eieio(); - } else { - csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; - } + csa->priv2.mfc_control_RW |= + in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING; } static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) @@ -271,7 +261,8 @@ static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) * Write MFC_CNTL[Dh] set to a '1' to halt * the decrementer. 
*/ - out_be64(&priv2->mfc_control_RW, MFC_CNTL_DECREMENTER_HALTED); + out_be64(&priv2->mfc_control_RW, + MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK); eieio(); } @@ -615,7 +606,7 @@ static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu) static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; int i; /* Save, Step 42: @@ -626,7 +617,7 @@ static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW); /* Save the following CH: [0,3,4,24,25,27] */ - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { idx = ch_indices[i]; out_be64(&priv2->spu_chnlcntptr_RW, idx); eieio(); @@ -983,13 +974,13 @@ static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu) */ } -static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) +static inline void suspend_mfc_and_halt_decr(struct spu_state *csa, + struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; /* Restore, Step 7: - * Restore, Step 47. - * Write MFC_Cntl[Dh,Sc]='1','1' to suspend + * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend * the queue and halt the decrementer. 
*/ out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE | @@ -1090,7 +1081,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; u64 idx; int i; @@ -1102,7 +1093,7 @@ static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) out_be64(&priv2->spu_chnldata_RW, 0UL); /* Reset the following CH: [0,3,4,24,25,27] */ - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { idx = ch_indices[i]; out_be64(&priv2->spu_chnlcntptr_RW, idx); eieio(); @@ -1289,7 +1280,15 @@ static inline void setup_decr(struct spu_state *csa, struct spu *spu) cycles_t resume_time = get_cycles(); cycles_t delta_time = resume_time - csa->suspend_time; + csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING; + if (csa->lscsa->decr.slot[0] < delta_time) { + csa->lscsa->decr_status.slot[0] |= + SPU_DECR_STATUS_WRAPPED; + } + csa->lscsa->decr.slot[0] -= delta_time; + } else { + csa->lscsa->decr_status.slot[0] = 0; } } @@ -1398,6 +1397,18 @@ static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu) send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); } +static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 47. + * Write MFC_Cntl[Sc,Sm]='1','0' to suspend + * the queue. + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); + eieio(); +} + static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) { /* Restore, Step 49: @@ -1548,10 +1559,10 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu) * "wrapped" flag is set, OR in a '1' to * CSA.SPU_Event_Status[Tm]. 
*/ - if (csa->lscsa->decr_status.slot[0] == 1) { + if (csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) { csa->spu_chnldata_RW[0] |= 0x20; } - if ((csa->lscsa->decr_status.slot[0] == 1) && + if ((csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) && (csa->spu_chnlcnt_RW[0] == 0 && ((csa->spu_chnldata_RW[2] & 0x20) == 0x0) && ((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) { @@ -1562,18 +1573,13 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu) static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; int i; /* Restore, Step 59: + * Restore the following CH: [0,3,4,24,25,27] */ - - /* Restore CH 1 without count */ - out_be64(&priv2->spu_chnlcntptr_RW, 1); - out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[1]); - - /* Restore the following CH: [0,3,4,24,25,27] */ - for (i = 0; i < 7; i++) { + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { idx = ch_indices[i]; out_be64(&priv2->spu_chnlcntptr_RW, idx); eieio(); @@ -1932,7 +1938,7 @@ static void harvest(struct spu_state *prev, struct spu *spu) set_switch_pending(prev, spu); /* Step 5. */ stop_spu_isolate(spu); /* NEW. */ remove_other_spu_access(prev, spu); /* Step 6. */ - suspend_mfc(prev, spu); /* Step 7. */ + suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */ wait_suspend_mfc_complete(prev, spu); /* Step 8. */ if (!suspend_spe(prev, spu)) /* Step 9. */ clear_spu_status(prev, spu); /* Step 10. 
*/ diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index 8e37bdf4dfd..43f0fb88abb 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -47,7 +47,7 @@ static long do_spu_run(struct file *filp, goto out; i = SPUFS_I(filp->f_path.dentry->d_inode); - ret = spufs_run_spu(filp, i->i_ctx, &npc, &status); + ret = spufs_run_spu(i->i_ctx, &npc, &status); if (put_user(npc, unpc)) ret = -EFAULT; @@ -76,8 +76,8 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) } #endif -asmlinkage long sys_spu_create(const char __user *pathname, - unsigned int flags, mode_t mode) +asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags, + mode_t mode, struct file *neighbor) { char *tmp; int ret; @@ -90,7 +90,7 @@ asmlinkage long sys_spu_create(const char __user *pathname, ret = path_lookup(tmp, LOOKUP_PARENT| LOOKUP_OPEN|LOOKUP_CREATE, &nd); if (!ret) { - ret = spufs_create(&nd, flags, mode); + ret = spufs_create(&nd, flags, mode, neighbor); path_release(&nd); } putname(tmp); @@ -99,8 +99,32 @@ asmlinkage long sys_spu_create(const char __user *pathname, return ret; } +#ifndef MODULE +asmlinkage long sys_spu_create(const char __user *pathname, unsigned int flags, + mode_t mode, int neighbor_fd) +{ + int fput_needed; + struct file *neighbor; + long ret; + + if (flags & SPU_CREATE_AFFINITY_SPU) { + ret = -EBADF; + neighbor = fget_light(neighbor_fd, &fput_needed); + if (neighbor) { + ret = do_spu_create(pathname, flags, mode, neighbor); + fput_light(neighbor, fput_needed); + } + } + else { + ret = do_spu_create(pathname, flags, mode, NULL); + } + + return ret; +} +#endif + struct spufs_calls spufs_calls = { - .create_thread = sys_spu_create, + .create_thread = do_spu_create, .spu_run = do_spu_run, .owner = THIS_MODULE, }; |