diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-01-31 13:37:27 +1100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-01-31 13:37:27 +1100 |
commit | 8af03e782cae1e0a0f530ddd22301cdd12cf9dc0 (patch) | |
tree | c4af13a38bd3cc1a811a37f2358491f171052070 /arch/powerpc/platforms/cell | |
parent | 6232665040f9a23fafd9d94d4ae8d5a2dc850f65 (diff) | |
parent | 99e139126ab2e84be67969650f92eb37c12ab5cd (diff) |
Merge branch 'for-2.6.25' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
* 'for-2.6.25' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc: (454 commits)
[POWERPC] Cell IOMMU fixed mapping support
[POWERPC] Split out the ioid fetching/checking logic
[POWERPC] Add support to cell_iommu_setup_page_tables() for multiple windows
[POWERPC] Split out the IOMMU logic from cell_dma_dev_setup()
[POWERPC] Split cell_iommu_setup_hardware() into two parts
[POWERPC] Split out the logic that allocates struct iommus
[POWERPC] Allocate the hash table under 1G on cell
[POWERPC] Add set_dma_ops() to match get_dma_ops()
[POWERPC] 83xx: Clean up / convert mpc83xx board DTS files to v1 format.
[POWERPC] 85xx: Only invalidate TLB0 and TLB1
[POWERPC] 83xx: Fix typo in mpc837x compatible entries
[POWERPC] 85xx: convert sbc85* boards to use machine_device_initcall
[POWERPC] 83xx: rework platform Kconfig
[POWERPC] 85xx: rework platform Kconfig
[POWERPC] 86xx: Remove unused IRQ defines
[POWERPC] QE: Explicitly set address-cells and size cells for muram
[POWERPC] Convert StorCenter DTS file to /dts-v1/ format.
[POWERPC] 86xx: Convert all 86xx DTS files to /dts-v1/ format.
[PPC] Remove 85xx from arch/ppc
[PPC] Remove 83xx from arch/ppc
...
Diffstat (limited to 'arch/powerpc/platforms/cell')
24 files changed, 1530 insertions, 692 deletions
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index 39d695cb969..c89964c6fb1 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -20,7 +20,7 @@ spu-manage-$(CONFIG_PPC_CELL_NATIVE) += spu_manage.o obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ spu_notify.o \ - spu_syscalls.o \ + spu_syscalls.o spu_fault.o \ $(spu-priv1-y) \ $(spu-manage-y) \ spufs/ diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c index 13d5a87f13b..ec7c8f45a21 100644 --- a/arch/powerpc/platforms/cell/cbe_cpufreq.c +++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c @@ -21,8 +21,9 @@ */ #include <linux/cpufreq.h> +#include <linux/of_platform.h> + #include <asm/machdep.h> -#include <asm/of_platform.h> #include <asm/prom.h> #include <asm/cell-regs.h> #include "cbe_cpufreq.h" diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c index 6a2c1b0a9a9..69288f65314 100644 --- a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c +++ b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c @@ -23,7 +23,8 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/timer.h> -#include <asm/of_platform.h> +#include <linux/of_platform.h> + #include <asm/processor.h> #include <asm/prom.h> #include <asm/pmi.h> diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index 16a9b07e7b0..dbc338f187a 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -9,13 +9,13 @@ #include <linux/percpu.h> #include <linux/types.h> #include <linux/module.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> #include <asm/io.h> #include <asm/pgtable.h> #include <asm/prom.h> #include <asm/ptrace.h> -#include <asm/of_device.h> -#include <asm/of_platform.h> #include <asm/cell-regs.h> /* @@ -256,6 +256,7 @@ void __init cbe_regs_init(void) printk(KERN_ERR 
"cbe_regs: More BE chips than supported" "!\n"); cbe_regs_map_count--; + of_node_put(cpu); return; } map->cpu_node = cpu; diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/platforms/cell/io-workarounds.c index 9d7c2ef940a..979d4b67efb 100644 --- a/arch/powerpc/platforms/cell/io-workarounds.c +++ b/arch/powerpc/platforms/cell/io-workarounds.c @@ -238,7 +238,7 @@ static void __init spider_pci_setup_chip(struct spider_pci_bus *bus) static void __init spider_pci_add_one(struct pci_controller *phb) { struct spider_pci_bus *bus = &spider_pci_busses[spider_pci_count]; - struct device_node *np = phb->arch_data; + struct device_node *np = phb->dn; struct resource rsrc; void __iomem *regs; @@ -309,15 +309,12 @@ static int __init spider_pci_workaround_init(void) { struct pci_controller *phb; - if (!machine_is(cell)) - return 0; - /* Find spider bridges. We assume they have been all probed * in setup_arch(). If that was to change, we would need to * update this code to cope with dynamically added busses */ list_for_each_entry(phb, &hose_list, list_node) { - struct device_node *np = phb->arch_data; + struct device_node *np = phb->dn; const char *model = of_get_property(np, "model", NULL); /* If no model property or name isn't exactly "pci", skip */ @@ -343,4 +340,4 @@ static int __init spider_pci_workaround_init(void) return 0; } -arch_initcall(spider_pci_workaround_init); +machine_arch_initcall(cell, spider_pci_workaround_init); diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index faabc3fdc13..df330666ccc 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -1,7 +1,7 @@ /* * IOMMU implementation for Cell Broadband Processor Architecture * - * (C) Copyright IBM Corporation 2006 + * (C) Copyright IBM Corporation 2006-2008 * * Author: Jeremy Kerr <jk@ozlabs.org> * @@ -26,14 +26,15 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/notifier.h> +#include 
<linux/of_platform.h> #include <asm/prom.h> #include <asm/iommu.h> #include <asm/machdep.h> #include <asm/pci-bridge.h> #include <asm/udbg.h> -#include <asm/of_platform.h> #include <asm/lmb.h> +#include <asm/firmware.h> #include <asm/cell-regs.h> #include "interrupt.h" @@ -305,29 +306,28 @@ static int cell_iommu_find_ioc(int nid, unsigned long *base) return -ENODEV; } -static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, unsigned long size) +static void cell_iommu_setup_page_tables(struct cbe_iommu *iommu, + unsigned long dbase, unsigned long dsize, + unsigned long fbase, unsigned long fsize) { struct page *page; - int ret, i; - unsigned long reg, segments, pages_per_segment, ptab_size, n_pte_pages; - unsigned long xlate_base; - unsigned int virq; - - if (cell_iommu_find_ioc(iommu->nid, &xlate_base)) - panic("%s: missing IOC register mappings for node %d\n", - __FUNCTION__, iommu->nid); + int i; + unsigned long reg, segments, pages_per_segment, ptab_size, stab_size, + n_pte_pages, base; - iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size); - iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset; + base = dbase; + if (fsize != 0) + base = min(fbase, dbase); - segments = size >> IO_SEGMENT_SHIFT; + segments = max(dbase + dsize, fbase + fsize) >> IO_SEGMENT_SHIFT; pages_per_segment = 1ull << IO_PAGENO_BITS; pr_debug("%s: iommu[%d]: segments: %lu, pages per segment: %lu\n", __FUNCTION__, iommu->nid, segments, pages_per_segment); /* set up the segment table */ - page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0); + stab_size = segments * sizeof(unsigned long); + page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(stab_size)); BUG_ON(!page); iommu->stab = page_address(page); clear_page(iommu->stab); @@ -371,11 +371,25 @@ static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, unsigned long siz } pr_debug("Setting up IOMMU stab:\n"); - for (i = 0; i * (1ul << IO_SEGMENT_SHIFT) < size; i++) { + for (i = base >> IO_SEGMENT_SHIFT; i < segments; 
i++) { iommu->stab[i] = reg | (__pa(iommu->ptab) + n_pte_pages * IOMMU_PAGE_SIZE * i); pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]); } +} + +static void cell_iommu_enable_hardware(struct cbe_iommu *iommu) +{ + int ret; + unsigned long reg, xlate_base; + unsigned int virq; + + if (cell_iommu_find_ioc(iommu->nid, &xlate_base)) + panic("%s: missing IOC register mappings for node %d\n", + __FUNCTION__, iommu->nid); + + iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size); + iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset; /* ensure that the STEs have updated */ mb(); @@ -405,6 +419,13 @@ static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, unsigned long siz out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg); } +static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, + unsigned long base, unsigned long size) +{ + cell_iommu_setup_page_tables(iommu, base, size, 0, 0); + cell_iommu_enable_hardware(iommu); +} + #if 0/* Unused for now */ static struct iommu_window *find_window(struct cbe_iommu *iommu, unsigned long offset, unsigned long size) @@ -422,25 +443,36 @@ static struct iommu_window *find_window(struct cbe_iommu *iommu, } #endif +static inline u32 cell_iommu_get_ioid(struct device_node *np) +{ + const u32 *ioid; + + ioid = of_get_property(np, "ioid", NULL); + if (ioid == NULL) { + printk(KERN_WARNING "iommu: missing ioid for %s using 0\n", + np->full_name); + return 0; + } + + return *ioid; +} + static struct iommu_window * __init cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, unsigned long offset, unsigned long size, unsigned long pte_offset) { struct iommu_window *window; - const unsigned int *ioid; + u32 ioid; - ioid = of_get_property(np, "ioid", NULL); - if (ioid == NULL) - printk(KERN_WARNING "iommu: missing ioid for %s using 0\n", - np->full_name); + ioid = cell_iommu_get_ioid(np); window = kmalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid); BUG_ON(window == NULL); window->offset = offset; window->size = 
size; - window->ioid = ioid ? *ioid : 0; + window->ioid = ioid; window->iommu = iommu; window->pte_offset = pte_offset; @@ -489,16 +521,17 @@ static struct cbe_iommu *cell_iommu_for_node(int nid) return NULL; } -static void cell_dma_dev_setup(struct device *dev) +static unsigned long cell_dma_direct_offset; + +static unsigned long dma_iommu_fixed_base; +struct dma_mapping_ops dma_iommu_fixed_ops; + +static void cell_dma_dev_setup_iommu(struct device *dev) { struct iommu_window *window; struct cbe_iommu *iommu; struct dev_archdata *archdata = &dev->archdata; - /* If we run without iommu, no need to do anything */ - if (get_pci_dma_ops() == &dma_direct_ops) - return; - /* Current implementation uses the first window available in that * node's iommu. We -might- do something smarter later though it may * never be necessary @@ -515,6 +548,23 @@ static void cell_dma_dev_setup(struct device *dev) archdata->dma_data = &window->table; } +static void cell_dma_dev_setup_static(struct device *dev); + +static void cell_dma_dev_setup(struct device *dev) +{ + struct dev_archdata *archdata = &dev->archdata; + + /* Order is important here, these are not mutually exclusive */ + if (get_dma_ops(dev) == &dma_iommu_fixed_ops) + cell_dma_dev_setup_static(dev); + else if (get_pci_dma_ops() == &dma_iommu_ops) + cell_dma_dev_setup_iommu(dev); + else if (get_pci_dma_ops() == &dma_direct_ops) + archdata->dma_data = (void *)cell_dma_direct_offset; + else + BUG(); +} + static void cell_pci_dma_dev_setup(struct pci_dev *dev) { cell_dma_dev_setup(&dev->dev); @@ -560,10 +610,9 @@ static int __init cell_iommu_get_window(struct device_node *np, return 0; } -static void __init cell_iommu_init_one(struct device_node *np, unsigned long offset) +static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np) { struct cbe_iommu *iommu; - unsigned long base, size; int nid, i; /* Get node ID */ @@ -571,7 +620,7 @@ static void __init cell_iommu_init_one(struct device_node *np, unsigned long off 
if (nid < 0) { printk(KERN_ERR "iommu: failed to get node for %s\n", np->full_name); - return; + return NULL; } pr_debug("iommu: setting up iommu for node %d (%s)\n", nid, np->full_name); @@ -587,7 +636,7 @@ static void __init cell_iommu_init_one(struct device_node *np, unsigned long off if (cbe_nr_iommus >= NR_IOMMUS) { printk(KERN_ERR "iommu: too many IOMMUs detected ! (%s)\n", np->full_name); - return; + return NULL; } /* Init base fields */ @@ -598,6 +647,19 @@ static void __init cell_iommu_init_one(struct device_node *np, unsigned long off snprintf(iommu->name, sizeof(iommu->name), "iommu%d", i); INIT_LIST_HEAD(&iommu->windows); + return iommu; +} + +static void __init cell_iommu_init_one(struct device_node *np, + unsigned long offset) +{ + struct cbe_iommu *iommu; + unsigned long base, size; + + iommu = cell_iommu_alloc(np); + if (!iommu) + return; + /* Obtain a window for it */ cell_iommu_get_window(np, &base, &size); @@ -605,7 +667,7 @@ static void __init cell_iommu_init_one(struct device_node *np, unsigned long off base, base + size - 1); /* Initialize the hardware */ - cell_iommu_setup_hardware(iommu, size); + cell_iommu_setup_hardware(iommu, base, size); /* Setup the iommu_table */ cell_iommu_setup_window(iommu, np, base, size, @@ -653,7 +715,7 @@ static int __init cell_iommu_init_disabled(void) /* If we have no Axon, we set up the spider DMA magic offset */ if (of_find_node_by_name(NULL, "axon") == NULL) - dma_direct_offset = SPIDER_DMA_OFFSET; + cell_dma_direct_offset = SPIDER_DMA_OFFSET; /* Now we need to check to see where the memory is mapped * in PCI space. 
We assume that all busses use the same dma @@ -687,20 +749,274 @@ static int __init cell_iommu_init_disabled(void) return -ENODEV; } - dma_direct_offset += base; + cell_dma_direct_offset += base; + + if (cell_dma_direct_offset != 0) + ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup; printk("iommu: disabled, direct DMA offset is 0x%lx\n", - dma_direct_offset); + cell_dma_direct_offset); return 0; } -static int __init cell_iommu_init(void) +/* + * Fixed IOMMU mapping support + * + * This code adds support for setting up a fixed IOMMU mapping on certain + * cell machines. For 64-bit devices this avoids the performance overhead of + * mapping and unmapping pages at runtime. 32-bit devices are unable to use + * the fixed mapping. + * + * The fixed mapping is established at boot, and maps all of physical memory + * 1:1 into device space at some offset. On machines with < 30 GB of memory + * we setup the fixed mapping immediately above the normal IOMMU window. + * + * For example a machine with 4GB of memory would end up with the normal + * IOMMU window from 0-2GB and the fixed mapping window from 2GB to 6GB. In + * this case a 64-bit device wishing to DMA to 1GB would be told to DMA to + * 3GB, plus any offset required by firmware. The firmware offset is encoded + * in the "dma-ranges" property. + * + * On machines with 30GB or more of memory, we are unable to place the fixed + * mapping above the normal IOMMU window as we would run out of address space. + * Instead we move the normal IOMMU window to coincide with the hash page + * table, this region does not need to be part of the fixed mapping as no + * device should ever be DMA'ing to it. We then setup the fixed mapping + * from 0 to 32GB. 
+ */ + +static u64 cell_iommu_get_fixed_address(struct device *dev) { + u64 cpu_addr, size, best_size, pci_addr = OF_BAD_ADDR; + struct device_node *tmp, *np; + const u32 *ranges = NULL; + int i, len, best; + + np = dev->archdata.of_node; + of_node_get(np); + ranges = of_get_property(np, "dma-ranges", &len); + while (!ranges && np) { + tmp = of_get_parent(np); + of_node_put(np); + np = tmp; + ranges = of_get_property(np, "dma-ranges", &len); + } + + if (!ranges) { + dev_dbg(dev, "iommu: no dma-ranges found\n"); + goto out; + } + + len /= sizeof(u32); + + /* dma-ranges format: + * 1 cell: pci space + * 2 cells: pci address + * 2 cells: parent address + * 2 cells: size + */ + for (i = 0, best = -1, best_size = 0; i < len; i += 7) { + cpu_addr = of_translate_dma_address(np, ranges +i + 3); + size = of_read_number(ranges + i + 5, 2); + + if (cpu_addr == 0 && size > best_size) { + best = i; + best_size = size; + } + } + + if (best >= 0) + pci_addr = of_read_number(ranges + best + 1, 2); + else + dev_dbg(dev, "iommu: no suitable range found!\n"); + +out: + of_node_put(np); + + return pci_addr; +} + +static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + if (dma_mask == DMA_BIT_MASK(64)) { + if (cell_iommu_get_fixed_address(dev) == OF_BAD_ADDR) + dev_dbg(dev, "iommu: 64-bit OK, but bad addr\n"); + else { + dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); + set_dma_ops(dev, &dma_iommu_fixed_ops); + cell_dma_dev_setup(dev); + } + } else { + dev_dbg(dev, "iommu: not 64-bit, using default ops\n"); + set_dma_ops(dev, get_pci_dma_ops()); + } + + *dev->dma_mask = dma_mask; + + return 0; +} + +static void cell_dma_dev_setup_static(struct device *dev) +{ + struct dev_archdata *archdata = &dev->archdata; + u64 addr; + + addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base; + archdata->dma_data = (void *)addr; + + dev_dbg(dev, "iommu: fixed addr = %lx\n", addr); +} + +static 
void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu, + struct device_node *np, unsigned long dbase, unsigned long dsize, + unsigned long fbase, unsigned long fsize) +{ + unsigned long base_pte, uaddr, *io_pte; + int i; + + dma_iommu_fixed_base = fbase; + + /* convert from bytes into page table indices */ + dbase = dbase >> IOMMU_PAGE_SHIFT; + dsize = dsize >> IOMMU_PAGE_SHIFT; + fbase = fbase >> IOMMU_PAGE_SHIFT; + fsize = fsize >> IOMMU_PAGE_SHIFT; + + pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase); + + io_pte = iommu->ptab; + base_pte = IOPTE_PP_W | IOPTE_PP_R | IOPTE_M | IOPTE_SO_RW + | (cell_iommu_get_ioid(np) & IOPTE_IOID_Mask); + + uaddr = 0; + for (i = fbase; i < fbase + fsize; i++, uaddr += IOMMU_PAGE_SIZE) { + /* Don't touch the dynamic region */ + if (i >= dbase && i < (dbase + dsize)) { + pr_debug("iommu: static/dynamic overlap, skipping\n"); + continue; + } + io_pte[i] = base_pte | (__pa(uaddr) & IOPTE_RPN_Mask); + } + + mb(); +} + +static int __init cell_iommu_fixed_mapping_init(void) +{ + unsigned long dbase, dsize, fbase, fsize, hbase, hend; + struct cbe_iommu *iommu; struct device_node *np; - if (!machine_is(cell)) - return -ENODEV; + /* The fixed mapping is only supported on axon machines */ + np = of_find_node_by_name(NULL, "axon"); + if (!np) { + pr_debug("iommu: fixed mapping disabled, no axons found\n"); + return -1; + } + + /* The default setup is to have the fixed mapping sit after the + * dynamic region, so find the top of the largest IOMMU window + * on any axon, then add the size of RAM and that's our max value. + * If that is > 32GB we have to do other shennanigans. 
+ */ + fbase = 0; + for_each_node_by_name(np, "axon") { + cell_iommu_get_window(np, &dbase, &dsize); + fbase = max(fbase, dbase + dsize); + } + + fbase = _ALIGN_UP(fbase, 1 << IO_SEGMENT_SHIFT); + fsize = lmb_phys_mem_size(); + + if ((fbase + fsize) <= 0x800000000) + hbase = 0; /* use the device tree window */ + else { + /* If we're over 32 GB we need to cheat. We can't map all of + * RAM with the fixed mapping, and also fit the dynamic + * region. So try to place the dynamic region where the hash + * table sits, drivers never need to DMA to it, we don't + * need a fixed mapping for that area. + */ + if (!htab_address) { + pr_debug("iommu: htab is NULL, on LPAR? Huh?\n"); + return -1; + } + hbase = __pa(htab_address); + hend = hbase + htab_size_bytes; + + /* The window must start and end on a segment boundary */ + if ((hbase != _ALIGN_UP(hbase, 1 << IO_SEGMENT_SHIFT)) || + (hend != _ALIGN_UP(hend, 1 << IO_SEGMENT_SHIFT))) { + pr_debug("iommu: hash window not segment aligned\n"); + return -1; + } + + /* Check the hash window fits inside the real DMA window */ + for_each_node_by_name(np, "axon") { + cell_iommu_get_window(np, &dbase, &dsize); + + if (hbase < dbase || (hend > (dbase + dsize))) { + pr_debug("iommu: hash window doesn't fit in" + "real DMA window\n"); + return -1; + } + } + + fbase = 0; + } + + /* Setup the dynamic regions */ + for_each_node_by_name(np, "axon") { + iommu = cell_iommu_alloc(np); + BUG_ON(!iommu); + + if (hbase == 0) + cell_iommu_get_window(np, &dbase, &dsize); + else { + dbase = hbase; + dsize = htab_size_bytes; + } + + pr_debug("iommu: setting up %d, dynamic window %lx-%lx " \ + "fixed window %lx-%lx\n", iommu->nid, dbase, + dbase + dsize, fbase, fbase + fsize); + + cell_iommu_setup_page_tables(iommu, dbase, dsize, fbase, fsize); + cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize, + fbase, fsize); + cell_iommu_enable_hardware(iommu); + cell_iommu_setup_window(iommu, np, dbase, dsize, 0); + } + + dma_iommu_fixed_ops = dma_direct_ops; + 
dma_iommu_fixed_ops.set_dma_mask = dma_set_mask_and_switch; + + dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch; + set_pci_dma_ops(&dma_iommu_ops); + + printk(KERN_DEBUG "IOMMU fixed mapping established.\n"); + + return 0; +} + +static int iommu_fixed_disabled; + +static int __init setup_iommu_fixed(char *str) +{ + if (strcmp(str, "off") == 0) + iommu_fixed_disabled = 1; + + return 1; +} +__setup("iommu_fixed=", setup_iommu_fixed); + +static int __init cell_iommu_init(void) +{ + struct device_node *np; /* If IOMMU is disabled or we have little enough RAM to not need * to enable it, we setup a direct mapping. @@ -717,6 +1033,9 @@ static int __init cell_iommu_init(void) ppc_md.tce_build = tce_build_cell; ppc_md.tce_free = tce_free_cell; + if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0) + goto bail; + /* Create an iommu for each /axon node. */ for_each_node_by_name(np, "axon") { if (np->parent == NULL || np->parent->parent != NULL) @@ -744,5 +1063,6 @@ static int __init cell_iommu_init(void) return 0; } -arch_initcall(cell_iommu_init); +machine_arch_initcall(cell, cell_iommu_init); +machine_arch_initcall(celleb_native, cell_iommu_init); diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c index 1ed30367888..69ed0d7f164 100644 --- a/arch/powerpc/platforms/cell/pmu.c +++ b/arch/powerpc/platforms/cell/pmu.c @@ -213,7 +213,7 @@ u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg) break; case pm_interval: - READ_SHADOW_REG(val, pm_interval); + READ_MMIO_UPPER32(val, pm_interval); break; case pm_start_stop: @@ -381,9 +381,6 @@ static int __init cbe_init_pm_irq(void) unsigned int irq; int rc, node; - if (!machine_is(cell)) - return 0; - for_each_node(node) { irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI | (node << IIC_IRQ_NODE_SHIFT)); @@ -404,7 +401,7 @@ static int __init cbe_init_pm_irq(void) return 0; } -arch_initcall(cbe_init_pm_irq); +machine_arch_initcall(cell, cbe_init_pm_irq); void cbe_sync_irq(int node) { diff --git 
a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 98e7ef8e6fc..e6534b519c9 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -30,6 +30,7 @@ #include <linux/console.h> #include <linux/mutex.h> #include <linux/memory_hotplug.h> +#include <linux/of_platform.h> #include <asm/mmu.h> #include <asm/processor.h> @@ -51,7 +52,6 @@ #include <asm/spu_priv1.h> #include <asm/udbg.h> #include <asm/mpic.h> -#include <asm/of_platform.h> #include <asm/cell-regs.h> #include "interrupt.h" @@ -85,9 +85,6 @@ static int __init cell_publish_devices(void) { int node; - if (!machine_is(cell)) - return 0; - /* Publish OF platform devices for southbridge IOs */ of_platform_bus_probe(NULL, NULL, NULL); @@ -101,7 +98,7 @@ static int __init cell_publish_devices(void) } return 0; } -device_initcall(cell_publish_devices); +machine_device_initcall(cell, cell_publish_devices); static void cell_mpic_cascade(unsigned int irq, struct irq_desc *desc) { diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index e4438456c86..efb3964457b 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -42,6 +42,7 @@ #include <asm/firmware.h> #include <asm/system.h> #include <asm/rtas.h> +#include <asm/cputhreads.h> #include "interrupt.h" #include <asm/udbg.h> @@ -182,7 +183,7 @@ static int smp_cell_cpu_bootable(unsigned int nr) */ if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT) && - !smt_enabled_at_boot && nr % 2 != 0) + !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) return 0; return 1; diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index a0886220364..e45cfa84911 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -34,6 +34,7 @@ #include <linux/linux_logo.h> #include <asm/spu.h> #include <asm/spu_priv1.h> +#include <asm/spu_csa.h> #include <asm/xmon.h> #include 
<asm/prom.h> @@ -47,6 +48,13 @@ struct cbe_spu_info cbe_spu_info[MAX_NUMNODES]; EXPORT_SYMBOL_GPL(cbe_spu_info); /* + * The spufs fault-handling code needs to call force_sig_info to raise signals + * on DMA errors. Export it here to avoid general kernel-wide access to this + * function + */ +EXPORT_SYMBOL_GPL(force_sig_info); + +/* * Protects cbe_spu_info and spu->number. */ static DEFINE_SPINLOCK(spu_lock); @@ -66,6 +74,10 @@ static LIST_HEAD(spu_full_list); static DEFINE_SPINLOCK(spu_full_list_lock); static DEFINE_MUTEX(spu_full_list_mutex); +struct spu_slb { + u64 esid, vsid; +}; + void spu_invalidate_slbs(struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; @@ -114,40 +126,36 @@ void spu_associate_mm(struct spu *spu, struct mm_struct *mm) } EXPORT_SYMBOL_GPL(spu_associate_mm); -static int __spu_trap_invalid_dma(struct spu *spu) +int spu_64k_pages_available(void) { - pr_debug("%s\n", __FUNCTION__); - spu->dma_callback(spu, SPE_EVENT_INVALID_DMA); - return 0; + return mmu_psize_defs[MMU_PAGE_64K].shift != 0; } +EXPORT_SYMBOL_GPL(spu_64k_pages_available); -static int __spu_trap_dma_align(struct spu *spu) +static void spu_restart_dma(struct spu *spu) { - pr_debug("%s\n", __FUNCTION__); - spu->dma_callback(spu, SPE_EVENT_DMA_ALIGNMENT); - return 0; -} + struct spu_priv2 __iomem *priv2 = spu->priv2; -static int __spu_trap_error(struct spu *spu) -{ - pr_debug("%s\n", __FUNCTION__); - spu->dma_callback(spu, SPE_EVENT_SPE_ERROR); - return 0; + if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags)) + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); } -static void spu_restart_dma(struct spu *spu) +static inline void spu_load_slb(struct spu *spu, int slbe, struct spu_slb *slb) { struct spu_priv2 __iomem *priv2 = spu->priv2; - if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags)) - out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); + pr_debug("%s: adding SLB[%d] 0x%016lx 0x%016lx\n", + __func__, slbe, slb->vsid, slb->esid); + + 
out_be64(&priv2->slb_index_W, slbe); + out_be64(&priv2->slb_vsid_RW, slb->vsid); + out_be64(&priv2->slb_esid_RW, slb->esid); } static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) { - struct spu_priv2 __iomem *priv2 = spu->priv2; struct mm_struct *mm = spu->mm; - u64 esid, vsid, llp; + struct spu_slb slb; int psize; pr_debug("%s\n", __FUNCTION__); @@ -159,7 +167,7 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) printk("%s: invalid access during switch!\n", __func__); return 1; } - esid = (ea & ESID_MASK) | SLB_ESID_V; + slb.esid = (ea & ESID_MASK) | SLB_ESID_V; switch(REGION_ID(ea)) { case USER_REGION_ID: @@ -168,21 +176,21 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) #else psize = mm->context.user_psize; #endif - vsid = (get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | - SLB_VSID_USER; + slb.vsid = (get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M) + << SLB_VSID_SHIFT) | SLB_VSID_USER; break; case VMALLOC_REGION_ID: if (ea < VMALLOC_END) psize = mmu_vmalloc_psize; else psize = mmu_io_psize; - vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | - SLB_VSID_KERNEL; + slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) + << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; break; case KERNEL_REGION_ID: psize = mmu_linear_psize; - vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | - SLB_VSID_KERNEL; + slb.vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) + << SLB_VSID_SHIFT) | SLB_VSID_KERNEL; break; default: /* Future: support kernel segments so that drivers @@ -191,11 +199,9 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) pr_debug("invalid region access at %016lx\n", ea); return 1; } - llp = mmu_psize_defs[psize].sllp; + slb.vsid |= mmu_psize_defs[psize].sllp; - out_be64(&priv2->slb_index_W, spu->slb_replace); - out_be64(&priv2->slb_vsid_RW, vsid | llp); - out_be64(&priv2->slb_esid_RW, esid); + spu_load_slb(spu, spu->slb_replace, &slb); spu->slb_replace++; if 
(spu->slb_replace >= 8) @@ -225,13 +231,83 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) return 1; } + spu->class_0_pending = 0; spu->dar = ea; spu->dsisr = dsisr; - mb(); + spu->stop_callback(spu); + + return 0; +} + +static void __spu_kernel_slb(void *addr, struct spu_slb *slb) +{ + unsigned long ea = (unsigned long)addr; + u64 llp; + + if (REGION_ID(ea) == KERNEL_REGION_ID) + llp = mmu_psize_defs[mmu_linear_psize].sllp; + else + llp = mmu_psize_defs[mmu_virtual_psize].sllp; + + slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | + SLB_VSID_KERNEL | llp; + slb->esid = (ea & ESID_MASK) | SLB_ESID_V; +} + +/** + * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the + * address @new_addr is present. + */ +static inline int __slb_present(struct spu_slb *slbs, int nr_slbs, + void *new_addr) +{ + unsigned long ea = (unsigned long)new_addr; + int i; + + for (i = 0; i < nr_slbs; i++) + if (!((slbs[i].esid ^ ea) & ESID_MASK)) + return 1; + return 0; } +/** + * Setup the SPU kernel SLBs, in preparation for a context save/restore. We + * need to map both the context save area, and the save/restore code. + * + * Because the lscsa and code may cross segment boundaires, we check to see + * if mappings are required for the start and end of each range. We currently + * assume that the mappings are smaller that one segment - if not, something + * is seriously wrong. 
+ */ +void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa, + void *code, int code_size) +{ + struct spu_slb slbs[4]; + int i, nr_slbs = 0; + /* start and end addresses of both mappings */ + void *addrs[] = { + lscsa, (void *)lscsa + sizeof(*lscsa) - 1, + code, code + code_size - 1 + }; + + /* check the set of addresses, and create a new entry in the slbs array + * if there isn't already a SLB for that address */ + for (i = 0; i < ARRAY_SIZE(addrs); i++) { + if (__slb_present(slbs, nr_slbs, addrs[i])) + continue; + + __spu_kernel_slb(addrs[i], &slbs[nr_slbs]); + nr_slbs++; + } + + /* Add the set of SLBs */ + for (i = 0; i < nr_slbs; i++) + spu_load_slb(spu, i, &slbs[i]); +} +EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs); + static irqreturn_t spu_irq_class_0(int irq, void *data) { @@ -240,12 +316,13 @@ spu_irq_class_0(int irq, void *data) spu = data; + spin_lock(&spu->register_lock); mask = spu_int_mask_get(spu, 0); - stat = spu_int_stat_get(spu, 0); - stat &= mask; + stat = spu_int_stat_get(spu, 0) & mask; - spin_lock(&spu->register_lock); spu->class_0_pending |= stat; + spu->dsisr = spu_mfc_dsisr_get(spu); + spu->dar = spu_mfc_dar_get(spu); spin_unlock(&spu->register_lock); spu->stop_callback(spu); @@ -255,31 +332,6 @@ spu_irq_class_0(int irq, void *data) return IRQ_HANDLED; } -int -spu_irq_class_0_bottom(struct spu *spu) -{ - unsigned long flags; - unsigned long stat; - - spin_lock_irqsave(&spu->register_lock, flags); - stat = spu->class_0_pending; - spu->class_0_pending = 0; - - if (stat & 1) /* invalid DMA alignment */ - __spu_trap_dma_align(spu); - - if (stat & 2) /* invalid MFC DMA */ - __spu_trap_invalid_dma(spu); - - if (stat & 4) /* error on SPU */ - __spu_trap_error(spu); - - spin_unlock_irqrestore(&spu->register_lock, flags); - - return (stat & 0x7) ? 
-EIO : 0; -} -EXPORT_SYMBOL_GPL(spu_irq_class_0_bottom); - static irqreturn_t spu_irq_class_1(int irq, void *data) { @@ -294,24 +346,23 @@ spu_irq_class_1(int irq, void *data) stat = spu_int_stat_get(spu, 1) & mask; dar = spu_mfc_dar_get(spu); dsisr = spu_mfc_dsisr_get(spu); - if (stat & 2) /* mapping fault */ + if (stat & CLASS1_STORAGE_FAULT_INTR) spu_mfc_dsisr_set(spu, 0ul); spu_int_stat_clear(spu, 1, stat); spin_unlock(&spu->register_lock); pr_debug("%s: %lx %lx %lx %lx\n", __FUNCTION__, mask, stat, dar, dsisr); - if (stat & 1) /* segment fault */ + if (stat & CLASS1_SEGMENT_FAULT_INTR) __spu_trap_data_seg(spu, dar); - if (stat & 2) { /* mapping fault */ + if (stat & CLASS1_STORAGE_FAULT_INTR) __spu_trap_data_map(spu, dar, dsisr); - } - if (stat & 4) /* ls compare & suspend on get */ + if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_GET_INTR) ; - if (stat & 8) /* ls compare & suspend on put */ + if (stat & CLASS1_LS_COMPARE_SUSPEND_ON_PUT_INTR) ; return stat ? IRQ_HANDLED : IRQ_NONE; @@ -323,6 +374,8 @@ spu_irq_class_2(int irq, void *data) struct spu *spu; unsigned long stat; unsigned long mask; + const int mailbox_intrs = + CLASS2_MAILBOX_THRESHOLD_INTR | CLASS2_MAILBOX_INTR; spu = data; spin_lock(&spu->register_lock); @@ -330,31 +383,30 @@ spu_irq_class_2(int irq, void *data) mask = spu_int_mask_get(spu, 2); /* ignore interrupts we're not waiting for */ stat &= mask; - /* - * mailbox interrupts (0x1 and 0x10) are level triggered. - * mask them now before acknowledging. - */ - if (stat & 0x11) - spu_int_mask_and(spu, 2, ~(stat & 0x11)); + + /* mailbox interrupts are level triggered. 
mask them now before + * acknowledging */ + if (stat & mailbox_intrs) + spu_int_mask_and(spu, 2, ~(stat & mailbox_intrs)); /* acknowledge all interrupts before the callbacks */ spu_int_stat_clear(spu, 2, stat); spin_unlock(&spu->register_lock); pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask); - if (stat & 1) /* PPC core mailbox */ + if (stat & CLASS2_MAILBOX_INTR) spu->ibox_callback(spu); - if (stat & 2) /* SPU stop-and-signal */ + if (stat & CLASS2_SPU_STOP_INTR) spu->stop_callback(spu); - if (stat & 4) /* SPU halted */ + if (stat & CLASS2_SPU_HALT_INTR) spu->stop_callback(spu); - if (stat & 8) /* DMA tag group complete */ + if (stat & CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR) spu->mfc_callback(spu); - if (stat & 0x10) /* SPU mailbox threshold */ + if (stat & CLASS2_MAILBOX_THRESHOLD_INTR) spu->wbox_callback(spu); spu->stats.class2_intr++; @@ -479,13 +531,27 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); int spu_add_sysdev_attr_group(struct attribute_group *attrs) { struct spu *spu; + int rc = 0; mutex_lock(&spu_full_list_mutex); - list_for_each_entry(spu, &spu_full_list, full_list) - sysfs_create_group(&spu->sysdev.kobj, attrs); + list_for_each_entry(spu, &spu_full_list, full_list) { + rc = sysfs_create_group(&spu->sysdev.kobj, attrs); + + /* we're in trouble here, but try unwinding anyway */ + if (rc) { + printk(KERN_ERR "%s: can't create sysfs group '%s'\n", + __func__, attrs->name); + + list_for_each_entry_continue_reverse(spu, + &spu_full_list, full_list) + sysfs_remove_group(&spu->sysdev.kobj, attrs); + break; + } + } + mutex_unlock(&spu_full_list_mutex); - return 0; + return rc; } EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); diff --git a/arch/powerpc/platforms/cell/spu_fault.c b/arch/powerpc/platforms/cell/spu_fault.c new file mode 100644 index 00000000000..c8b1cd42905 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_fault.c @@ -0,0 +1,98 @@ +/* + * SPU mm fault handler + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2007 + * + * Author: 
Arnd Bergmann <arndb@de.ibm.com> + * Author: Jeremy Kerr <jk@ozlabs.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/module.h> + +#include <asm/spu.h> +#include <asm/spu_csa.h> + +/* + * This ought to be kept in sync with the powerpc specific do_page_fault + * function. Currently, there are a few corner cases that we haven't had + * to handle fortunately. 
+ */ +int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, + unsigned long dsisr, unsigned *flt) +{ + struct vm_area_struct *vma; + unsigned long is_write; + int ret; + +#if 0 + if (!IS_VALID_EA(ea)) { + return -EFAULT; + } +#endif /* XXX */ + if (mm == NULL) { + return -EFAULT; + } + if (mm->pgd == NULL) { + return -EFAULT; + } + + down_read(&mm->mmap_sem); + vma = find_vma(mm, ea); + if (!vma) + goto bad_area; + if (vma->vm_start <= ea) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, ea)) + goto bad_area; +good_area: + is_write = dsisr & MFC_DSISR_ACCESS_PUT; + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } else { + if (dsisr & MFC_DSISR_ACCESS_DENIED) + goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + ret = 0; + *flt = handle_mm_fault(mm, vma, ea, is_write); + if (unlikely(*flt & VM_FAULT_ERROR)) { + if (*flt & VM_FAULT_OOM) { + ret = -ENOMEM; + goto bad_area; + } else if (*flt & VM_FAULT_SIGBUS) { + ret = -EFAULT; + goto bad_area; + } + BUG(); + } + if (*flt & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; + up_read(&mm->mmap_sem); + return ret; + +bad_area: + up_read(&mm->mmap_sem); + return -EFAULT; +} +EXPORT_SYMBOL_GPL(spu_handle_mm_fault); diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index 1b010707488..d351bdebf5f 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -35,6 +35,7 @@ #include <asm/firmware.h> #include <asm/prom.h> +#include "spufs/spufs.h" #include "interrupt.h" struct device_node *spu_devnode(struct spu *spu) @@ -345,7 +346,7 @@ static int __init of_create_spu(struct spu *spu, void *data) } ret = spu_map_interrupts_old(spu, spe); if (ret) { - printk(KERN_ERR "%s: could not map interrupts", + printk(KERN_ERR "%s: could not map interrupts\n", spu->name); goto out_unmap; } @@ -369,6 +370,16 @@ static int 
of_destroy_spu(struct spu *spu) return 0; } +static void enable_spu_by_master_run(struct spu_context *ctx) +{ + ctx->ops->master_start(ctx); +} + +static void disable_spu_by_master_run(struct spu_context *ctx) +{ + ctx->ops->master_stop(ctx); +} + /* Hardcoded affinity idxs for qs20 */ #define QS20_SPES_PER_BE 8 static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; @@ -411,10 +422,15 @@ static void init_affinity_qs20_harcoded(void) static int of_has_vicinity(void) { - struct spu* spu; + struct device_node *dn; - spu = list_first_entry(&cbe_spu_info[0].spus, struct spu, cbe_list); - return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL; + for_each_node_by_type(dn, "spe") { + if (of_find_property(dn, "vicinity", NULL)) { + of_node_put(dn); + return 1; + } + } + return 0; } static struct spu *devnode_spu(int cbe, struct device_node *dn) @@ -525,7 +541,7 @@ static int __init init_affinity(void) if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) init_affinity_qs20_harcoded(); else - printk("No affinity configuration found"); + printk("No affinity configuration found\n"); } return 0; @@ -535,5 +551,7 @@ const struct spu_management_ops spu_management_of_ops = { .enumerate_spus = of_enumerate_spus, .create_spu = of_create_spu, .destroy_spu = of_destroy_spu, + .enable_spu = enable_spu_by_master_run, + .disable_spu = disable_spu_by_master_run, .init_affinity = init_affinity, }; diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile index 328afcf8950..d3a349fb42e 100644 --- a/arch/powerpc/platforms/cell/spufs/Makefile +++ b/arch/powerpc/platforms/cell/spufs/Makefile @@ -1,8 +1,8 @@ -obj-y += switch.o fault.o lscsa_alloc.o obj-$(CONFIG_SPU_FS) += spufs.o spufs-y += inode.o file.o context.o syscalls.o coredump.o spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o +spufs-y += switch.o fault.o lscsa_alloc.o # Rules to build switch.o with the help of SPU tool chain SPU_CROSS := spu- diff --git 
a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c index ec01214e51e..50d98a154aa 100644 --- a/arch/powerpc/platforms/cell/spufs/backing_ops.c +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -106,16 +106,20 @@ static unsigned int spu_backing_mbox_stat_poll(struct spu_context *ctx, if (stat & 0xff0000) ret |= POLLIN | POLLRDNORM; else { - ctx->csa.priv1.int_stat_class0_RW &= ~0x1; - ctx->csa.priv1.int_mask_class2_RW |= 0x1; + ctx->csa.priv1.int_stat_class2_RW &= + ~CLASS2_MAILBOX_INTR; + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_INTR; } } if (events & (POLLOUT | POLLWRNORM)) { if (stat & 0x00ff00) ret = POLLOUT | POLLWRNORM; else { - ctx->csa.priv1.int_stat_class0_RW &= ~0x10; - ctx->csa.priv1.int_mask_class2_RW |= 0x10; + ctx->csa.priv1.int_stat_class2_RW &= + ~CLASS2_MAILBOX_THRESHOLD_INTR; + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR; } } spin_unlock_irq(&ctx->csa.register_lock); @@ -139,7 +143,7 @@ static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data) ret = 4; } else { /* make sure we get woken up by the interrupt */ - ctx->csa.priv1.int_mask_class2_RW |= 0x1UL; + ctx->csa.priv1.int_mask_class2_RW |= CLASS2_ENABLE_MAILBOX_INTR; ret = 0; } spin_unlock(&ctx->csa.register_lock); @@ -169,7 +173,8 @@ static int spu_backing_wbox_write(struct spu_context *ctx, u32 data) } else { /* make sure we get woken up by the interrupt when space becomes available */ - ctx->csa.priv1.int_mask_class2_RW |= 0x10; + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR; ret = 0; } spin_unlock(&ctx->csa.register_lock); @@ -268,6 +273,11 @@ static char *spu_backing_get_ls(struct spu_context *ctx) return ctx->csa.lscsa->ls; } +static void spu_backing_privcntl_write(struct spu_context *ctx, u64 val) +{ + ctx->csa.priv2.spu_privcntl_RW = val; +} + static u32 spu_backing_runcntl_read(struct spu_context *ctx) { return 
ctx->csa.prob.spu_runcntl_RW; @@ -285,6 +295,11 @@ static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val) spin_unlock(&ctx->csa.register_lock); } +static void spu_backing_runcntl_stop(struct spu_context *ctx) +{ + spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP); +} + static void spu_backing_master_start(struct spu_context *ctx) { struct spu_state *csa = &ctx->csa; @@ -358,7 +373,7 @@ static int spu_backing_send_mfc_command(struct spu_context *ctx, static void spu_backing_restart_dma(struct spu_context *ctx) { - /* nothing to do here */ + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND; } struct spu_context_ops spu_backing_ops = { @@ -379,8 +394,10 @@ struct spu_context_ops spu_backing_ops = { .npc_write = spu_backing_npc_write, .status_read = spu_backing_status_read, .get_ls = spu_backing_get_ls, + .privcntl_write = spu_backing_privcntl_write, .runcntl_read = spu_backing_runcntl_read, .runcntl_write = spu_backing_runcntl_write, + .runcntl_stop = spu_backing_runcntl_stop, .master_start = spu_backing_master_start, .master_stop = spu_backing_master_stop, .set_mfc_query = spu_backing_set_mfc_query, diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index adf0a030d6f..133995ed5cc 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -52,6 +52,7 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) init_waitqueue_head(&ctx->wbox_wq); init_waitqueue_head(&ctx->stop_wq); init_waitqueue_head(&ctx->mfc_wq); + init_waitqueue_head(&ctx->run_wq); ctx->state = SPU_STATE_SAVED; ctx->ops = &spu_backing_ops; ctx->owner = get_task_mm(current); @@ -105,7 +106,17 @@ int put_spu_context(struct spu_context *ctx) void spu_forget(struct spu_context *ctx) { struct mm_struct *mm; - spu_acquire_saved(ctx); + + /* + * This is basically an open-coded spu_acquire_saved, except that + * we don't acquire the state mutex interruptible. 
+ */ + mutex_lock(&ctx->state_mutex); + if (ctx->state != SPU_STATE_SAVED) { + set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags); + spu_deactivate(ctx); + } + mm = ctx->owner; ctx->owner = NULL; mmput(mm); @@ -133,47 +144,23 @@ void spu_unmap_mappings(struct spu_context *ctx) } /** - * spu_acquire_runnable - lock spu contex and make sure it is in runnable state + * spu_acquire_saved - lock spu contex and make sure it is in saved state * @ctx: spu contex to lock - * - * Note: - * Returns 0 and with the context locked on success - * Returns negative error and with the context _unlocked_ on failure. */ -int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags) +int spu_acquire_saved(struct spu_context *ctx) { - int ret = -EINVAL; - - spu_acquire(ctx); - if (ctx->state == SPU_STATE_SAVED) { - /* - * Context is about to be freed, so we can't acquire it anymore. - */ - if (!ctx->owner) - goto out_unlock; - ret = spu_activate(ctx, flags); - if (ret) - goto out_unlock; - } + int ret; - return 0; - - out_unlock: - spu_release(ctx); - return ret; -} + ret = spu_acquire(ctx); + if (ret) + return ret; -/** - * spu_acquire_saved - lock spu contex and make sure it is in saved state - * @ctx: spu contex to lock - */ -void spu_acquire_saved(struct spu_context *ctx) -{ - spu_acquire(ctx); if (ctx->state != SPU_STATE_SAVED) { set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags); spu_deactivate(ctx); } + + return 0; } /** diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 80f62363e1c..0c6a96b82b2 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -148,7 +148,9 @@ int spufs_coredump_extra_notes_size(void) fd = 0; while ((ctx = coredump_next_context(&fd)) != NULL) { - spu_acquire_saved(ctx); + rc = spu_acquire_saved(ctx); + if (rc) + break; rc = spufs_ctx_note_size(ctx, fd); spu_release_saved(ctx); if (rc < 0) @@ -224,7 +226,9 @@ int 
spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset) fd = 0; while ((ctx = coredump_next_context(&fd)) != NULL) { - spu_acquire_saved(ctx); + rc = spu_acquire_saved(ctx); + if (rc) + return rc; for (j = 0; spufs_coredump_read[j].name != NULL; j++) { rc = spufs_arch_write_note(ctx, j, file, fd, foffset); diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index 917eab4be48..eff4d291ba8 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -28,117 +28,71 @@ #include "spufs.h" -/* - * This ought to be kept in sync with the powerpc specific do_page_fault - * function. Currently, there are a few corner cases that we haven't had - * to handle fortunately. +/** + * Handle an SPE event, depending on context SPU_CREATE_EVENTS_ENABLED flag. + * + * If the context was created with events, we just set the return event. + * Otherwise, send an appropriate signal to the process. */ -static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, - unsigned long dsisr, unsigned *flt) +static void spufs_handle_event(struct spu_context *ctx, + unsigned long ea, int type) { - struct vm_area_struct *vma; - unsigned long is_write; - int ret; + siginfo_t info; -#if 0 - if (!IS_VALID_EA(ea)) { - return -EFAULT; - } -#endif /* XXX */ - if (mm == NULL) { - return -EFAULT; - } - if (mm->pgd == NULL) { - return -EFAULT; + if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) { + ctx->event_return |= type; + wake_up_all(&ctx->stop_wq); + return; } - down_read(&mm->mmap_sem); - vma = find_vma(mm, ea); - if (!vma) - goto bad_area; - if (vma->vm_start <= ea) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (expand_stack(vma, ea)) - goto bad_area; -good_area: - is_write = dsisr & MFC_DSISR_ACCESS_PUT; - if (is_write) { - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - } else { - if (dsisr & MFC_DSISR_ACCESS_DENIED) - goto bad_area; - if (!(vma->vm_flags 
& (VM_READ | VM_EXEC))) - goto bad_area; + memset(&info, 0, sizeof(info)); + + switch (type) { + case SPE_EVENT_INVALID_DMA: + info.si_signo = SIGBUS; + info.si_code = BUS_OBJERR; + break; + case SPE_EVENT_SPE_DATA_STORAGE: + info.si_signo = SIGSEGV; + info.si_addr = (void __user *)ea; + info.si_code = SEGV_ACCERR; + ctx->ops->restart_dma(ctx); + break; + case SPE_EVENT_DMA_ALIGNMENT: + info.si_signo = SIGBUS; + /* DAR isn't set for an alignment fault :( */ + info.si_code = BUS_ADRALN; + break; + case SPE_EVENT_SPE_ERROR: + info.si_signo = SIGILL; + info.si_addr = (void __user *)(unsigned long) + ctx->ops->npc_read(ctx) - 4; + info.si_code = ILL_ILLOPC; + break; } - ret = 0; - *flt = handle_mm_fault(mm, vma, ea, is_write); - if (unlikely(*flt & VM_FAULT_ERROR)) { - if (*flt & VM_FAULT_OOM) { - ret = -ENOMEM; - goto bad_area; - } else if (*flt & VM_FAULT_SIGBUS) { - ret = -EFAULT; - goto bad_area; - } - BUG(); - } - if (*flt & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; - up_read(&mm->mmap_sem); - return ret; -bad_area: - up_read(&mm->mmap_sem); - return -EFAULT; + if (info.si_signo) + force_sig_info(info.si_signo, &info, current); } -static void spufs_handle_dma_error(struct spu_context *ctx, - unsigned long ea, int type) +int spufs_handle_class0(struct spu_context *ctx) { - if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) { - ctx->event_return |= type; - wake_up_all(&ctx->stop_wq); - } else { - siginfo_t info; - memset(&info, 0, sizeof(info)); - - switch (type) { - case SPE_EVENT_INVALID_DMA: - info.si_signo = SIGBUS; - info.si_code = BUS_OBJERR; - break; - case SPE_EVENT_SPE_DATA_STORAGE: - info.si_signo = SIGBUS; - info.si_addr = (void __user *)ea; - info.si_code = BUS_ADRERR; - break; - case SPE_EVENT_DMA_ALIGNMENT: - info.si_signo = SIGBUS; - /* DAR isn't set for an alignment fault :( */ - info.si_code = BUS_ADRALN; - break; - case SPE_EVENT_SPE_ERROR: - info.si_signo = SIGILL; - info.si_addr = (void __user *)(unsigned long) - 
ctx->ops->npc_read(ctx) - 4; - info.si_code = ILL_ILLOPC; - break; - } - if (info.si_signo) - force_sig_info(info.si_signo, &info, current); - } -} + unsigned long stat = ctx->csa.class_0_pending & CLASS0_INTR_MASK; -void spufs_dma_callback(struct spu *spu, int type) -{ - spufs_handle_dma_error(spu->ctx, spu->dar, type); + if (likely(!stat)) + return 0; + + if (stat & CLASS0_DMA_ALIGNMENT_INTR) + spufs_handle_event(ctx, ctx->csa.dar, SPE_EVENT_DMA_ALIGNMENT); + + if (stat & CLASS0_INVALID_DMA_COMMAND_INTR) + spufs_handle_event(ctx, ctx->csa.dar, SPE_EVENT_INVALID_DMA); + + if (stat & CLASS0_SPU_ERROR_INTR) + spufs_handle_event(ctx, ctx->csa.dar, SPE_EVENT_SPE_ERROR); + + return -EIO; } -EXPORT_SYMBOL_GPL(spufs_dma_callback); /* * bottom half handler for page faults, we can't do this from @@ -154,7 +108,7 @@ int spufs_handle_class1(struct spu_context *ctx) u64 ea, dsisr, access; unsigned long flags; unsigned flt = 0; - int ret; + int ret, ret2; /* * dar and dsisr get passed from the registers @@ -165,16 +119,8 @@ int spufs_handle_class1(struct spu_context *ctx) * in time, we can still expect to get the same fault * the immediately after the context restore. */ - if (ctx->state == SPU_STATE_RUNNABLE) { - ea = ctx->spu->dar; - dsisr = ctx->spu->dsisr; - ctx->spu->dar= ctx->spu->dsisr = 0; - } else { - ea = ctx->csa.priv1.mfc_dar_RW; - dsisr = ctx->csa.priv1.mfc_dsisr_RW; - ctx->csa.priv1.mfc_dar_RW = 0; - ctx->csa.priv1.mfc_dsisr_RW = 0; - } + ea = ctx->csa.dar; + dsisr = ctx->csa.dsisr; if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) return 0; @@ -201,7 +147,22 @@ int spufs_handle_class1(struct spu_context *ctx) if (ret) ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt); - spu_acquire(ctx); + /* + * If spu_acquire fails due to a pending signal we just want to return + * EINTR to userspace even if that means missing the dma restart or + * updating the page fault statistics. 
+ */ + ret2 = spu_acquire(ctx); + if (ret2) + goto out; + + /* + * Clear dsisr under ctxt lock after handling the fault, so that + * time slicing will not preempt the context while the page fault + * handler is running. Context switch code removes mappings. + */ + ctx->csa.dar = ctx->csa.dsisr = 0; + /* * If we handled the fault successfully and are in runnable * state, restart the DMA. @@ -222,9 +183,9 @@ int spufs_handle_class1(struct spu_context *ctx) if (ctx->spu) ctx->ops->restart_dma(ctx); } else - spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); + spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); + out: spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); return ret; } -EXPORT_SYMBOL_GPL(spufs_handle_class1); diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index d9e56a50379..3fcd06418b0 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -40,6 +40,120 @@ #define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000) +/* Simple attribute files */ +struct spufs_attr { + int (*get)(void *, u64 *); + int (*set)(void *, u64); + char get_buf[24]; /* enough to store a u64 and "\n\0" */ + char set_buf[24]; + void *data; + const char *fmt; /* format for read operation */ + struct mutex mutex; /* protects access to these buffers */ +}; + +static int spufs_attr_open(struct inode *inode, struct file *file, + int (*get)(void *, u64 *), int (*set)(void *, u64), + const char *fmt) +{ + struct spufs_attr *attr; + + attr = kmalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + + attr->get = get; + attr->set = set; + attr->data = inode->i_private; + attr->fmt = fmt; + mutex_init(&attr->mutex); + file->private_data = attr; + + return nonseekable_open(inode, file); +} + +static int spufs_attr_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static ssize_t spufs_attr_read(struct file *file, char __user *buf, + size_t len, 
loff_t *ppos) +{ + struct spufs_attr *attr; + size_t size; + ssize_t ret; + + attr = file->private_data; + if (!attr->get) + return -EACCES; + + ret = mutex_lock_interruptible(&attr->mutex); + if (ret) + return ret; + + if (*ppos) { /* continued read */ + size = strlen(attr->get_buf); + } else { /* first read */ + u64 val; + ret = attr->get(attr->data, &val); + if (ret) + goto out; + + size = scnprintf(attr->get_buf, sizeof(attr->get_buf), + attr->fmt, (unsigned long long)val); + } + + ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); +out: + mutex_unlock(&attr->mutex); + return ret; +} + +static ssize_t spufs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + struct spufs_attr *attr; + u64 val; + size_t size; + ssize_t ret; + + attr = file->private_data; + if (!attr->set) + return -EACCES; + + ret = mutex_lock_interruptible(&attr->mutex); + if (ret) + return ret; + + ret = -EFAULT; + size = min(sizeof(attr->set_buf) - 1, len); + if (copy_from_user(attr->set_buf, buf, size)) + goto out; + + ret = len; /* claim we got the whole input */ + attr->set_buf[size] = '\0'; + val = simple_strtol(attr->set_buf, NULL, 0); + attr->set(attr->data, val); +out: + mutex_unlock(&attr->mutex); + return ret; +} + +#define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return spufs_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = spufs_attr_release, \ + .read = spufs_attr_read, \ + .write = spufs_attr_write, \ +}; + static int spufs_mem_open(struct inode *inode, struct file *file) @@ -84,9 +198,12 @@ spufs_mem_read(struct file *file, char __user *buffer, struct spu_context *ctx = file->private_data; ssize_t ret; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; ret 
= __spufs_mem_read(ctx, buffer, size, pos); spu_release(ctx); + return ret; } @@ -106,7 +223,10 @@ spufs_mem_write(struct file *file, const char __user *buffer, if (size > LS_SIZE - pos) size = LS_SIZE - pos; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; + local_store = ctx->ops->get_ls(ctx); ret = copy_from_user(local_store + pos, buffer, size); spu_release(ctx); @@ -146,7 +266,8 @@ static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma, pr_debug("spufs_mem_mmap_nopfn address=0x%lx -> 0x%lx, offset=0x%lx\n", addr0, address, offset); - spu_acquire(ctx); + if (spu_acquire(ctx)) + return NOPFN_REFAULT; if (ctx->state == SPU_STATE_SAVED) { vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) @@ -236,23 +357,32 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma, { struct spu_context *ctx = vma->vm_file->private_data; unsigned long area, offset = address - vma->vm_start; - int ret; offset += vma->vm_pgoff << PAGE_SHIFT; if (offset >= ps_size) return NOPFN_SIGBUS; - /* error here usually means a signal.. we might want to test - * the error code more precisely though + /* + * We have to wait for context to be loaded before we have + * pages to hand out to the user, but we don't want to wait + * with the mmap_sem held. + * It is possible to drop the mmap_sem here, but then we need + * to return NOPFN_REFAULT because the mappings may have + * hanged. 
*/ - ret = spu_acquire_runnable(ctx, 0); - if (ret) + if (spu_acquire(ctx)) return NOPFN_REFAULT; - area = ctx->spu->problem_phys + ps_offs; - vm_insert_pfn(vma, address, (area + offset) >> PAGE_SHIFT); - spu_release(ctx); + if (ctx->state == SPU_STATE_SAVED) { + up_read(&current->mm->mmap_sem); + spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE); + down_read(&current->mm->mmap_sem); + } else { + area = ctx->spu->problem_phys + ps_offs; + vm_insert_pfn(vma, address, (area + offset) >> PAGE_SHIFT); + } + spu_release(ctx); return NOPFN_REFAULT; } @@ -286,25 +416,32 @@ static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma) #define spufs_cntl_mmap NULL #endif /* !SPUFS_MMAP_4K */ -static u64 spufs_cntl_get(void *data) +static int spufs_cntl_get(void *data, u64 *val) { struct spu_context *ctx = data; - u64 val; + int ret; - spu_acquire(ctx); - val = ctx->ops->status_read(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; + *val = ctx->ops->status_read(ctx); spu_release(ctx); - return val; + return 0; } -static void spufs_cntl_set(void *data, u64 val) +static int spufs_cntl_set(void *data, u64 val) { struct spu_context *ctx = data; + int ret; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; ctx->ops->runcntl_write(ctx, val); spu_release(ctx); + + return 0; } static int spufs_cntl_open(struct inode *inode, struct file *file) @@ -317,7 +454,7 @@ static int spufs_cntl_open(struct inode *inode, struct file *file) if (!i->i_openers++) ctx->cntl = inode->i_mapping; mutex_unlock(&ctx->mapping_lock); - return simple_attr_open(inode, file, spufs_cntl_get, + return spufs_attr_open(inode, file, spufs_cntl_get, spufs_cntl_set, "0x%08lx"); } @@ -327,7 +464,7 @@ spufs_cntl_release(struct inode *inode, struct file *file) struct spufs_inode_info *i = SPUFS_I(inode); struct spu_context *ctx = i->i_ctx; - simple_attr_close(inode, file); + spufs_attr_release(inode, file); mutex_lock(&ctx->mapping_lock); if (!--i->i_openers) @@ -339,8 +476,8 @@ 
spufs_cntl_release(struct inode *inode, struct file *file) static const struct file_operations spufs_cntl_fops = { .open = spufs_cntl_open, .release = spufs_cntl_release, - .read = simple_attr_read, - .write = simple_attr_write, + .read = spufs_attr_read, + .write = spufs_attr_write, .mmap = spufs_cntl_mmap, }; @@ -368,7 +505,9 @@ spufs_regs_read(struct file *file, char __user *buffer, int ret; struct spu_context *ctx = file->private_data; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; ret = __spufs_regs_read(ctx, buffer, size, pos); spu_release_saved(ctx); return ret; @@ -387,7 +526,9 @@ spufs_regs_write(struct file *file, const char __user *buffer, return -EFBIG; *pos += size; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; ret = copy_from_user(lscsa->gprs + *pos - size, buffer, size) ? -EFAULT : size; @@ -419,7 +560,9 @@ spufs_fpcr_read(struct file *file, char __user * buffer, int ret; struct spu_context *ctx = file->private_data; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; ret = __spufs_fpcr_read(ctx, buffer, size, pos); spu_release_saved(ctx); return ret; @@ -436,10 +579,12 @@ spufs_fpcr_write(struct file *file, const char __user * buffer, size = min_t(ssize_t, sizeof(lscsa->fpcr) - *pos, size); if (size <= 0) return -EFBIG; - *pos += size; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + *pos += size; ret = copy_from_user((char *)&lscsa->fpcr + *pos - size, buffer, size) ? 
-EFAULT : size; @@ -486,7 +631,10 @@ static ssize_t spufs_mbox_read(struct file *file, char __user *buf, udata = (void __user *)buf; - spu_acquire(ctx); + count = spu_acquire(ctx); + if (count) + return count; + for (count = 0; (count + 4) <= len; count += 4, udata++) { int ret; ret = ctx->ops->mbox_read(ctx, &mbox_data); @@ -522,12 +670,15 @@ static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + ssize_t ret; u32 mbox_stat; if (len < 4) return -EINVAL; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; mbox_stat = ctx->ops->mbox_stat_read(ctx) & 0xff; @@ -562,6 +713,9 @@ void spufs_ibox_callback(struct spu *spu) { struct spu_context *ctx = spu->ctx; + if (!ctx) + return; + wake_up_all(&ctx->ibox_wq); kill_fasync(&ctx->ibox_fasync, SIGIO, POLLIN); } @@ -593,7 +747,9 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf, udata = (void __user *)buf; - spu_acquire(ctx); + count = spu_acquire(ctx); + if (count) + return count; /* wait only for the first element */ count = 0; @@ -639,7 +795,11 @@ static unsigned int spufs_ibox_poll(struct file *file, poll_table *wait) poll_wait(file, &ctx->ibox_wq, wait); - spu_acquire(ctx); + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. 
+ */ + mutex_lock(&ctx->state_mutex); mask = ctx->ops->mbox_stat_poll(ctx, POLLIN | POLLRDNORM); spu_release(ctx); @@ -657,12 +817,15 @@ static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + ssize_t ret; u32 ibox_stat; if (len < 4) return -EINVAL; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; ibox_stat = (ctx->ops->mbox_stat_read(ctx) >> 16) & 0xff; spu_release(ctx); @@ -698,6 +861,9 @@ void spufs_wbox_callback(struct spu *spu) { struct spu_context *ctx = spu->ctx; + if (!ctx) + return; + wake_up_all(&ctx->wbox_wq); kill_fasync(&ctx->wbox_fasync, SIGIO, POLLOUT); } @@ -731,7 +897,9 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, if (__get_user(wbox_data, udata)) return -EFAULT; - spu_acquire(ctx); + count = spu_acquire(ctx); + if (count) + return count; /* * make sure we can at least write one element, by waiting @@ -772,7 +940,11 @@ static unsigned int spufs_wbox_poll(struct file *file, poll_table *wait) poll_wait(file, &ctx->wbox_wq, wait); - spu_acquire(ctx); + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. 
+ */ + mutex_lock(&ctx->state_mutex); mask = ctx->ops->mbox_stat_poll(ctx, POLLOUT | POLLWRNORM); spu_release(ctx); @@ -790,12 +962,15 @@ static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + ssize_t ret; u32 wbox_stat; if (len < 4) return -EINVAL; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; wbox_stat = (ctx->ops->mbox_stat_read(ctx) >> 8) & 0xff; spu_release(ctx); @@ -866,7 +1041,9 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf, int ret; struct spu_context *ctx = file->private_data; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; ret = __spufs_signal1_read(ctx, buf, len, pos); spu_release_saved(ctx); @@ -877,6 +1054,7 @@ static ssize_t spufs_signal1_write(struct file *file, const char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx; + ssize_t ret; u32 data; ctx = file->private_data; @@ -887,7 +1065,9 @@ static ssize_t spufs_signal1_write(struct file *file, const char __user *buf, if (copy_from_user(&data, buf, 4)) return -EFAULT; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; ctx->ops->signal1_write(ctx, data); spu_release(ctx); @@ -997,7 +1177,9 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf, struct spu_context *ctx = file->private_data; int ret; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; ret = __spufs_signal2_read(ctx, buf, len, pos); spu_release_saved(ctx); @@ -1008,6 +1190,7 @@ static ssize_t spufs_signal2_write(struct file *file, const char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx; + ssize_t ret; u32 data; ctx = file->private_data; @@ -1018,7 +1201,9 @@ static ssize_t spufs_signal2_write(struct file *file, const char __user *buf, if (copy_from_user(&data, buf, 4)) return -EFAULT; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; 
ctx->ops->signal2_write(ctx, data); spu_release(ctx); @@ -1086,33 +1271,42 @@ static const struct file_operations spufs_signal2_nosched_fops = { #define SPU_ATTR_ACQUIRE_SAVED 2 #define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire) \ -static u64 __##__get(void *data) \ +static int __##__get(void *data, u64 *val) \ { \ struct spu_context *ctx = data; \ - u64 ret; \ + int ret = 0; \ \ if (__acquire == SPU_ATTR_ACQUIRE) { \ - spu_acquire(ctx); \ - ret = __get(ctx); \ + ret = spu_acquire(ctx); \ + if (ret) \ + return ret; \ + *val = __get(ctx); \ spu_release(ctx); \ } else if (__acquire == SPU_ATTR_ACQUIRE_SAVED) { \ - spu_acquire_saved(ctx); \ - ret = __get(ctx); \ + ret = spu_acquire_saved(ctx); \ + if (ret) \ + return ret; \ + *val = __get(ctx); \ spu_release_saved(ctx); \ } else \ - ret = __get(ctx); \ + *val = __get(ctx); \ \ - return ret; \ + return 0; \ } \ -DEFINE_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt); +DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt); -static void spufs_signal1_type_set(void *data, u64 val) +static int spufs_signal1_type_set(void *data, u64 val) { struct spu_context *ctx = data; + int ret; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; ctx->ops->signal1_type_set(ctx, val); spu_release(ctx); + + return 0; } static u64 spufs_signal1_type_get(struct spu_context *ctx) @@ -1123,13 +1317,18 @@ DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get, spufs_signal1_type_set, "%llu", SPU_ATTR_ACQUIRE); -static void spufs_signal2_type_set(void *data, u64 val) +static int spufs_signal2_type_set(void *data, u64 val) { struct spu_context *ctx = data; + int ret; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; ctx->ops->signal2_type_set(ctx, val); spu_release(ctx); + + return 0; } static u64 spufs_signal2_type_get(struct spu_context *ctx) @@ -1329,6 +1528,9 @@ void spufs_mfc_callback(struct spu *spu) { struct spu_context *ctx = spu->ctx; + if (!ctx) + return; 
+ wake_up_all(&ctx->mfc_wq); pr_debug("%s %s\n", __FUNCTION__, spu->name); @@ -1375,12 +1577,17 @@ static ssize_t spufs_mfc_read(struct file *file, char __user *buffer, if (size != 4) goto out; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; + + ret = -EINVAL; if (file->f_flags & O_NONBLOCK) { status = ctx->ops->read_mfc_tagstatus(ctx); if (!(status & ctx->tagwait)) ret = -EAGAIN; else + /* XXX(hch): shouldn't we clear ret here? */ ctx->tagwait &= ~status; } else { ret = spufs_wait(ctx->mfc_wq, @@ -1505,7 +1712,11 @@ static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer, if (ret) goto out; - ret = spu_acquire_runnable(ctx, 0); + ret = spu_acquire(ctx); + if (ret) + goto out; + + ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE); if (ret) goto out; @@ -1539,7 +1750,11 @@ static unsigned int spufs_mfc_poll(struct file *file,poll_table *wait) poll_wait(file, &ctx->mfc_wq, wait); - spu_acquire(ctx); + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. 
+ */ + mutex_lock(&ctx->state_mutex); ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2); free_elements = ctx->ops->get_mfc_free_elements(ctx); tagstatus = ctx->ops->read_mfc_tagstatus(ctx); @@ -1562,7 +1777,9 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id) struct spu_context *ctx = file->private_data; int ret; - spu_acquire(ctx); + ret = spu_acquire(ctx); + if (ret) + return ret; #if 0 /* this currently hangs */ ret = spufs_wait(ctx->mfc_wq, @@ -1605,12 +1822,18 @@ static const struct file_operations spufs_mfc_fops = { .mmap = spufs_mfc_mmap, }; -static void spufs_npc_set(void *data, u64 val) +static int spufs_npc_set(void *data, u64 val) { struct spu_context *ctx = data; - spu_acquire(ctx); + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; ctx->ops->npc_write(ctx, val); spu_release(ctx); + + return 0; } static u64 spufs_npc_get(struct spu_context *ctx) @@ -1620,13 +1843,19 @@ static u64 spufs_npc_get(struct spu_context *ctx) DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, "0x%llx\n", SPU_ATTR_ACQUIRE); -static void spufs_decr_set(void *data, u64 val) +static int spufs_decr_set(void *data, u64 val) { struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; - spu_acquire_saved(ctx); + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; lscsa->decr.slot[0] = (u32) val; spu_release_saved(ctx); + + return 0; } static u64 spufs_decr_get(struct spu_context *ctx) @@ -1637,15 +1866,21 @@ static u64 spufs_decr_get(struct spu_context *ctx) DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED); -static void spufs_decr_status_set(void *data, u64 val) +static int spufs_decr_status_set(void *data, u64 val) { struct spu_context *ctx = data; - spu_acquire_saved(ctx); + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; if (val) ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; else ctx->csa.priv2.mfc_control_RW &= 
~MFC_CNTL_DECREMENTER_RUNNING; spu_release_saved(ctx); + + return 0; } static u64 spufs_decr_status_get(struct spu_context *ctx) @@ -1659,13 +1894,19 @@ DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, spufs_decr_status_set, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED); -static void spufs_event_mask_set(void *data, u64 val) +static int spufs_event_mask_set(void *data, u64 val) { struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; - spu_acquire_saved(ctx); + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; lscsa->event_mask.slot[0] = (u32) val; spu_release_saved(ctx); + + return 0; } static u64 spufs_event_mask_get(struct spu_context *ctx) @@ -1690,13 +1931,19 @@ static u64 spufs_event_status_get(struct spu_context *ctx) DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) -static void spufs_srr0_set(void *data, u64 val) +static int spufs_srr0_set(void *data, u64 val) { struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; - spu_acquire_saved(ctx); + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; lscsa->srr0.slot[0] = (u32) val; spu_release_saved(ctx); + + return 0; } static u64 spufs_srr0_get(struct spu_context *ctx) @@ -1727,10 +1974,12 @@ static u64 spufs_object_id_get(struct spu_context *ctx) return ctx->object_id; } -static void spufs_object_id_set(void *data, u64 id) +static int spufs_object_id_set(void *data, u64 id) { struct spu_context *ctx = data; ctx->object_id = id; + + return 0; } DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get, @@ -1777,13 +2026,13 @@ static const struct file_operations spufs_caps_fops = { static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { - u32 mbox_stat; u32 data; - mbox_stat = ctx->csa.prob.mb_stat_R; - if (mbox_stat & 0x0000ff) { - data = ctx->csa.prob.pu_mb_R; - } + /* EOF if there's no entry in the mbox */ + 
if (!(ctx->csa.prob.mb_stat_R & 0x0000ff)) + return 0; + + data = ctx->csa.prob.pu_mb_R; return simple_read_from_buffer(buf, len, pos, &data, sizeof data); } @@ -1797,7 +2046,9 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, if (!access_ok(VERIFY_WRITE, buf, len)) return -EFAULT; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; spin_lock(&ctx->csa.register_lock); ret = __spufs_mbox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); @@ -1815,13 +2066,13 @@ static const struct file_operations spufs_mbox_info_fops = { static ssize_t __spufs_ibox_info_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { - u32 ibox_stat; u32 data; - ibox_stat = ctx->csa.prob.mb_stat_R; - if (ibox_stat & 0xff0000) { - data = ctx->csa.priv2.puint_mb_R; - } + /* EOF if there's no entry in the ibox */ + if (!(ctx->csa.prob.mb_stat_R & 0xff0000)) + return 0; + + data = ctx->csa.priv2.puint_mb_R; return simple_read_from_buffer(buf, len, pos, &data, sizeof data); } @@ -1835,7 +2086,9 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, if (!access_ok(VERIFY_WRITE, buf, len)) return -EFAULT; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; spin_lock(&ctx->csa.register_lock); ret = __spufs_ibox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); @@ -1876,7 +2129,9 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, if (!access_ok(VERIFY_WRITE, buf, len)) return -EFAULT; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; spin_lock(&ctx->csa.register_lock); ret = __spufs_wbox_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); @@ -1926,7 +2181,9 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, if (!access_ok(VERIFY_WRITE, buf, len)) return -EFAULT; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return 
ret; spin_lock(&ctx->csa.register_lock); ret = __spufs_dma_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); @@ -1977,7 +2234,9 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, struct spu_context *ctx = file->private_data; int ret; - spu_acquire_saved(ctx); + ret = spu_acquire_saved(ctx); + if (ret) + return ret; spin_lock(&ctx->csa.register_lock); ret = __spufs_proxydma_info_read(ctx, buf, len, pos); spin_unlock(&ctx->csa.register_lock); @@ -2066,8 +2325,12 @@ static unsigned long long spufs_class2_intrs(struct spu_context *ctx) static int spufs_show_stat(struct seq_file *s, void *private) { struct spu_context *ctx = s->private; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; - spu_acquire(ctx); seq_printf(s, "%s %llu %llu %llu %llu " "%llu %llu %llu %llu %llu %llu %llu %llu\n", ctx_state_names[ctx->stats.util_state], diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c index fc4ed1ffbd4..64f8540b832 100644 --- a/arch/powerpc/platforms/cell/spufs/hw_ops.c +++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c @@ -76,16 +76,18 @@ static unsigned int spu_hw_mbox_stat_poll(struct spu_context *ctx, if (stat & 0xff0000) ret |= POLLIN | POLLRDNORM; else { - spu_int_stat_clear(spu, 2, 0x1); - spu_int_mask_or(spu, 2, 0x1); + spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR); + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); } } if (events & (POLLOUT | POLLWRNORM)) { if (stat & 0x00ff00) ret = POLLOUT | POLLWRNORM; else { - spu_int_stat_clear(spu, 2, 0x10); - spu_int_mask_or(spu, 2, 0x10); + spu_int_stat_clear(spu, 2, + CLASS2_MAILBOX_THRESHOLD_INTR); + spu_int_mask_or(spu, 2, + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR); } } spin_unlock_irq(&spu->register_lock); @@ -106,7 +108,7 @@ static int spu_hw_ibox_read(struct spu_context *ctx, u32 * data) ret = 4; } else { /* make sure we get woken up by the interrupt */ - spu_int_mask_or(spu, 2, 0x1); + spu_int_mask_or(spu, 2, 
CLASS2_ENABLE_MAILBOX_INTR); ret = 0; } spin_unlock_irq(&spu->register_lock); @@ -127,7 +129,7 @@ static int spu_hw_wbox_write(struct spu_context *ctx, u32 data) } else { /* make sure we get woken up by the interrupt when space becomes available */ - spu_int_mask_or(spu, 2, 0x10); + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR); ret = 0; } spin_unlock_irq(&spu->register_lock); @@ -206,6 +208,11 @@ static char *spu_hw_get_ls(struct spu_context *ctx) return ctx->spu->local_store; } +static void spu_hw_privcntl_write(struct spu_context *ctx, u64 val) +{ + out_be64(&ctx->spu->priv2->spu_privcntl_RW, val); +} + static u32 spu_hw_runcntl_read(struct spu_context *ctx) { return in_be32(&ctx->spu->problem->spu_runcntl_RW); @@ -215,11 +222,21 @@ static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val) { spin_lock_irq(&ctx->spu->register_lock); if (val & SPU_RUNCNTL_ISOLATE) - out_be64(&ctx->spu->priv2->spu_privcntl_RW, 4LL); + spu_hw_privcntl_write(ctx, + SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK); out_be32(&ctx->spu->problem->spu_runcntl_RW, val); spin_unlock_irq(&ctx->spu->register_lock); } +static void spu_hw_runcntl_stop(struct spu_context *ctx) +{ + spin_lock_irq(&ctx->spu->register_lock); + out_be32(&ctx->spu->problem->spu_runcntl_RW, SPU_RUNCNTL_STOP); + while (in_be32(&ctx->spu->problem->spu_status_R) & SPU_STATUS_RUNNING) + cpu_relax(); + spin_unlock_irq(&ctx->spu->register_lock); +} + static void spu_hw_master_start(struct spu_context *ctx) { struct spu *spu = ctx->spu; @@ -319,8 +336,10 @@ struct spu_context_ops spu_hw_ops = { .npc_write = spu_hw_npc_write, .status_read = spu_hw_status_read, .get_ls = spu_hw_get_ls, + .privcntl_write = spu_hw_privcntl_write, .runcntl_read = spu_hw_runcntl_read, .runcntl_write = spu_hw_runcntl_write, + .runcntl_stop = spu_hw_runcntl_stop, .master_start = spu_hw_master_start, .master_stop = spu_hw_master_stop, .set_mfc_query = spu_hw_set_mfc_query, diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c 
b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c index f4b3c052dab..0e9f325c9ff 100644 --- a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c +++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c @@ -28,6 +28,8 @@ #include <asm/spu_csa.h> #include <asm/mmu.h> +#include "spufs.h" + static int spu_alloc_lscsa_std(struct spu_state *csa) { struct spu_lscsa *lscsa; @@ -73,7 +75,7 @@ int spu_alloc_lscsa(struct spu_state *csa) int i, j, n_4k; /* Check availability of 64K pages */ - if (mmu_psize_defs[MMU_PAGE_64K].shift == 0) + if (!spu_64k_pages_available()) goto fail; csa->use_big_pages = 1; diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 1ce5e22ea5f..c01a09da1e5 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -15,24 +15,55 @@ void spufs_stop_callback(struct spu *spu) { struct spu_context *ctx = spu->ctx; - wake_up_all(&ctx->stop_wq); + /* + * It should be impossible to preempt a context while an exception + * is being processed, since the context switch code is specially + * coded to deal with interrupts ... But, just in case, sanity check + * the context pointer. It is OK to return doing nothing since + * the exception will be regenerated when the context is resumed. 
+ */ + if (ctx) { + /* Copy exception arguments into module specific structure */ + ctx->csa.class_0_pending = spu->class_0_pending; + ctx->csa.dsisr = spu->dsisr; + ctx->csa.dar = spu->dar; + + /* ensure that the exception status has hit memory before a + * thread waiting on the context's stop queue is woken */ + smp_wmb(); + + wake_up_all(&ctx->stop_wq); + } + + /* Clear callback arguments from spu structure */ + spu->class_0_pending = 0; + spu->dsisr = 0; + spu->dar = 0; } -static inline int spu_stopped(struct spu_context *ctx, u32 *stat) +int spu_stopped(struct spu_context *ctx, u32 *stat) { - struct spu *spu; - u64 pte_fault; + u64 dsisr; + u32 stopped; *stat = ctx->ops->status_read(ctx); - spu = ctx->spu; - if (ctx->state != SPU_STATE_RUNNABLE || - test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) + if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) return 1; - pte_fault = spu->dsisr & - (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); - return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ? - 1 : 0; + + stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + if (*stat & stopped) + return 1; + + dsisr = ctx->csa.dsisr; + if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) + return 1; + + if (ctx->csa.class_0_pending) + return 1; + + return 0; } static int spu_setup_isolated(struct spu_context *ctx) @@ -128,34 +159,66 @@ out: static int spu_run_init(struct spu_context *ctx, u32 *npc) { + unsigned long runcntl = SPU_RUNCNTL_RUNNABLE; + int ret; + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); - if (ctx->flags & SPU_CREATE_ISOLATE) { - unsigned long runcntl; + /* + * NOSCHED is synchronous scheduling with respect to the caller. + * The caller waits for the context to be loaded. 
+ */ + if (ctx->flags & SPU_CREATE_NOSCHED) { + if (ctx->state == SPU_STATE_SAVED) { + ret = spu_activate(ctx, 0); + if (ret) + return ret; + } + } + /* + * Apply special setup as required. + */ + if (ctx->flags & SPU_CREATE_ISOLATE) { if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) { - int ret = spu_setup_isolated(ctx); + ret = spu_setup_isolated(ctx); if (ret) return ret; } - /* if userspace has set the runcntrl register (eg, to issue an - * isolated exit), we need to re-set it here */ + /* + * If userspace has set the runcntrl register (eg, to + * issue an isolated exit), we need to re-set it here + */ runcntl = ctx->ops->runcntl_read(ctx) & (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); if (runcntl == 0) runcntl = SPU_RUNCNTL_RUNNABLE; + } + + if (ctx->flags & SPU_CREATE_NOSCHED) { + spuctx_switch_state(ctx, SPU_UTIL_USER); ctx->ops->runcntl_write(ctx, runcntl); } else { - unsigned long mode = SPU_PRIVCNTL_MODE_NORMAL; - ctx->ops->npc_write(ctx, *npc); + unsigned long privcntl; + if (test_thread_flag(TIF_SINGLESTEP)) - mode = SPU_PRIVCNTL_MODE_SINGLE_STEP; - out_be64(&ctx->spu->priv2->spu_privcntl_RW, mode); - ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); - } + privcntl = SPU_PRIVCNTL_MODE_SINGLE_STEP; + else + privcntl = SPU_PRIVCNTL_MODE_NORMAL; - spuctx_switch_state(ctx, SPU_UTIL_USER); + ctx->ops->npc_write(ctx, *npc); + ctx->ops->privcntl_write(ctx, privcntl); + ctx->ops->runcntl_write(ctx, runcntl); + + if (ctx->state == SPU_STATE_SAVED) { + ret = spu_activate(ctx, 0); + if (ret) + return ret; + } else { + spuctx_switch_state(ctx, SPU_UTIL_USER); + } + } return 0; } @@ -165,6 +228,8 @@ static int spu_run_fini(struct spu_context *ctx, u32 *npc, { int ret = 0; + spu_del_from_rq(ctx); + *status = ctx->ops->status_read(ctx); *npc = ctx->ops->npc_read(ctx); @@ -177,26 +242,6 @@ static int spu_run_fini(struct spu_context *ctx, u32 *npc, return ret; } -static int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc, - u32 *status) -{ - 
int ret; - - ret = spu_run_fini(ctx, npc, status); - if (ret) - return ret; - - if (*status & (SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_STOPPED_BY_HALT)) - return *status; - - ret = spu_acquire_runnable(ctx, 0); - if (ret) - return ret; - - spuctx_switch_state(ctx, SPU_UTIL_USER); - return 0; -} - /* * SPU syscall restarting is tricky because we violate the basic * assumption that the signal handler is running on the interrupted @@ -247,7 +292,7 @@ static int spu_process_callback(struct spu_context *ctx) u32 ls_pointer, npc; void __iomem *ls; long spu_ret; - int ret; + int ret, ret2; /* get syscall block from local store */ npc = ctx->ops->npc_read(ctx) & ~3; @@ -269,9 +314,11 @@ static int spu_process_callback(struct spu_context *ctx) if (spu_ret <= -ERESTARTSYS) { ret = spu_handle_restartsys(ctx, &spu_ret, &npc); } - spu_acquire(ctx); + ret2 = spu_acquire(ctx); if (ret == -ERESTARTSYS) return ret; + if (ret2) + return -EINTR; } /* write result, jump over indirect pointer */ @@ -281,18 +328,6 @@ static int spu_process_callback(struct spu_context *ctx) return ret; } -static inline int spu_process_events(struct spu_context *ctx) -{ - struct spu *spu = ctx->spu; - int ret = 0; - - if (spu->class_0_pending) - ret = spu_irq_class_0_bottom(spu); - if (!ret && signal_pending(current)) - ret = -ERESTARTSYS; - return ret; -} - long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) { int ret; @@ -302,29 +337,14 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) if (mutex_lock_interruptible(&ctx->run_mutex)) return -ERESTARTSYS; - ctx->ops->master_start(ctx); + spu_enable_spu(ctx); ctx->event_return = 0; - spu_acquire(ctx); - if (ctx->state == SPU_STATE_SAVED) { - __spu_update_sched_info(ctx); - spu_set_timeslice(ctx); + ret = spu_acquire(ctx); + if (ret) + goto out_unlock; - ret = spu_activate(ctx, 0); - if (ret) { - spu_release(ctx); - goto out; - } - } else { - /* - * We have to update the scheduling priority under active_mutex - * to protect 
against find_victim(). - * - * No need to update the timeslice ASAP, it will get updated - * once the current one has expired. - */ - spu_update_sched_info(ctx); - } + spu_update_sched_info(ctx); ret = spu_run_init(ctx, npc); if (ret) { @@ -358,14 +378,12 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) if (ret) break; - if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) { - ret = spu_reacquire_runnable(ctx, npc, &status); - if (ret) - goto out2; - continue; - } - ret = spu_process_events(ctx); + ret = spufs_handle_class0(ctx); + if (ret) + break; + if (signal_pending(current)) + ret = -ERESTARTSYS; } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_SINGLE_STEP))); @@ -376,11 +394,10 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) ctx->stats.libassist++; - ctx->ops->master_stop(ctx); + spu_disable_spu(ctx); ret = spu_run_fini(ctx, npc, &status); spu_yield(ctx); -out2: if ((ret == 0) || ((ret == -ERESTARTSYS) && ((status & SPU_STATUS_STOPPED_BY_HALT) || @@ -401,6 +418,7 @@ out2: out: *event = ctx->event_return; +out_unlock: mutex_unlock(&ctx->run_mutex); return ret; } diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 9ad53e637ae..00d914232af 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -58,6 +58,7 @@ static unsigned long spu_avenrun[3]; static struct spu_prio_array *spu_prio; static struct task_struct *spusched_task; static struct timer_list spusched_timer; +static struct timer_list spuloadavg_timer; /* * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). @@ -105,15 +106,21 @@ void spu_set_timeslice(struct spu_context *ctx) void __spu_update_sched_info(struct spu_context *ctx) { /* - * 32-Bit assignment are atomic on powerpc, and we don't care about - * memory ordering here because retriving the controlling thread is - * per defintion racy. 
+ * assert that the context is not on the runqueue, so it is safe + * to change its scheduling parameters. + */ + BUG_ON(!list_empty(&ctx->rq)); + + /* + * 32-Bit assignments are atomic on powerpc, and we don't care about + * memory ordering here because retrieving the controlling thread is + * per definition racy. */ ctx->tid = current->pid; /* * We do our own priority calculations, so we normally want - * ->static_prio to start with. Unfortunately thies field + * ->static_prio to start with. Unfortunately this field * contains junk for threads with a realtime scheduling * policy so we have to look at ->prio in this case. */ @@ -124,23 +131,32 @@ void __spu_update_sched_info(struct spu_context *ctx) ctx->policy = current->policy; /* - * A lot of places that don't hold list_mutex poke into - * cpus_allowed, including grab_runnable_context which - * already holds the runq_lock. So abuse runq_lock - * to protect this field aswell. + * TO DO: the context may be loaded, so we may need to activate + * it again on a different node. But it shouldn't hurt anything + * to update its parameters, because we know that the scheduler + * is not actively looking at this field, since it is not on the + * runqueue. The context will be rescheduled on the proper node + * if it is timesliced or preempted. */ - spin_lock(&spu_prio->runq_lock); ctx->cpus_allowed = current->cpus_allowed; - spin_unlock(&spu_prio->runq_lock); } void spu_update_sched_info(struct spu_context *ctx) { - int node = ctx->spu->node; + int node; - mutex_lock(&cbe_spu_info[node].list_mutex); - __spu_update_sched_info(ctx); - mutex_unlock(&cbe_spu_info[node].list_mutex); + if (ctx->state == SPU_STATE_RUNNABLE) { + node = ctx->spu->node; + + /* + * Take list_mutex to sync with find_victim(). 
+ */ + mutex_lock(&cbe_spu_info[node].list_mutex); + __spu_update_sched_info(ctx); + mutex_unlock(&cbe_spu_info[node].list_mutex); + } else { + __spu_update_sched_info(ctx); + } } static int __node_allowed(struct spu_context *ctx, int node) @@ -174,7 +190,7 @@ void do_notify_spus_active(void) * Wake up the active spu_contexts. * * When the awakened processes see their "notify_active" flag is set, - * they will call spu_switch_notify(); + * they will call spu_switch_notify(). */ for_each_online_node(node) { struct spu *spu; @@ -221,7 +237,6 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) spu->wbox_callback = spufs_wbox_callback; spu->stop_callback = spufs_stop_callback; spu->mfc_callback = spufs_mfc_callback; - spu->dma_callback = spufs_dma_callback; mb(); spu_unmap_mappings(ctx); spu_restore(&ctx->csa, spu); @@ -409,7 +424,6 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) spu->wbox_callback = NULL; spu->stop_callback = NULL; spu->mfc_callback = NULL; - spu->dma_callback = NULL; spu_associate_mm(spu, NULL); spu->pid = 0; spu->tgid = 0; @@ -454,6 +468,13 @@ static void __spu_add_to_rq(struct spu_context *ctx) } } +static void spu_add_to_rq(struct spu_context *ctx) +{ + spin_lock(&spu_prio->runq_lock); + __spu_add_to_rq(ctx); + spin_unlock(&spu_prio->runq_lock); +} + static void __spu_del_from_rq(struct spu_context *ctx) { int prio = ctx->prio; @@ -468,10 +489,24 @@ static void __spu_del_from_rq(struct spu_context *ctx) } } +void spu_del_from_rq(struct spu_context *ctx) +{ + spin_lock(&spu_prio->runq_lock); + __spu_del_from_rq(ctx); + spin_unlock(&spu_prio->runq_lock); +} + static void spu_prio_wait(struct spu_context *ctx) { DEFINE_WAIT(wait); + /* + * The caller must explicitly wait for a context to be loaded + * if the nosched flag is set. If NOSCHED is not set, the caller + * queues the context and waits for an spu event or error. 
+ */ + BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED)); + spin_lock(&spu_prio->runq_lock); prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE); if (!signal_pending(current)) { @@ -555,7 +590,7 @@ static struct spu *find_victim(struct spu_context *ctx) /* * Look for a possible preemption candidate on the local node first. * If there is no candidate look at the other nodes. This isn't - * exactly fair, but so far the whole spu schedule tries to keep + * exactly fair, but so far the whole spu scheduler tries to keep * a strong node affinity. We might want to fine-tune this in * the future. */ @@ -571,6 +606,7 @@ static struct spu *find_victim(struct spu_context *ctx) struct spu_context *tmp = spu->ctx; if (tmp && tmp->prio > ctx->prio && + !(tmp->flags & SPU_CREATE_NOSCHED) && (!victim || tmp->prio > victim->prio)) victim = spu->ctx; } @@ -582,6 +618,10 @@ static struct spu *find_victim(struct spu_context *ctx) * higher priority contexts before lower priority * ones, so this is safe until we introduce * priority inheritance schemes. + * + * XXX if the highest priority context is locked, + * this can loop a long time. Might be better to + * look at another context or give up after X retries. */ if (!mutex_trylock(&victim->state_mutex)) { victim = NULL; @@ -589,10 +629,10 @@ static struct spu *find_victim(struct spu_context *ctx) } spu = victim->spu; - if (!spu) { + if (!spu || victim->prio <= ctx->prio) { /* * This race can happen because we've dropped - * the active list mutex. No a problem, just + * the active list mutex. Not a problem, just * restart the search. */ mutex_unlock(&victim->state_mutex); @@ -607,13 +647,10 @@ static struct spu *find_victim(struct spu_context *ctx) victim->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; + spu_add_to_rq(victim); + mutex_unlock(&victim->state_mutex); - /* - * We need to break out of the wait loop in spu_run - * manually to ensure this context gets put on the - * runqueue again ASAP. 
- */ - wake_up(&victim->stop_wq); + return spu; } } @@ -621,6 +658,50 @@ static struct spu *find_victim(struct spu_context *ctx) return NULL; } +static void __spu_schedule(struct spu *spu, struct spu_context *ctx) +{ + int node = spu->node; + int success = 0; + + spu_set_timeslice(ctx); + + mutex_lock(&cbe_spu_info[node].list_mutex); + if (spu->ctx == NULL) { + spu_bind_context(spu, ctx); + cbe_spu_info[node].nr_active++; + spu->alloc_state = SPU_USED; + success = 1; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + + if (success) + wake_up_all(&ctx->run_wq); + else + spu_add_to_rq(ctx); +} + +static void spu_schedule(struct spu *spu, struct spu_context *ctx) +{ + /* not a candidate for interruptible because it's called either + from the scheduler thread or from spu_deactivate */ + mutex_lock(&ctx->state_mutex); + __spu_schedule(spu, ctx); + spu_release(ctx); +} + +static void spu_unschedule(struct spu *spu, struct spu_context *ctx) +{ + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; + spu->alloc_state = SPU_FREE; + spu_unbind_context(spu, ctx); + ctx->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; + mutex_unlock(&cbe_spu_info[node].list_mutex); +} + /** * spu_activate - find a free spu for a context and execute it * @ctx: spu context to schedule @@ -632,39 +713,47 @@ static struct spu *find_victim(struct spu_context *ctx) */ int spu_activate(struct spu_context *ctx, unsigned long flags) { - do { - struct spu *spu; + struct spu *spu; - /* - * If there are multiple threads waiting for a single context - * only one actually binds the context while the others will - * only be able to acquire the state_mutex once the context - * already is in runnable state. 
- */ - if (ctx->spu) - return 0; + /* + * If there are multiple threads waiting for a single context + * only one actually binds the context while the others will + * only be able to acquire the state_mutex once the context + * already is in runnable state. + */ + if (ctx->spu) + return 0; - spu = spu_get_idle(ctx); - /* - * If this is a realtime thread we try to get it running by - * preempting a lower priority thread. - */ - if (!spu && rt_prio(ctx->prio)) - spu = find_victim(ctx); - if (spu) { - int node = spu->node; +spu_activate_top: + if (signal_pending(current)) + return -ERESTARTSYS; - mutex_lock(&cbe_spu_info[node].list_mutex); - spu_bind_context(spu, ctx); - cbe_spu_info[node].nr_active++; - mutex_unlock(&cbe_spu_info[node].list_mutex); - return 0; - } + spu = spu_get_idle(ctx); + /* + * If this is a realtime thread we try to get it running by + * preempting a lower priority thread. + */ + if (!spu && rt_prio(ctx->prio)) + spu = find_victim(ctx); + if (spu) { + unsigned long runcntl; + + runcntl = ctx->ops->runcntl_read(ctx); + __spu_schedule(spu, ctx); + if (runcntl & SPU_RUNCNTL_RUNNABLE) + spuctx_switch_state(ctx, SPU_UTIL_USER); + return 0; + } + + if (ctx->flags & SPU_CREATE_NOSCHED) { spu_prio_wait(ctx); - } while (!signal_pending(current)); + goto spu_activate_top; + } - return -ERESTARTSYS; + spu_add_to_rq(ctx); + + return 0; } /** @@ -706,21 +795,19 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) if (spu) { new = grab_runnable_context(max_prio, spu->node); if (new || force) { - int node = spu->node; - - mutex_lock(&cbe_spu_info[node].list_mutex); - spu_unbind_context(spu, ctx); - spu->alloc_state = SPU_FREE; - cbe_spu_info[node].nr_active--; - mutex_unlock(&cbe_spu_info[node].list_mutex); - - ctx->stats.vol_ctx_switch++; - spu->stats.vol_ctx_switch++; - - if (new) - wake_up(&new->stop_wq); + spu_unschedule(spu, ctx); + if (new) { + if (new->flags & SPU_CREATE_NOSCHED) + wake_up(&new->stop_wq); + else { + 
spu_release(ctx); + spu_schedule(spu, new); + /* this one can't easily be made + interruptible */ + mutex_lock(&ctx->state_mutex); + } + } } - } return new != NULL; @@ -757,43 +844,38 @@ void spu_yield(struct spu_context *ctx) static noinline void spusched_tick(struct spu_context *ctx) { + struct spu_context *new = NULL; + struct spu *spu = NULL; + u32 status; + + if (spu_acquire(ctx)) + BUG(); /* a kernel thread never has signals pending */ + + if (ctx->state != SPU_STATE_RUNNABLE) + goto out; + if (spu_stopped(ctx, &status)) + goto out; if (ctx->flags & SPU_CREATE_NOSCHED) - return; + goto out; if (ctx->policy == SCHED_FIFO) - return; + goto out; if (--ctx->time_slice) - return; + goto out; - /* - * Unfortunately list_mutex ranks outside of state_mutex, so - * we have to trylock here. If we fail give the context another - * tick and try again. - */ - if (mutex_trylock(&ctx->state_mutex)) { - struct spu *spu = ctx->spu; - struct spu_context *new; - - new = grab_runnable_context(ctx->prio + 1, spu->node); - if (new) { - spu_unbind_context(spu, ctx); - ctx->stats.invol_ctx_switch++; - spu->stats.invol_ctx_switch++; - spu->alloc_state = SPU_FREE; - cbe_spu_info[spu->node].nr_active--; - wake_up(&new->stop_wq); - /* - * We need to break out of the wait loop in - * spu_run manually to ensure this context - * gets put on the runqueue again ASAP. - */ - wake_up(&ctx->stop_wq); - } - spu_set_timeslice(ctx); - mutex_unlock(&ctx->state_mutex); + spu = ctx->spu; + new = grab_runnable_context(ctx->prio + 1, spu->node); + if (new) { + spu_unschedule(spu, ctx); + spu_add_to_rq(ctx); } else { ctx->time_slice++; } +out: + spu_release(ctx); + + if (new) + spu_schedule(spu, new); } /** @@ -817,35 +899,31 @@ static unsigned long count_active_contexts(void) } /** - * spu_calc_load - given tick count, update the avenrun load estimates. - * @tick: tick count + * spu_calc_load - update the avenrun load estimates. 
* * No locking against reading these values from userspace, as for * the CPU loadavg code. */ -static void spu_calc_load(unsigned long ticks) +static void spu_calc_load(void) { unsigned long active_tasks; /* fixed-point */ - static int count = LOAD_FREQ; - - count -= ticks; - - if (unlikely(count < 0)) { - active_tasks = count_active_contexts() * FIXED_1; - do { - CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks); - CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks); - CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks); - count += LOAD_FREQ; - } while (count < 0); - } + + active_tasks = count_active_contexts() * FIXED_1; + CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks); + CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks); + CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks); } static void spusched_wake(unsigned long data) { mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); wake_up_process(spusched_task); - spu_calc_load(SPUSCHED_TICK); +} + +static void spuloadavg_wake(unsigned long data) +{ + mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ); + spu_calc_load(); } static int spusched_thread(void *unused) @@ -857,17 +935,58 @@ static int spusched_thread(void *unused) set_current_state(TASK_INTERRUPTIBLE); schedule(); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&cbe_spu_info[node].list_mutex); - list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) - if (spu->ctx) - spusched_tick(spu->ctx); - mutex_unlock(&cbe_spu_info[node].list_mutex); + struct mutex *mtx = &cbe_spu_info[node].list_mutex; + + mutex_lock(mtx); + list_for_each_entry(spu, &cbe_spu_info[node].spus, + cbe_list) { + struct spu_context *ctx = spu->ctx; + + if (ctx) { + mutex_unlock(mtx); + spusched_tick(ctx); + mutex_lock(mtx); + } + } + mutex_unlock(mtx); } } return 0; } +void spuctx_switch_state(struct spu_context *ctx, + enum spu_utilization_state new_state) +{ + unsigned long long curtime; + signed long long delta; + struct timespec ts; + struct spu *spu; + enum spu_utilization_state 
old_state; + + ktime_get_ts(&ts); + curtime = timespec_to_ns(&ts); + delta = curtime - ctx->stats.tstamp; + + WARN_ON(!mutex_is_locked(&ctx->state_mutex)); + WARN_ON(delta < 0); + + spu = ctx->spu; + old_state = ctx->stats.util_state; + ctx->stats.util_state = new_state; + ctx->stats.tstamp = curtime; + + /* + * Update the physical SPU utilization statistics. + */ + if (spu) { + ctx->stats.times[old_state] += delta; + spu->stats.times[old_state] += delta; + spu->stats.util_state = new_state; + spu->stats.tstamp = curtime; + } +} + #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) @@ -881,7 +1000,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private) /* * Note that last_pid doesn't really make much sense for the - * SPU loadavg (it even seems very odd on the CPU side..), + * SPU loadavg (it even seems very odd on the CPU side...), * but we include it here to have a 100% compatible interface. */ seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", @@ -922,6 +1041,7 @@ int __init spu_sched_init(void) spin_lock_init(&spu_prio->runq_lock); setup_timer(&spusched_timer, spusched_wake, 0); + setup_timer(&spuloadavg_timer, spuloadavg_wake, 0); spusched_task = kthread_run(spusched_thread, NULL, "spusched"); if (IS_ERR(spusched_task)) { @@ -929,6 +1049,8 @@ int __init spu_sched_init(void) goto out_free_spu_prio; } + mod_timer(&spuloadavg_timer, 0); + entry = create_proc_entry("spu_loadavg", 0, NULL); if (!entry) goto out_stop_kthread; @@ -954,6 +1076,7 @@ void spu_sched_exit(void) remove_proc_entry("spu_loadavg", NULL); del_timer_sync(&spusched_timer); + del_timer_sync(&spuloadavg_timer); kthread_stop(spusched_task); for (node = 0; node < MAX_NUMNODES; node++) { diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index ca47b991bda..0e114038ea6 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -71,6 +71,7 @@ struct 
spu_context { wait_queue_head_t wbox_wq; wait_queue_head_t stop_wq; wait_queue_head_t mfc_wq; + wait_queue_head_t run_wq; struct fasync_struct *ibox_fasync; struct fasync_struct *wbox_fasync; struct fasync_struct *mfc_fasync; @@ -168,8 +169,10 @@ struct spu_context_ops { void (*npc_write) (struct spu_context * ctx, u32 data); u32(*status_read) (struct spu_context * ctx); char*(*get_ls) (struct spu_context * ctx); + void (*privcntl_write) (struct spu_context *ctx, u64 data); u32 (*runcntl_read) (struct spu_context * ctx); void (*runcntl_write) (struct spu_context * ctx, u32 data); + void (*runcntl_stop) (struct spu_context * ctx); void (*master_start) (struct spu_context * ctx); void (*master_stop) (struct spu_context * ctx); int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode); @@ -219,15 +222,16 @@ void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx); /* fault handling */ int spufs_handle_class1(struct spu_context *ctx); +int spufs_handle_class0(struct spu_context *ctx); /* affinity */ struct spu *affinity_check(struct spu_context *ctx); /* context management */ extern atomic_t nr_spu_contexts; -static inline void spu_acquire(struct spu_context *ctx) +static inline int __must_check spu_acquire(struct spu_context *ctx) { - mutex_lock(&ctx->state_mutex); + return mutex_lock_interruptible(&ctx->state_mutex); } static inline void spu_release(struct spu_context *ctx) @@ -242,10 +246,11 @@ int put_spu_context(struct spu_context *ctx); void spu_unmap_mappings(struct spu_context *ctx); void spu_forget(struct spu_context *ctx); -int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags); -void spu_acquire_saved(struct spu_context *ctx); +int __must_check spu_acquire_saved(struct spu_context *ctx); void spu_release_saved(struct spu_context *ctx); +int spu_stopped(struct spu_context *ctx, u32 * stat); +void spu_del_from_rq(struct spu_context *ctx); int spu_activate(struct spu_context *ctx, unsigned long flags); void 
spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); @@ -279,7 +284,9 @@ extern char *isolated_loader; } \ spu_release(ctx); \ schedule(); \ - spu_acquire(ctx); \ + __ret = spu_acquire(ctx); \ + if (__ret) \ + break; \ } \ finish_wait(&(wq), &__wait); \ __ret; \ @@ -306,41 +313,16 @@ struct spufs_coredump_reader { extern struct spufs_coredump_reader spufs_coredump_read[]; extern int spufs_coredump_num_notes; -/* - * This function is a little bit too large for an inline, but - * as fault.c is built into the kernel we can't move it out of - * line. - */ -static inline void spuctx_switch_state(struct spu_context *ctx, - enum spu_utilization_state new_state) -{ - unsigned long long curtime; - signed long long delta; - struct timespec ts; - struct spu *spu; - enum spu_utilization_state old_state; - - ktime_get_ts(&ts); - curtime = timespec_to_ns(&ts); - delta = curtime - ctx->stats.tstamp; - - WARN_ON(!mutex_is_locked(&ctx->state_mutex)); - WARN_ON(delta < 0); - - spu = ctx->spu; - old_state = ctx->stats.util_state; - ctx->stats.util_state = new_state; - ctx->stats.tstamp = curtime; - - /* - * Update the physical SPU utilization statistics. 
- */ - if (spu) { - ctx->stats.times[old_state] += delta; - spu->stats.times[old_state] += delta; - spu->stats.util_state = new_state; - spu->stats.tstamp = curtime; - } -} +extern int spu_init_csa(struct spu_state *csa); +extern void spu_fini_csa(struct spu_state *csa); +extern int spu_save(struct spu_state *prev, struct spu *spu); +extern int spu_restore(struct spu_state *new, struct spu *spu); +extern int spu_switch(struct spu_state *prev, struct spu_state *new, + struct spu *spu); +extern int spu_alloc_lscsa(struct spu_state *csa); +extern void spu_free_lscsa(struct spu_state *csa); + +extern void spuctx_switch_state(struct spu_context *ctx, + enum spu_utilization_state new_state); #endif diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 3d64c81cc6e..6063c88c26d 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -48,6 +48,8 @@ #include <asm/spu_csa.h> #include <asm/mmu_context.h> +#include "spufs.h" + #include "spu_save_dump.h" #include "spu_restore_dump.h" @@ -691,35 +693,9 @@ static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu) out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE); } -static inline void get_kernel_slb(u64 ea, u64 slb[2]) +static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu, + unsigned int *code, int code_size) { - u64 llp; - - if (REGION_ID(ea) == KERNEL_REGION_ID) - llp = mmu_psize_defs[mmu_linear_psize].sllp; - else - llp = mmu_psize_defs[mmu_virtual_psize].sllp; - slb[0] = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | - SLB_VSID_KERNEL | llp; - slb[1] = (ea & ESID_MASK) | SLB_ESID_V; -} - -static inline void load_mfc_slb(struct spu *spu, u64 slb[2], int slbe) -{ - struct spu_priv2 __iomem *priv2 = spu->priv2; - - out_be64(&priv2->slb_index_W, slbe); - eieio(); - out_be64(&priv2->slb_vsid_RW, slb[0]); - out_be64(&priv2->slb_esid_RW, slb[1]); - eieio(); -} - -static 
inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu) -{ - u64 code_slb[2]; - u64 lscsa_slb[2]; - /* Save, Step 47: * Restore, Step 30. * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All @@ -735,11 +711,7 @@ static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu) * translation is desired by OS environment). */ spu_invalidate_slbs(spu); - get_kernel_slb((unsigned long)&spu_save_code[0], code_slb); - get_kernel_slb((unsigned long)csa->lscsa, lscsa_slb); - load_mfc_slb(spu, code_slb, 0); - if ((lscsa_slb[0] != code_slb[0]) || (lscsa_slb[1] != code_slb[1])) - load_mfc_slb(spu, lscsa_slb, 1); + spu_setup_kernel_slbs(spu, csa->lscsa, code, code_size); } static inline void set_switch_active(struct spu_state *csa, struct spu *spu) @@ -768,9 +740,9 @@ static inline void enable_interrupts(struct spu_state *csa, struct spu *spu) * (translation) interrupts. */ spin_lock_irq(&spu->register_lock); - spu_int_stat_clear(spu, 0, ~0ul); - spu_int_stat_clear(spu, 1, ~0ul); - spu_int_stat_clear(spu, 2, ~0ul); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); spu_int_mask_set(spu, 0, 0ul); spu_int_mask_set(spu, 1, class1_mask); spu_int_mask_set(spu, 2, 0ul); @@ -927,8 +899,8 @@ static inline void wait_tag_complete(struct spu_state *csa, struct spu *spu) POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask); local_irq_save(flags); - spu_int_stat_clear(spu, 0, ~(0ul)); - spu_int_stat_clear(spu, 2, ~(0ul)); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); local_irq_restore(flags); } @@ -946,8 +918,8 @@ static inline void wait_spu_stopped(struct spu_state *csa, struct spu *spu) POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING); local_irq_save(flags); - spu_int_stat_clear(spu, 0, ~(0ul)); - spu_int_stat_clear(spu, 2, ~(0ul)); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 2, 
CLASS2_INTR_MASK); local_irq_restore(flags); } @@ -1423,9 +1395,9 @@ static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) spu_int_mask_set(spu, 0, 0ul); spu_int_mask_set(spu, 1, 0ul); spu_int_mask_set(spu, 2, 0ul); - spu_int_stat_clear(spu, 0, ~0ul); - spu_int_stat_clear(spu, 1, ~0ul); - spu_int_stat_clear(spu, 2, ~0ul); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); spin_unlock_irq(&spu->register_lock); } @@ -1866,7 +1838,8 @@ static void save_lscsa(struct spu_state *prev, struct spu *spu) */ resume_mfc_queue(prev, spu); /* Step 46. */ - setup_mfc_slbs(prev, spu); /* Step 47. */ + /* Step 47. */ + setup_mfc_slbs(prev, spu, spu_save_code, sizeof(spu_save_code)); set_switch_active(prev, spu); /* Step 48. */ enable_interrupts(prev, spu); /* Step 49. */ save_ls_16kb(prev, spu); /* Step 50. */ @@ -1971,7 +1944,8 @@ static void restore_lscsa(struct spu_state *next, struct spu *spu) setup_spu_status_part1(next, spu); /* Step 27. */ setup_spu_status_part2(next, spu); /* Step 28. */ restore_mfc_rag(next, spu); /* Step 29. */ - setup_mfc_slbs(next, spu); /* Step 30. */ + /* Step 30. */ + setup_mfc_slbs(next, spu, spu_restore_code, sizeof(spu_restore_code)); set_spu_npc(next, spu); /* Step 31. */ set_signot1(next, spu); /* Step 32. */ set_signot2(next, spu); /* Step 33. */ @@ -2103,10 +2077,6 @@ int spu_save(struct spu_state *prev, struct spu *spu) int rc; acquire_spu_lock(spu); /* Step 1. */ - prev->dar = spu->dar; - prev->dsisr = spu->dsisr; - spu->dar = 0; - spu->dsisr = 0; rc = __do_spu_save(prev, spu); /* Steps 2-53. 
*/ release_spu_lock(spu); if (rc != 0 && rc != 2 && rc != 6) { @@ -2133,9 +2103,6 @@ int spu_restore(struct spu_state *new, struct spu *spu) acquire_spu_lock(spu); harvest(NULL, spu); spu->slb_replace = 0; - new->dar = 0; - new->dsisr = 0; - spu->class_0_pending = 0; rc = __do_spu_restore(new, spu); release_spu_lock(spu); if (rc) { @@ -2215,10 +2182,8 @@ int spu_init_csa(struct spu_state *csa) return 0; } -EXPORT_SYMBOL_GPL(spu_init_csa); void spu_fini_csa(struct spu_state *csa) { spu_free_lscsa(csa); } -EXPORT_SYMBOL_GPL(spu_fini_csa); |