aboutsummaryrefslogtreecommitdiff
path: root/arch/sparc64
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-09 23:28:26 -0800
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-09 23:28:26 -0800
commitbfc1de0c40a26c6daa46c297e28138aecb4c5664 (patch)
tree5ac390e4c790076fda0644dd8b583ca819051905 /arch/sparc64
parent1712a699ab32d4952fe6b0f97af91b8230bece98 (diff)
parente88bb41595ad67a8e7d5dd8c7bbeea2e66cc0cac (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6: (24 commits) [SPARC]: Add solaris/sunos binary support to feature removal schedule. [SPARC]: Merge asm-sparc{,64}/a.out.h [SPARC]: Merge asm-sparc{,64}/fb.h [SPARC]: Merge asm-sparc{,64}/errno.h [SPARC]: Merge asm-sparc{,64}/emergency-restart.h [SPARC]: Merge asm-sparc{,64}/div64.h [SPARC]: Merge asm-sparc{,64}/device.h [SPARC]: Merge asm-sparc{,64}/current.h [SPARC]: Merge asm-sparc{,64}/cputime.h [SPARC]: Merge asm-sparc{,64}/cache.h [SPARC]: Merge asm-sparc{,64}/byteorder.h [SPARC]: Merge asm-sparc{,64}/bugs.h [SPARC]: Merge asm-sparc{,64}/bug.h [SPARC]: Kill BSD errno translation table and header files. [SPARC]: Merge asm-sparc{,64}/bpp.h [SPARC]: Merge include/asm-sparc{,64}/auxvec.h [SPARC]: Merge include/asm-sparc{,64}/of_device.h [SPARC]: Merge include/asm-sparc{,64}/prom.h [SPARC]: Remove of_platform_device_create [SPARC64]: Add kretprobe support. ...
Diffstat (limited to 'arch/sparc64')
-rw-r--r--arch/sparc64/Kconfig4
-rw-r--r--arch/sparc64/kernel/iommu.c352
-rw-r--r--arch/sparc64/kernel/iommu_common.h33
-rw-r--r--arch/sparc64/kernel/kprobes.c113
-rw-r--r--arch/sparc64/kernel/of_device.c26
-rw-r--r--arch/sparc64/kernel/pci_sun4v.c278
6 files changed, 509 insertions, 297 deletions
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 81075563731..3af378ddb6a 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -41,6 +41,10 @@ config MMU
bool
default y
+config IOMMU_HELPER
+ bool
+ default y
+
config QUICKLIST
bool
default y
diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c
index 5623a4d59df..d3276ebcfb4 100644
--- a/arch/sparc64/kernel/iommu.c
+++ b/arch/sparc64/kernel/iommu.c
@@ -1,6 +1,6 @@
/* iommu.c: Generic sparc64 IOMMU support.
*
- * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1999, 2007, 2008 David S. Miller (davem@davemloft.net)
* Copyright (C) 1999, 2000 Jakub Jelinek (jakub@redhat.com)
*/
@@ -10,6 +10,7 @@
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
+#include <linux/iommu-helper.h>
#ifdef CONFIG_PCI
#include <linux/pci.h>
@@ -41,7 +42,7 @@
"i" (ASI_PHYS_BYPASS_EC_E))
/* Must be invoked under the IOMMU lock. */
-static void __iommu_flushall(struct iommu *iommu)
+static void iommu_flushall(struct iommu *iommu)
{
if (iommu->iommu_flushinv) {
iommu_write(iommu->iommu_flushinv, ~(u64)0);
@@ -83,54 +84,91 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
iopte_val(*iopte) = val;
}
-/* Based largely upon the ppc64 iommu allocator. */
-static long arena_alloc(struct iommu *iommu, unsigned long npages)
+/* Based almost entirely upon the ppc64 iommu allocator. If you use the 'handle'
+ * facility it must all be done in one pass while under the iommu lock.
+ *
+ * On sun4u platforms, we only flush the IOMMU once every time we've passed
+ * over the entire page table doing allocations. Therefore we only ever advance
+ * the hint and cannot backtrack it.
+ */
+unsigned long iommu_range_alloc(struct device *dev,
+ struct iommu *iommu,
+ unsigned long npages,
+ unsigned long *handle)
{
+ unsigned long n, end, start, limit, boundary_size;
struct iommu_arena *arena = &iommu->arena;
- unsigned long n, i, start, end, limit;
- int pass;
+ int pass = 0;
+
+ /* This allocator was derived from x86_64's bit string search */
+
+ /* Sanity check */
+ if (unlikely(npages == 0)) {
+ if (printk_ratelimit())
+ WARN_ON(1);
+ return DMA_ERROR_CODE;
+ }
+
+ if (handle && *handle)
+ start = *handle;
+ else
+ start = arena->hint;
limit = arena->limit;
- start = arena->hint;
- pass = 0;
-again:
- n = find_next_zero_bit(arena->map, limit, start);
- end = n + npages;
- if (unlikely(end >= limit)) {
+ /* The case below can happen if we have a small segment appended
+ * to a large, or when the previous alloc was at the very end of
+ * the available space. If so, go back to the beginning and flush.
+ */
+ if (start >= limit) {
+ start = 0;
+ if (iommu->flush_all)
+ iommu->flush_all(iommu);
+ }
+
+ again:
+
+ if (dev)
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ 1 << IO_PAGE_SHIFT);
+ else
+ boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT);
+
+ n = iommu_area_alloc(arena->map, limit, start, npages, 0,
+ boundary_size >> IO_PAGE_SHIFT, 0);
+ if (n == -1) {
if (likely(pass < 1)) {
- limit = start;
+ /* First failure, rescan from the beginning. */
start = 0;
- __iommu_flushall(iommu);
+ if (iommu->flush_all)
+ iommu->flush_all(iommu);
pass++;
goto again;
} else {
- /* Scanned the whole thing, give up. */
- return -1;
- }
- }
-
- for (i = n; i < end; i++) {
- if (test_bit(i, arena->map)) {
- start = i + 1;
- goto again;
+ /* Second failure, give up */
+ return DMA_ERROR_CODE;
}
}
- for (i = n; i < end; i++)
- __set_bit(i, arena->map);
+ end = n + npages;
arena->hint = end;
+ /* Update handle for SG allocations */
+ if (handle)
+ *handle = end;
+
return n;
}
-static void arena_free(struct iommu_arena *arena, unsigned long base, unsigned long npages)
+void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages)
{
- unsigned long i;
+ struct iommu_arena *arena = &iommu->arena;
+ unsigned long entry;
- for (i = base; i < (base + npages); i++)
- __clear_bit(i, arena->map);
+ entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
+
+ iommu_area_free(arena->map, entry, npages);
}
int iommu_table_init(struct iommu *iommu, int tsbsize,
@@ -156,6 +194,9 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
}
iommu->arena.limit = num_tsb_entries;
+ if (tlb_type != hypervisor)
+ iommu->flush_all = iommu_flushall;
+
/* Allocate and initialize the dummy page which we
* set inactive IO PTEs to point to.
*/
@@ -192,22 +233,18 @@ out_free_map:
return -ENOMEM;
}
-static inline iopte_t *alloc_npages(struct iommu *iommu, unsigned long npages)
+static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu,
+ unsigned long npages)
{
- long entry;
+ unsigned long entry;
- entry = arena_alloc(iommu, npages);
- if (unlikely(entry < 0))
+ entry = iommu_range_alloc(dev, iommu, npages, NULL);
+ if (unlikely(entry == DMA_ERROR_CODE))
return NULL;
return iommu->page_table + entry;
}
-static inline void free_npages(struct iommu *iommu, dma_addr_t base, unsigned long npages)
-{
- arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages);
-}
-
static int iommu_alloc_ctx(struct iommu *iommu)
{
int lowest = iommu->ctx_lowest_free;
@@ -258,7 +295,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
iommu = dev->archdata.iommu;
spin_lock_irqsave(&iommu->lock, flags);
- iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT);
+ iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
spin_unlock_irqrestore(&iommu->lock, flags);
if (unlikely(iopte == NULL)) {
@@ -296,7 +333,7 @@ static void dma_4u_free_coherent(struct device *dev, size_t size,
spin_lock_irqsave(&iommu->lock, flags);
- free_npages(iommu, dvma - iommu->page_table_map_base, npages);
+ iommu_range_free(iommu, dvma, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -327,7 +364,7 @@ static dma_addr_t dma_4u_map_single(struct device *dev, void *ptr, size_t sz,
npages >>= IO_PAGE_SHIFT;
spin_lock_irqsave(&iommu->lock, flags);
- base = alloc_npages(iommu, npages);
+ base = alloc_npages(dev, iommu, npages);
ctx = 0;
if (iommu->iommu_ctxflush)
ctx = iommu_alloc_ctx(iommu);
@@ -465,7 +502,7 @@ static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr,
for (i = 0; i < npages; i++)
iopte_make_dummy(iommu, base + i);
- free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);
+ iommu_range_free(iommu, bus_addr, npages);
iommu_free_ctx(iommu, ctx);
@@ -475,124 +512,209 @@ static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr,
static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction)
{
- unsigned long flags, ctx, i, npages, iopte_protection;
- struct scatterlist *sg;
+ struct scatterlist *s, *outs, *segstart;
+ unsigned long flags, handle, prot, ctx;
+ dma_addr_t dma_next = 0, dma_addr;
+ unsigned int max_seg_size;
+ int outcount, incount, i;
struct strbuf *strbuf;
struct iommu *iommu;
- iopte_t *base;
- u32 dma_base;
-
- /* Fast path single entry scatterlists. */
- if (nelems == 1) {
- sglist->dma_address =
- dma_4u_map_single(dev, sg_virt(sglist),
- sglist->length, direction);
- if (unlikely(sglist->dma_address == DMA_ERROR_CODE))
- return 0;
- sglist->dma_length = sglist->length;
- return 1;
- }
+
+ BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
strbuf = dev->archdata.stc;
-
- if (unlikely(direction == DMA_NONE))
- goto bad_no_ctx;
-
- npages = calc_npages(sglist, nelems);
+ if (nelems == 0 || !iommu)
+ return 0;
spin_lock_irqsave(&iommu->lock, flags);
- base = alloc_npages(iommu, npages);
ctx = 0;
if (iommu->iommu_ctxflush)
ctx = iommu_alloc_ctx(iommu);
- spin_unlock_irqrestore(&iommu->lock, flags);
-
- if (base == NULL)
- goto bad;
-
- dma_base = iommu->page_table_map_base +
- ((base - iommu->page_table) << IO_PAGE_SHIFT);
-
if (strbuf->strbuf_enabled)
- iopte_protection = IOPTE_STREAMING(ctx);
+ prot = IOPTE_STREAMING(ctx);
else
- iopte_protection = IOPTE_CONSISTENT(ctx);
+ prot = IOPTE_CONSISTENT(ctx);
if (direction != DMA_TO_DEVICE)
- iopte_protection |= IOPTE_WRITE;
-
- for_each_sg(sglist, sg, nelems, i) {
- unsigned long paddr = SG_ENT_PHYS_ADDRESS(sg);
- unsigned long slen = sg->length;
- unsigned long this_npages;
+ prot |= IOPTE_WRITE;
+
+ outs = s = segstart = &sglist[0];
+ outcount = 1;
+ incount = nelems;
+ handle = 0;
+
+ /* Init first segment length for backout at failure */
+ outs->dma_length = 0;
+
+ max_seg_size = dma_get_max_seg_size(dev);
+ for_each_sg(sglist, s, nelems, i) {
+ unsigned long paddr, npages, entry, slen;
+ iopte_t *base;
+
+ slen = s->length;
+ /* Sanity check */
+ if (slen == 0) {
+ dma_next = 0;
+ continue;
+ }
+ /* Allocate iommu entries for that segment */
+ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+ npages = iommu_num_pages(paddr, slen);
+ entry = iommu_range_alloc(dev, iommu, npages, &handle);
+
+ /* Handle failure */
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ if (printk_ratelimit())
+ printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+ " npages %lx\n", iommu, paddr, npages);
+ goto iommu_map_failed;
+ }
- this_npages = iommu_num_pages(paddr, slen);
+ base = iommu->page_table + entry;
- sg->dma_address = dma_base | (paddr & ~IO_PAGE_MASK);
- sg->dma_length = slen;
+ /* Convert entry to a dma_addr_t */
+ dma_addr = iommu->page_table_map_base +
+ (entry << IO_PAGE_SHIFT);
+ dma_addr |= (s->offset & ~IO_PAGE_MASK);
+ /* Insert into HW table */
paddr &= IO_PAGE_MASK;
- while (this_npages--) {
- iopte_val(*base) = iopte_protection | paddr;
-
+ while (npages--) {
+ iopte_val(*base) = prot | paddr;
base++;
paddr += IO_PAGE_SIZE;
- dma_base += IO_PAGE_SIZE;
}
+
+ /* If we are in an open segment, try merging */
+ if (segstart != s) {
+ /* We cannot merge if:
+ * - allocated dma_addr isn't contiguous to previous allocation
+ */
+ if ((dma_addr != dma_next) ||
+ (outs->dma_length + s->length > max_seg_size)) {
+ /* Can't merge: create a new segment */
+ segstart = s;
+ outcount++;
+ outs = sg_next(outs);
+ } else {
+ outs->dma_length += s->length;
+ }
+ }
+
+ if (segstart == s) {
+ /* This is a new segment, fill entries */
+ outs->dma_address = dma_addr;
+ outs->dma_length = slen;
+ }
+
+ /* Calculate next page pointer for contiguous check */
+ dma_next = dma_addr + slen;
}
- return nelems;
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
+ if (outcount < incount) {
+ outs = sg_next(outs);
+ outs->dma_address = DMA_ERROR_CODE;
+ outs->dma_length = 0;
+ }
+
+ return outcount;
+
+iommu_map_failed:
+ for_each_sg(sglist, s, nelems, i) {
+ if (s->dma_length != 0) {
+ unsigned long vaddr, npages, entry, i;
+ iopte_t *base;
+
+ vaddr = s->dma_address & IO_PAGE_MASK;
+ npages = iommu_num_pages(s->dma_address, s->dma_length);
+ iommu_range_free(iommu, vaddr, npages);
+
+ entry = (vaddr - iommu->page_table_map_base)
+ >> IO_PAGE_SHIFT;
+ base = iommu->page_table + entry;
+
+ for (i = 0; i < npages; i++)
+ iopte_make_dummy(iommu, base + i);
+
+ s->dma_address = DMA_ERROR_CODE;
+ s->dma_length = 0;
+ }
+ if (s == outs)
+ break;
+ }
+ spin_unlock_irqrestore(&iommu->lock, flags);
-bad:
- iommu_free_ctx(iommu, ctx);
-bad_no_ctx:
- if (printk_ratelimit())
- WARN_ON(1);
return 0;
}
+/* If contexts are being used, they are the same in all of the mappings
+ * we make for a particular SG.
+ */
+static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
+{
+ unsigned long ctx = 0;
+
+ if (iommu->iommu_ctxflush) {
+ iopte_t *base;
+ u32 bus_addr;
+
+ bus_addr = sg->dma_address & IO_PAGE_MASK;
+ base = iommu->page_table +
+ ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+
+ ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+ }
+ return ctx;
+}
+
static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction)
{
- unsigned long flags, ctx, i, npages;
+ unsigned long flags, ctx;
+ struct scatterlist *sg;
struct strbuf *strbuf;
struct iommu *iommu;
- iopte_t *base;
- u32 bus_addr;
- if (unlikely(direction == DMA_NONE)) {
- if (printk_ratelimit())
- WARN_ON(1);
- }
+ BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
strbuf = dev->archdata.stc;
- bus_addr = sglist->dma_address & IO_PAGE_MASK;
+ ctx = fetch_sg_ctx(iommu, sglist);
- npages = calc_npages(sglist, nelems);
+ spin_lock_irqsave(&iommu->lock, flags);
- base = iommu->page_table +
- ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+ sg = sglist;
+ while (nelems--) {
+ dma_addr_t dma_handle = sg->dma_address;
+ unsigned int len = sg->dma_length;
+ unsigned long npages, entry;
+ iopte_t *base;
+ int i;
- spin_lock_irqsave(&iommu->lock, flags);
+ if (!len)
+ break;
+ npages = iommu_num_pages(dma_handle, len);
+ iommu_range_free(iommu, dma_handle, npages);
- /* Record the context, if any. */
- ctx = 0;
- if (iommu->iommu_ctxflush)
- ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+ entry = ((dma_handle - iommu->page_table_map_base)
+ >> IO_PAGE_SHIFT);
+ base = iommu->page_table + entry;
- /* Step 1: Kick data out of streaming buffers if necessary. */
- if (strbuf->strbuf_enabled)
- strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
+ dma_handle &= IO_PAGE_MASK;
+ if (strbuf->strbuf_enabled)
+ strbuf_flush(strbuf, iommu, dma_handle, ctx,
+ npages, direction);
- /* Step 2: Clear out the TSB entries. */
- for (i = 0; i < npages; i++)
- iopte_make_dummy(iommu, base + i);
+ for (i = 0; i < npages; i++)
+ iopte_make_dummy(iommu, base + i);
- free_npages(iommu, bus_addr - iommu->page_table_map_base, npages);
+ sg = sg_next(sg);
+ }
iommu_free_ctx(iommu, ctx);
diff --git a/arch/sparc64/kernel/iommu_common.h b/arch/sparc64/kernel/iommu_common.h
index 4b5cafa2877..0713bd58499 100644
--- a/arch/sparc64/kernel/iommu_common.h
+++ b/arch/sparc64/kernel/iommu_common.h
@@ -1,9 +1,11 @@
-/* $Id: iommu_common.h,v 1.5 2001/12/11 09:41:01 davem Exp $
- * iommu_common.h: UltraSparc SBUS/PCI common iommu declarations.
+/* iommu_common.h: UltraSparc SBUS/PCI common iommu declarations.
*
- * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1999, 2008 David S. Miller (davem@davemloft.net)
*/
+#ifndef _IOMMU_COMMON_H
+#define _IOMMU_COMMON_H
+
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/sched.h>
@@ -56,21 +58,12 @@ static inline unsigned long calc_npages(struct scatterlist *sglist, int nelems)
return npages;
}
-/* You are _strongly_ advised to enable the following debugging code
- * any time you make changes to the sg code below, run it for a while
- * with filesystems mounted read-only before buying the farm... -DaveM
- */
-#undef VERIFY_SG
-
-#ifdef VERIFY_SG
-extern void verify_sglist(struct scatterlist *sg, int nents, iopte_t *iopte, int npages);
-#endif
-
-/* Two addresses are "virtually contiguous" if and only if:
- * 1) They are equal, or...
- * 2) They are both on a page boundary
- */
-#define VCONTIG(__X, __Y) (((__X) == (__Y)) || \
- (((__X) | (__Y)) << (64UL - PAGE_SHIFT)) == 0UL)
+extern unsigned long iommu_range_alloc(struct device *dev,
+ struct iommu *iommu,
+ unsigned long npages,
+ unsigned long *handle);
+extern void iommu_range_free(struct iommu *iommu,
+ dma_addr_t dma_addr,
+ unsigned long npages);
-extern unsigned long prepare_sg(struct device *dev, struct scatterlist *sg, int nents);
+#endif /* _IOMMU_COMMON_H */
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index d94f901d321..34fc3ddd500 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -480,8 +480,117 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-/* architecture specific initialization */
-int arch_init_kprobes(void)
+/* Called with kretprobe_lock held. The value stored in the return
+ * address register is actually 2 instructions before where the
+ * callee will return to. Sequences usually look something like this
+ *
+ * call some_function <--- return register points here
+ * nop <--- call delay slot
+ * whatever <--- where callee returns to
+ *
+ * To keep trampoline_probe_handler logic simpler, we normalize the
+ * value kept in ri->ret_addr so we don't need to keep adjusting it
+ * back and forth.
+ */
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *)(regs->u_regs[UREG_RETPC] + 8);
+
+ /* Replace the return addr with trampoline addr */
+ regs->u_regs[UREG_RETPC] =
+ ((unsigned long)kretprobe_trampoline) - 8;
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *node, *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ spin_lock_irqsave(&kretprobe_lock, flags);
+ head = kretprobe_inst_table_head(current);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because an multiple functions in the call path
+ * have a return probe installed on them, and/or more then one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+ regs->tpc = orig_ret_address;
+ regs->tnpc = orig_ret_address + 4;
+
+ reset_current_kprobe();
+ spin_unlock_irqrestore(&kretprobe_lock, flags);
+ preempt_enable_no_resched();
+
+ hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
+}
+
+void kretprobe_trampoline_holder(void)
+{
+ asm volatile(".global kretprobe_trampoline\n"
+ "kretprobe_trampoline:\n"
+ "\tnop\n"
+ "\tnop\n");
+}
+static struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
{
+ return register_kprobe(&trampoline_p);
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+ return 1;
+
return 0;
}
diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c
index fc5c0cc793b..0fd9db95b89 100644
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -868,29 +868,3 @@ static int __init of_debug(char *str)
}
__setup("of_debug=", of_debug);
-
-struct of_device* of_platform_device_create(struct device_node *np,
- const char *bus_id,
- struct device *parent,
- struct bus_type *bus)
-{
- struct of_device *dev;
-
- dev = kzalloc(sizeof(*dev), GFP_KERNEL);
- if (!dev)
- return NULL;
-
- dev->dev.parent = parent;
- dev->dev.bus = bus;
- dev->dev.release = of_release_dev;
-
- strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE);
-
- if (of_device_register(dev) != 0) {
- kfree(dev);
- return NULL;
- }
-
- return dev;
-}
-EXPORT_SYMBOL(of_platform_device_create);
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c
index 61baf8dc095..ddca6c6c0b4 100644
--- a/arch/sparc64/kernel/pci_sun4v.c
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -1,6 +1,6 @@
/* pci_sun4v.c: SUN4V specific PCI controller support.
*
- * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
*/
#include <linux/kernel.h>
@@ -89,6 +89,17 @@ static long iommu_batch_flush(struct iommu_batch *p)
return 0;
}
+static inline void iommu_batch_new_entry(unsigned long entry)
+{
+ struct iommu_batch *p = &__get_cpu_var(iommu_batch);
+
+ if (p->entry + p->npages == entry)
+ return;
+ if (p->entry != ~0UL)
+ iommu_batch_flush(p);
+ p->entry = entry;
+}
+
/* Interrupts must be disabled. */
static inline long iommu_batch_add(u64 phys_page)
{
@@ -113,54 +124,6 @@ static inline long iommu_batch_end(void)
return iommu_batch_flush(p);
}
-static long arena_alloc(struct iommu_arena *arena, unsigned long npages)
-{
- unsigned long n, i, start, end, limit;
- int pass;
-
- limit = arena->limit;
- start = arena->hint;
- pass = 0;
-
-again:
- n = find_next_zero_bit(arena->map, limit, start);
- end = n + npages;
- if (unlikely(end >= limit)) {
- if (likely(pass < 1)) {
- limit = start;
- start = 0;
- pass++;
- goto again;
- } else {
- /* Scanned the whole thing, give up. */
- return -1;
- }
- }
-
- for (i = n; i < end; i++) {
- if (test_bit(i, arena->map)) {
- start = i + 1;
- goto again;
- }
- }
-
- for (i = n; i < end; i++)
- __set_bit(i, arena->map);
-
- arena->hint = end;
-
- return n;
-}
-
-static void arena_free(struct iommu_arena *arena, unsigned long base,
- unsigned long npages)
-{
- unsigned long i;
-
- for (i = base; i < (base + npages); i++)
- __clear_bit(i, arena->map);
-}
-
static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addrp, gfp_t gfp)
{
@@ -185,11 +148,11 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
iommu = dev->archdata.iommu;
spin_lock_irqsave(&iommu->lock, flags);
- entry = arena_alloc(&iommu->arena, npages);
+ entry = iommu_range_alloc(dev, iommu, npages, NULL);
spin_unlock_irqrestore(&iommu->lock, flags);
- if (unlikely(entry < 0L))
- goto arena_alloc_fail;
+ if (unlikely(entry == DMA_ERROR_CODE))
+ goto range_alloc_fail;
*dma_addrp = (iommu->page_table_map_base +
(entry << IO_PAGE_SHIFT));
@@ -219,10 +182,10 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
iommu_map_fail:
/* Interrupts are disabled. */
spin_lock(&iommu->lock);
- arena_free(&iommu->arena, entry, npages);
+ iommu_range_free(iommu, *dma_addrp, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
-arena_alloc_fail:
+range_alloc_fail:
free_pages(first_page, order);
return NULL;
}
@@ -243,7 +206,7 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
spin_lock_irqsave(&iommu->lock, flags);
- arena_free(&iommu->arena, entry, npages);
+ iommu_range_free(iommu, dvma, npages);
do {
unsigned long num;
@@ -281,10 +244,10 @@ static dma_addr_t dma_4v_map_single(struct device *dev, void *ptr, size_t sz,
npages >>= IO_PAGE_SHIFT;
spin_lock_irqsave(&iommu->lock, flags);
- entry = arena_alloc(&iommu->arena, npages);
+ entry = iommu_range_alloc(dev, iommu, npages, NULL);
spin_unlock_irqrestore(&iommu->lock, flags);
- if (unlikely(entry < 0L))
+ if (unlikely(entry == DMA_ERROR_CODE))
goto bad;
bus_addr = (iommu->page_table_map_base +
@@ -319,7 +282,7 @@ bad:
iommu_map_fail:
/* Interrupts are disabled. */
spin_lock(&iommu->lock);
- arena_free(&iommu->arena, entry, npages);
+ iommu_range_free(iommu, bus_addr, npages);
spin_unlock_irqrestore(&iommu->lock, flags);
return DMA_ERROR_CODE;
@@ -350,9 +313,9 @@ static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr,
spin_lock_irqsave(&iommu->lock, flags);
- entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
- arena_free(&iommu->arena, entry, npages);
+ iommu_range_free(iommu, bus_addr, npages);
+ entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
do {
unsigned long num;
@@ -368,88 +331,131 @@ static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr,
static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction)
{
- unsigned long flags, npages, i, prot;
- struct scatterlist *sg;
+ struct scatterlist *s, *outs, *segstart;
+ unsigned long flags, handle, prot;
+ dma_addr_t dma_next = 0, dma_addr;
+ unsigned int max_seg_size;
+ int outcount, incount, i;
struct iommu *iommu;
- long entry, err;
- u32 dma_base;
-
- /* Fast path single entry scatterlists. */
- if (nelems == 1) {
- sglist->dma_address =
- dma_4v_map_single(dev, sg_virt(sglist),
- sglist->length, direction);
- if (unlikely(sglist->dma_address == DMA_ERROR_CODE))
- return 0;
- sglist->dma_length = sglist->length;
- return 1;
- }
+ long err;
+
+ BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
+ if (nelems == 0 || !iommu)
+ return 0;
- if (unlikely(direction == DMA_NONE))
- goto bad;
-
- npages = calc_npages(sglist, nelems);
+ prot = HV_PCI_MAP_ATTR_READ;
+ if (direction != DMA_TO_DEVICE)
+ prot |= HV_PCI_MAP_ATTR_WRITE;
- spin_lock_irqsave(&iommu->lock, flags);
- entry = arena_alloc(&iommu->arena, npages);
- spin_unlock_irqrestore(&iommu->lock, flags);
+ outs = s = segstart = &sglist[0];
+ outcount = 1;
+ incount = nelems;
+ handle = 0;
- if (unlikely(entry < 0L))
- goto bad;
+ /* Init first segment length for backout at failure */
+ outs->dma_length = 0;
- dma_base = iommu->page_table_map_base +
- (entry << IO_PAGE_SHIFT);
+ spin_lock_irqsave(&iommu->lock, flags);
- prot = HV_PCI_MAP_ATTR_READ;
- if (direction != DMA_TO_DEVICE)
- prot |= HV_PCI_MAP_ATTR_WRITE;
+ iommu_batch_start(dev, prot, ~0UL);
- local_irq_save(flags);
+ max_seg_size = dma_get_max_seg_size(dev);
+ for_each_sg(sglist, s, nelems, i) {
+ unsigned long paddr, npages, entry, slen;
- iommu_batch_start(dev, prot, entry);
+ slen = s->length;
+ /* Sanity check */
+ if (slen == 0) {
+ dma_next = 0;
+ continue;
+ }
+ /* Allocate iommu entries for that segment */
+ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+ npages = iommu_num_pages(paddr, slen);
+ entry = iommu_range_alloc(dev, iommu, npages, &handle);
- for_each_sg(sglist, sg, nelems, i) {
- unsigned long paddr = SG_ENT_PHYS_ADDRESS(sg);
- unsigned long slen = sg->length;
- unsigned long this_npages;
+ /* Handle failure */
+ if (unlikely(entry == DMA_ERROR_CODE)) {
+ if (printk_ratelimit())
+ printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+ " npages %lx\n", iommu, paddr, npages);
+ goto iommu_map_failed;
+ }
- this_npages = iommu_num_pages(paddr, slen);
+ iommu_batch_new_entry(entry);
- sg->dma_address = dma_base | (paddr & ~IO_PAGE_MASK);
- sg->dma_length = slen;
+ /* Convert entry to a dma_addr_t */
+ dma_addr = iommu->page_table_map_base +
+ (entry << IO_PAGE_SHIFT);
+ dma_addr |= (s->offset & ~IO_PAGE_MASK);
+ /* Insert into HW table */
paddr &= IO_PAGE_MASK;
- while (this_npages--) {
+ while (npages--) {
err = iommu_batch_add(paddr);
- if (unlikely(err < 0L)) {
- local_irq_restore(flags);
+ if (unlikely(err < 0L))
goto iommu_map_failed;
+ paddr += IO_PAGE_SIZE;
+ }
+
+ /* If we are in an open segment, try merging */
+ if (segstart != s) {
+ /* We cannot merge if:
+ * - allocated dma_addr isn't contiguous to previous allocation
+ */
+ if ((dma_addr != dma_next) ||
+ (outs->dma_length + s->length > max_seg_size)) {
+ /* Can't merge: create a new segment */
+ segstart = s;
+ outcount++;
+ outs = sg_next(outs);
+ } else {
+ outs->dma_length += s->length;
}
+ }
- paddr += IO_PAGE_SIZE;
- dma_base += IO_PAGE_SIZE;
+ if (segstart == s) {
+ /* This is a new segment, fill entries */
+ outs->dma_address = dma_addr;
+ outs->dma_length = slen;
}
+
+ /* Calculate next page pointer for contiguous check */
+ dma_next = dma_addr + slen;
}
err = iommu_batch_end();
- local_irq_restore(flags);
-
if (unlikely(err < 0L))
goto iommu_map_failed;
- return nelems;
+ spin_unlock_irqrestore(&iommu->lock, flags);
-bad:
- if (printk_ratelimit())
- WARN_ON(1);
- return 0;
+ if (outcount < incount) {
+ outs = sg_next(outs);
+ outs->dma_address = DMA_ERROR_CODE;
+ outs->dma_length = 0;
+ }
+
+ return outcount;
iommu_map_failed:
- spin_lock_irqsave(&iommu->lock, flags);
- arena_free(&iommu->arena, entry, npages);
+ for_each_sg(sglist, s, nelems, i) {
+ if (s->dma_length != 0) {
+ unsigned long vaddr, npages;
+
+ vaddr = s->dma_address & IO_PAGE_MASK;
+ npages = iommu_num_pages(s->dma_address, s->dma_length);
+ iommu_range_free(iommu, vaddr, npages);
+ /* XXX demap? XXX */
+ s->dma_address = DMA_ERROR_CODE;
+ s->dma_length = 0;
+ }
+ if (s == outs)
+ break;
+ }
spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
@@ -458,39 +464,43 @@ iommu_map_failed:
static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction)
{
- unsigned long flags, npages;
struct pci_pbm_info *pbm;
- u32 devhandle, bus_addr;
+ struct scatterlist *sg;
struct iommu *iommu;
- long entry;
+ unsigned long flags;
+ u32 devhandle;
- if (unlikely(direction == DMA_NONE)) {
- if (printk_ratelimit())
- WARN_ON(1);
- }
+ BUG_ON(direction == DMA_NONE);
iommu = dev->archdata.iommu;
pbm = dev->archdata.host_controller;
devhandle = pbm->devhandle;
- bus_addr = sglist->dma_address & IO_PAGE_MASK;
-
- npages = calc_npages(sglist, nelems);
-
- entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
-
spin_lock_irqsave(&iommu->lock, flags);
- arena_free(&iommu->arena, entry, npages);
-
- do {
- unsigned long num;
+ sg = sglist;
+ while (nelems--) {
+ dma_addr_t dma_handle = sg->dma_address;
+ unsigned int len = sg->dma_length;
+ unsigned long npages, entry;
+
+ if (!len)
+ break;
+ npages = iommu_num_pages(dma_handle, len);
+ iommu_range_free(iommu, dma_handle, npages);
+
+ entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
+ while (npages) {
+ unsigned long num;
+
+ num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
+ npages);
+ entry += num;
+ npages -= num;
+ }
- num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
- npages);
- entry += num;
- npages -= num;
- } while (npages != 0);
+ sg = sg_next(sg);
+ }
spin_unlock_irqrestore(&iommu->lock, flags);
}