aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CREDITS1
-rw-r--r--Documentation/ABI/testing/debugfs-kmemtrace71
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-memmap2
-rw-r--r--Documentation/PCI/PCIEBUS-HOWTO.txt2
-rw-r--r--Documentation/cgroups/cgroups.txt6
-rw-r--r--Documentation/ftrace.txt74
-rw-r--r--Documentation/hwmon/hpfall.c101
-rw-r--r--Documentation/hwmon/lis3lv02d8
-rw-r--r--Documentation/kernel-parameters.txt10
-rw-r--r--Documentation/sysrq.txt2
-rw-r--r--Documentation/vm/kmemtrace.txt126
-rw-r--r--MAINTAINERS24
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/include/asm/hardirq.h13
-rw-r--r--arch/alpha/kernel/process.c8
-rw-r--r--arch/alpha/kernel/smp.c12
-rw-r--r--arch/avr32/include/asm/hardirq.h11
-rw-r--r--arch/ia64/Kconfig3
-rw-r--r--arch/ia64/include/asm/ftrace.h28
-rw-r--r--arch/ia64/include/asm/hardirq.h10
-rw-r--r--arch/ia64/include/asm/kvm.h4
-rw-r--r--arch/ia64/include/asm/mmzone.h4
-rw-r--r--arch/ia64/kernel/Makefile5
-rw-r--r--arch/ia64/kernel/entry.S100
-rw-r--r--arch/ia64/kernel/ftrace.c206
-rw-r--r--arch/ia64/kernel/ia64_ksyms.c6
-rw-r--r--arch/ia64/kvm/kvm-ia64.c4
-rw-r--r--arch/ia64/kvm/process.c17
-rw-r--r--arch/ia64/mm/numa.c4
-rw-r--r--arch/powerpc/include/asm/pgtable-4k.h2
-rw-r--r--arch/powerpc/include/asm/pgtable-64k.h2
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc32.h3
-rw-r--r--arch/powerpc/kernel/align.c7
-rw-r--r--arch/powerpc/kvm/powerpc.c4
-rw-r--r--arch/powerpc/mm/numa.c5
-rw-r--r--arch/powerpc/platforms/ps3/mm.c2
-rw-r--r--arch/s390/kvm/kvm-s390.c4
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/kvm.h7
-rw-r--r--arch/x86/include/asm/mmzone_32.h2
-rw-r--r--arch/x86/include/asm/mmzone_64.h2
-rw-r--r--arch/x86/include/asm/page.h1
-rw-r--r--arch/x86/include/asm/paravirt.h17
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c12
-rw-r--r--arch/x86/kernel/dumpstack.c6
-rw-r--r--arch/x86/kernel/ftrace.c49
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/olpc.c2
-rw-r--r--arch/x86/kernel/paravirt.c26
-rw-r--r--arch/x86/kernel/process.c5
-rw-r--r--arch/x86/kernel/ptrace.c16
-rw-r--r--arch/x86/kernel/traps.c10
-rw-r--r--arch/x86/kvm/Kconfig3
-rw-r--r--arch/x86/kvm/i8254.c2
-rw-r--r--arch/x86/kvm/irq.c7
-rw-r--r--arch/x86/kvm/irq.h1
-rw-r--r--arch/x86/kvm/lapic.c66
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu.c9
-rw-r--r--arch/x86/kvm/svm.c1
-rw-r--r--arch/x86/kvm/vmx.c5
-rw-r--r--arch/x86/kvm/x86.c10
-rw-r--r--arch/x86/mm/ioremap.c19
-rw-r--r--arch/x86/mm/numa_64.c2
-rw-r--r--arch/x86/mm/pageattr.c15
-rw-r--r--arch/x86/mm/pat.c83
-rw-r--r--block/Kconfig16
-rw-r--r--block/Makefile1
-rw-r--r--block/blk-timeout.c9
-rw-r--r--block/bsg.c17
-rw-r--r--block/genhd.c8
-rw-r--r--crypto/lrw.c8
-rw-r--r--drivers/ata/libata-sff.c28
-rw-r--r--drivers/ata/pata_via.c4
-rw-r--r--drivers/ata/sata_nv.c14
-rw-r--r--drivers/block/aoe/aoe.h1
-rw-r--r--drivers/block/aoe/aoenet.c2
-rw-r--r--drivers/block/cciss.c215
-rw-r--r--drivers/block/floppy.c79
-rw-r--r--drivers/block/paride/pg.c2
-rw-r--r--drivers/char/sysrq.c2
-rw-r--r--drivers/dma/dmaengine.c2
-rw-r--r--drivers/dma/dw_dmac.c5
-rw-r--r--drivers/dma/dw_dmac_regs.h2
-rw-r--r--drivers/firmware/memmap.c2
-rw-r--r--drivers/gpu/drm/Kconfig13
-rw-r--r--drivers/hid/hid-core.c13
-rw-r--r--drivers/hid/hid-ids.h3
-rw-r--r--drivers/hid/hidraw.c14
-rw-r--r--drivers/hwmon/f71882fg.c4
-rw-r--r--drivers/hwmon/hp_accel.c85
-rw-r--r--drivers/hwmon/lis3lv02d.c195
-rw-r--r--drivers/hwmon/lis3lv02d.h21
-rw-r--r--drivers/hwmon/vt1211.c2
-rw-r--r--drivers/hwmon/w83627ehf.c2
-rw-r--r--drivers/md/dm-io.c2
-rw-r--r--drivers/md/dm-kcopyd.c2
-rw-r--r--drivers/md/md.c4
-rw-r--r--drivers/media/common/tuners/tuner-simple.c10
-rw-r--r--drivers/media/dvb/dvb-core/dmxdev.c16
-rw-r--r--drivers/media/dvb/dvb-core/dvb_demux.c16
-rw-r--r--drivers/media/radio/radio-si470x.c55
-rw-r--r--drivers/media/video/gspca/gspca.c5
-rw-r--r--drivers/media/video/ivtv/ivtv-ioctl.c26
-rw-r--r--drivers/mfd/htc-egpio.c4
-rw-r--r--drivers/mfd/pcf50633-core.c1
-rw-r--r--drivers/mfd/sm501.c26
-rw-r--r--drivers/mfd/twl4030-core.c2
-rw-r--r--drivers/mfd/wm8350-core.c48
-rw-r--r--drivers/mfd/wm8350-regmap.c2
-rw-r--r--drivers/mmc/card/block.c2
-rw-r--r--drivers/mmc/card/mmc_test.c2
-rw-r--r--drivers/mmc/host/atmel-mci.c5
-rw-r--r--drivers/mmc/host/omap_hsmmc.c98
-rw-r--r--drivers/mmc/host/s3cmci.c2
-rw-r--r--drivers/mmc/host/sdhci-pci.c3
-rw-r--r--drivers/mmc/host/sdhci.c7
-rw-r--r--drivers/mmc/host/sdhci.h3
-rw-r--r--drivers/oprofile/cpu_buffer.c5
-rw-r--r--drivers/pci/intel-iommu.c16
-rw-r--r--drivers/pci/msi.c10
-rw-r--r--drivers/pci/pci.c13
-rw-r--r--drivers/pci/pci.h20
-rw-r--r--drivers/pci/rom.c1
-rw-r--r--drivers/platform/x86/Kconfig2
-rw-r--r--drivers/serial/atmel_serial.c4
-rw-r--r--drivers/serial/jsm/jsm_driver.c3
-rw-r--r--drivers/spi/spi_gpio.c2
-rw-r--r--drivers/usb/core/hcd-pci.c15
-rw-r--r--drivers/usb/core/hcd.h1
-rw-r--r--drivers/usb/host/ehci-pci.c1
-rw-r--r--drivers/usb/host/ohci-pci.c1
-rw-r--r--drivers/usb/host/uhci-hcd.c1
-rw-r--r--drivers/usb/host/whci/asl.c4
-rw-r--r--drivers/usb/host/whci/pzl.c4
-rw-r--r--drivers/video/Kconfig10
-rw-r--r--drivers/watchdog/Kconfig2
-rw-r--r--drivers/watchdog/iTCO_vendor_support.c32
-rw-r--r--drivers/watchdog/iTCO_wdt.c35
-rw-r--r--fs/bio.c5
-rw-r--r--fs/btrfs/ctree.c58
-rw-r--r--fs/btrfs/ctree.h11
-rw-r--r--fs/btrfs/disk-io.c46
-rw-r--r--fs/btrfs/disk-io.h10
-rw-r--r--fs/btrfs/extent-tree.c83
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/inode-map.c1
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/locking.c11
-rw-r--r--fs/btrfs/super.c5
-rw-r--r--fs/btrfs/transaction.c2
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c6
-rw-r--r--fs/buffer.c3
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/inode.c27
-rw-r--r--fs/ext4/mballoc.c32
-rw-r--r--fs/ext4/migrate.c8
-rw-r--r--fs/ext4/super.c11
-rw-r--r--fs/jbd2/journal.c17
-rw-r--r--fs/jbd2/transaction.c42
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/notify/inotify/inotify.c2
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/partitions/check.c4
-rw-r--r--fs/seq_file.c36
-rw-r--r--fs/super.c17
-rw-r--r--fs/timerfd.c12
-rw-r--r--include/asm-frv/pgtable.h2
-rw-r--r--include/linux/bio.h2
-rw-r--r--include/linux/blktrace_api.h6
-rw-r--r--include/linux/dmaengine.h2
-rw-r--r--include/linux/firmware-map.h2
-rw-r--r--include/linux/fs.h24
-rw-r--r--include/linux/ftrace.h99
-rw-r--r--include/linux/ftrace_irq.h2
-rw-r--r--include/linux/hardirq.h73
-rw-r--r--include/linux/jbd2.h3
-rw-r--r--include/linux/kvm.h10
-rw-r--r--include/linux/kvm_host.h1
-rw-r--r--include/linux/mm.h21
-rw-r--r--include/linux/mmzone.h2
-rw-r--r--include/linux/pci_ids.h2
-rw-r--r--include/linux/ring_buffer.h22
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/seq_file.h1
-rw-r--r--include/linux/slab_def.h68
-rw-r--r--include/linux/slob_def.h9
-rw-r--r--include/linux/slub_def.h53
-rw-r--r--include/linux/timerfd.h16
-rw-r--r--include/linux/vmalloc.h4
-rw-r--r--include/trace/kmemtrace.h75
-rw-r--r--include/trace/power.h34
-rw-r--r--include/trace/workqueue.h25
-rw-r--r--init/main.c2
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/extable.c4
-rw-r--r--kernel/futex.c53
-rw-r--r--kernel/module.c2
-rw-r--r--kernel/posix-cpu-timers.c60
-rw-r--r--kernel/power/Makefile2
-rw-r--r--kernel/power/swap.c5
-rw-r--r--kernel/relay.c4
-rw-r--r--kernel/sched.c23
-rw-r--r--kernel/softirq.c13
-rw-r--r--kernel/trace/Kconfig72
-rw-r--r--kernel/trace/Makefile5
-rw-r--r--kernel/trace/blktrace.c (renamed from block/blktrace.c)720
-rw-r--r--kernel/trace/ftrace.c959
-rw-r--r--kernel/trace/kmemtrace.c339
-rw-r--r--kernel/trace/ring_buffer.c168
-rw-r--r--kernel/trace/trace.c1533
-rw-r--r--kernel/trace/trace.h130
-rw-r--r--kernel/trace/trace_boot.c36
-rw-r--r--kernel/trace/trace_branch.c279
-rw-r--r--kernel/trace/trace_functions.c369
-rw-r--r--kernel/trace/trace_functions_graph.c379
-rw-r--r--kernel/trace/trace_hw_branches.c179
-rw-r--r--kernel/trace/trace_irqsoff.c46
-rw-r--r--kernel/trace/trace_mmiotrace.c41
-rw-r--r--kernel/trace/trace_nop.c5
-rw-r--r--kernel/trace/trace_output.c919
-rw-r--r--kernel/trace/trace_output.h62
-rw-r--r--kernel/trace/trace_power.c188
-rw-r--r--kernel/trace/trace_sched_switch.c15
-rw-r--r--kernel/trace/trace_sched_wakeup.c88
-rw-r--r--kernel/trace/trace_selftest.c85
-rw-r--r--kernel/trace/trace_stat.c319
-rw-r--r--kernel/trace/trace_stat.h31
-rw-r--r--kernel/trace/trace_sysprof.c21
-rw-r--r--kernel/trace/trace_workqueue.c281
-rw-r--r--kernel/workqueue.c16
-rw-r--r--mm/mlock.c7
-rw-r--r--mm/page-writeback.c13
-rw-r--r--mm/page_alloc.c27
-rw-r--r--mm/page_io.c2
-rw-r--r--mm/slab.c71
-rw-r--r--mm/slob.c37
-rw-r--r--mm/slub.c83
-rw-r--r--mm/vmalloc.c8
-rw-r--r--scripts/Makefile.build13
-rw-r--r--scripts/bootgraph.pl4
-rw-r--r--scripts/markup_oops.pl161
-rw-r--r--scripts/mod/file2alias.c1
-rwxr-xr-xscripts/package/mkspec8
-rwxr-xr-xscripts/recordmcount.pl37
-rwxr-xr-xscripts/setlocalversion9
-rwxr-xr-xscripts/tags.sh12
-rw-r--r--virt/kvm/iommu.c6
-rw-r--r--virt/kvm/kvm_main.c43
253 files changed, 8602 insertions, 3001 deletions
diff --git a/CREDITS b/CREDITS
index 2b39168c06a..5e0736722af 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2166,7 +2166,6 @@ D: Initial implementation of VC's, pty's and select()
N: Pavel Machek
E: pavel@ucw.cz
-E: pavel@suse.cz
D: Softcursor for vga, hypertech cdrom support, vcsa bugfix, nbd
D: sun4/330 port, capabilities for elf, speedup for rm on ext2, USB,
D: work on suspend-to-ram/disk, killing duplicates from ioctl32
diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
new file mode 100644
index 00000000000..5e6a92a02d8
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-kmemtrace
@@ -0,0 +1,71 @@
+What: /sys/kernel/debug/kmemtrace/
+Date: July 2008
+Contact: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
+Description:
+
+In kmemtrace-enabled kernels, the following files are created:
+
+/sys/kernel/debug/kmemtrace/
+ cpu<n> (0400) Per-CPU tracing data, see below. (binary)
+ total_overruns (0400) Total number of bytes which were dropped from
+ cpu<n> files because of full buffer condition,
+ non-binary. (text)
+ abi_version (0400) Kernel's kmemtrace ABI version. (text)
+
+Each per-CPU file should be read according to the relay interface. That is,
+the reader should set affinity to that specific CPU and, as currently done by
+the userspace application (though there are other methods), use poll() with
+an infinite timeout before every read(). Otherwise, erroneous data may be
+read. The binary data has the following _core_ format:
+
+ Event ID (1 byte) Unsigned integer, one of:
+ 0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
+ 1 - represents a freeing of previously allocated memory
+ (KMEMTRACE_EVENT_FREE)
+ Type ID (1 byte) Unsigned integer, one of:
+ 0 - this is a kmalloc() / kfree()
+ 1 - this is a kmem_cache_alloc() / kmem_cache_free()
+ 2 - this is a __get_free_pages() et al.
+ Event size (2 bytes) Unsigned integer representing the
+ size of this event. Used to extend
+ kmemtrace. Discard the bytes you
+ don't know about.
+ Sequence number (4 bytes) Signed integer used to reorder data
+ logged on SMP machines. Wraparound
+ must be taken into account, although
+ it is unlikely.
+ Caller address (8 bytes) Return address to the caller.
+ Pointer to mem (8 bytes) Pointer to target memory area. Can be
+ NULL, but not all such calls might be
+ recorded.
+
+In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
+
+ Requested bytes (8 bytes) Total number of requested bytes,
+ unsigned, must not be zero.
+ Allocated bytes (8 bytes) Total number of actually allocated
+ bytes, unsigned, must not be lower
+ than requested bytes.
+ Requested flags (4 bytes) GFP flags supplied by the caller.
+ Target CPU (4 bytes) Signed integer, valid for event id 1.
+ If equal to -1, target CPU is the same
+ as origin CPU, but the reverse might
+ not be true.
+
+The data is made available in the same endianness the machine has.
+
+Other event ids and type ids may be defined and added. Other fields may be
+added by increasing event size, but see below for details.
+Every modification to the ABI, including new id definitions, are followed
+by bumping the ABI version by one.
+
+Adding new data to the packet (features) is done at the end of the mandatory
+data:
+ Feature size (2 byte)
+ Feature ID (1 byte)
+ Feature data (Feature size - 3 bytes)
+
+
+Users:
+ kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
+
diff --git a/Documentation/ABI/testing/sysfs-firmware-memmap b/Documentation/ABI/testing/sysfs-firmware-memmap
index 0d99ee6ae02..eca0d65087d 100644
--- a/Documentation/ABI/testing/sysfs-firmware-memmap
+++ b/Documentation/ABI/testing/sysfs-firmware-memmap
@@ -1,6 +1,6 @@
What: /sys/firmware/memmap/
Date: June 2008
-Contact: Bernhard Walle <bwalle@suse.de>
+Contact: Bernhard Walle <bernhard.walle@gmx.de>
Description:
On all platforms, the firmware provides a memory map which the
kernel reads. The resources from that memory map are registered
diff --git a/Documentation/PCI/PCIEBUS-HOWTO.txt b/Documentation/PCI/PCIEBUS-HOWTO.txt
index 9a07e38631b..6bd5f372ade 100644
--- a/Documentation/PCI/PCIEBUS-HOWTO.txt
+++ b/Documentation/PCI/PCIEBUS-HOWTO.txt
@@ -93,7 +93,7 @@ the PCI Express Port Bus driver from loading a service driver.
int pcie_port_service_register(struct pcie_port_service_driver *new)
-This API replaces the Linux Driver Model's pci_module_init API. A
+This API replaces the Linux Driver Model's pci_register_driver API. A
service driver should always calls pcie_port_service_register at
module init. Note that after service driver being loaded, calls
such as pci_enable_device(dev) and pci_set_master(dev) are no longer
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index d9e5d6f41b9..93feb844448 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -252,10 +252,8 @@ cgroup file system directories.
When a task is moved from one cgroup to another, it gets a new
css_set pointer - if there's an already existing css_set with the
desired collection of cgroups then that group is reused, else a new
-css_set is allocated. Note that the current implementation uses a
-linear search to locate an appropriate existing css_set, so isn't
-very efficient. A future version will use a hash table for better
-performance.
+css_set is allocated. The appropriate existing css_set is located by
+looking into a hash table.
To allow access from a cgroup to the css_sets (and hence tasks)
that comprise it, a set of cg_cgroup_link objects form a lattice;
diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt
index 803b1318b13..758fb42a1b6 100644
--- a/Documentation/ftrace.txt
+++ b/Documentation/ftrace.txt
@@ -165,6 +165,8 @@ Here is the list of current tracers that may be configured.
nop - This is not a tracer. To remove all tracers from tracing
simply echo "nop" into current_tracer.
+ hw-branch-tracer - traces branches on all cpu's in a circular buffer.
+
Examples of using the tracer
----------------------------
@@ -1152,6 +1154,78 @@ int main (int argc, char **argv)
return 0;
}
+
+hw-branch-tracer (x86 only)
+---------------------------
+
+This tracer uses the x86 last branch tracing hardware feature to
+collect a branch trace on all cpus with relatively low overhead.
+
+The tracer uses a fixed-size circular buffer per cpu and only
+traces ring 0 branches. The trace file dumps that buffer in the
+following format:
+
+# tracer: hw-branch-tracer
+#
+# CPU# TO <- FROM
+ 0 scheduler_tick+0xb5/0x1bf <- task_tick_idle+0x5/0x6
+ 2 run_posix_cpu_timers+0x2b/0x72a <- run_posix_cpu_timers+0x25/0x72a
+ 0 scheduler_tick+0x139/0x1bf <- scheduler_tick+0xed/0x1bf
+ 0 scheduler_tick+0x17c/0x1bf <- scheduler_tick+0x148/0x1bf
+ 2 run_posix_cpu_timers+0x9e/0x72a <- run_posix_cpu_timers+0x5e/0x72a
+ 0 scheduler_tick+0x1b6/0x1bf <- scheduler_tick+0x1aa/0x1bf
+
+
+The tracer may be used to dump the trace for the oops'ing cpu on a
+kernel oops into the system log. To enable this, ftrace_dump_on_oops
+must be set. To set ftrace_dump_on_oops, one can either use the sysctl
+function or set it via the proc system interface.
+
+ sysctl kernel.ftrace_dump_on_oops=1
+
+or
+
+ echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
+
+
+Here's an example of such a dump after a null pointer dereference in a
+kernel module:
+
+[57848.105921] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
+[57848.106019] IP: [<ffffffffa0000006>] open+0x6/0x14 [oops]
+[57848.106019] PGD 2354e9067 PUD 2375e7067 PMD 0
+[57848.106019] Oops: 0002 [#1] SMP
+[57848.106019] last sysfs file: /sys/devices/pci0000:00/0000:00:1e.0/0000:20:05.0/local_cpus
+[57848.106019] Dumping ftrace buffer:
+[57848.106019] ---------------------------------
+[...]
+[57848.106019] 0 chrdev_open+0xe6/0x165 <- cdev_put+0x23/0x24
+[57848.106019] 0 chrdev_open+0x117/0x165 <- chrdev_open+0xfa/0x165
+[57848.106019] 0 chrdev_open+0x120/0x165 <- chrdev_open+0x11c/0x165
+[57848.106019] 0 chrdev_open+0x134/0x165 <- chrdev_open+0x12b/0x165
+[57848.106019] 0 open+0x0/0x14 [oops] <- chrdev_open+0x144/0x165
+[57848.106019] 0 page_fault+0x0/0x30 <- open+0x6/0x14 [oops]
+[57848.106019] 0 error_entry+0x0/0x5b <- page_fault+0x4/0x30
+[57848.106019] 0 error_kernelspace+0x0/0x31 <- error_entry+0x59/0x5b
+[57848.106019] 0 error_sti+0x0/0x1 <- error_kernelspace+0x2d/0x31
+[57848.106019] 0 page_fault+0x9/0x30 <- error_sti+0x0/0x1
+[57848.106019] 0 do_page_fault+0x0/0x881 <- page_fault+0x1a/0x30
+[...]
+[57848.106019] 0 do_page_fault+0x66b/0x881 <- is_prefetch+0x1ee/0x1f2
+[57848.106019] 0 do_page_fault+0x6e0/0x881 <- do_page_fault+0x67a/0x881
+[57848.106019] 0 oops_begin+0x0/0x96 <- do_page_fault+0x6e0/0x881
+[57848.106019] 0 trace_hw_branch_oops+0x0/0x2d <- oops_begin+0x9/0x96
+[...]
+[57848.106019] 0 ds_suspend_bts+0x2a/0xe3 <- ds_suspend_bts+0x1a/0xe3
+[57848.106019] ---------------------------------
+[57848.106019] CPU 0
+[57848.106019] Modules linked in: oops
+[57848.106019] Pid: 5542, comm: cat Tainted: G W 2.6.28 #23
+[57848.106019] RIP: 0010:[<ffffffffa0000006>] [<ffffffffa0000006>] open+0x6/0x14 [oops]
+[57848.106019] RSP: 0018:ffff880235457d48 EFLAGS: 00010246
+[...]
+
+
dynamic ftrace
--------------
diff --git a/Documentation/hwmon/hpfall.c b/Documentation/hwmon/hpfall.c
new file mode 100644
index 00000000000..bbea1ccfd46
--- /dev/null
+++ b/Documentation/hwmon/hpfall.c
@@ -0,0 +1,101 @@
+/* Disk protection for HP machines.
+ *
+ * Copyright 2008 Eric Piel
+ * Copyright 2009 Pavel Machek <pavel@suse.cz>
+ *
+ * GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <signal.h>
+
+void write_int(char *path, int i)
+{
+ char buf[1024];
+ int fd = open(path, O_RDWR);
+ if (fd < 0) {
+ perror("open");
+ exit(1);
+ }
+ sprintf(buf, "%d", i);
+ if (write(fd, buf, strlen(buf)) != strlen(buf)) {
+ perror("write");
+ exit(1);
+ }
+ close(fd);
+}
+
+void set_led(int on)
+{
+ write_int("/sys/class/leds/hp::hddprotect/brightness", on);
+}
+
+void protect(int seconds)
+{
+ write_int("/sys/block/sda/device/unload_heads", seconds*1000);
+}
+
+int on_ac(void)
+{
+// /sys/class/power_supply/AC0/online
+}
+
+int lid_open(void)
+{
+// /proc/acpi/button/lid/LID/state
+}
+
+void ignore_me(void)
+{
+ protect(0);
+ set_led(0);
+
+}
+
+int main(int argc, char* argv[])
+{
+ int fd, ret;
+
+ fd = open("/dev/freefall", O_RDONLY);
+ if (fd < 0) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ signal(SIGALRM, ignore_me);
+
+ for (;;) {
+ unsigned char count;
+
+ ret = read(fd, &count, sizeof(count));
+ alarm(0);
+ if ((ret == -1) && (errno == EINTR)) {
+ /* Alarm expired, time to unpark the heads */
+ continue;
+ }
+
+ if (ret != sizeof(count)) {
+ perror("read");
+ break;
+ }
+
+ protect(21);
+ set_led(1);
+ if (1 || on_ac() || lid_open()) {
+ alarm(2);
+ } else {
+ alarm(20);
+ }
+ }
+
+ close(fd);
+ return EXIT_SUCCESS;
+}
diff --git a/Documentation/hwmon/lis3lv02d b/Documentation/hwmon/lis3lv02d
index 0fcfc4a7ccd..287f8c90265 100644
--- a/Documentation/hwmon/lis3lv02d
+++ b/Documentation/hwmon/lis3lv02d
@@ -33,6 +33,14 @@ rate - reports the sampling rate of the accelerometer device in HZ
This driver also provides an absolute input class device, allowing
the laptop to act as a pinball machine-esque joystick.
+Another feature of the driver is misc device called "freefall" that
+acts similar to /dev/rtc and reacts on free-fall interrupts received
+from the device. It supports blocking operations, poll/select and
+fasync operation modes. You must read 1 bytes from the device. The
+result is number of free-fall interrupts since the last successful
+read (or 255 if number of interrupts would not fit).
+
+
Axes orientation
----------------
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b182626739e..fc22e922342 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -49,6 +49,7 @@ parameter is applicable:
ISAPNP ISA PnP code is enabled.
ISDN Appropriate ISDN support is enabled.
JOY Appropriate joystick support is enabled.
+ KMEMTRACE kmemtrace is enabled.
LIBATA Libata driver is enabled
LP Printer support is enabled.
LOOP Loopback device support is enabled.
@@ -1045,6 +1046,15 @@ and is between 256 and 4096 characters. It is defined in the file
use the HighMem zone if it exists, and the Normal
zone if it does not.
+ kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no }
+ Controls whether kmemtrace is enabled
+ at boot-time.
+
+ kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of
+ subbufs kmemtrace's relay channel has. Set this
+ higher than default (KMEMTRACE_N_SUBBUFS in code) if
+ you experience buffer overruns.
+
movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
is similar to kernelcore except it specifies the
amount of memory used for migratable allocations.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 9e592c718af..535aeb936db 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -113,6 +113,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
'x' - Used by xmon interface on ppc/powerpc platforms.
+'z' - Dump the ftrace buffer
+
'0'-'9' - Sets the console log level, controlling which kernel messages
will be printed to your console. ('0', for example would make
it so that only emergency messages like PANICs or OOPSes would
diff --git a/Documentation/vm/kmemtrace.txt b/Documentation/vm/kmemtrace.txt
new file mode 100644
index 00000000000..a956d9b7f94
--- /dev/null
+++ b/Documentation/vm/kmemtrace.txt
@@ -0,0 +1,126 @@
+ kmemtrace - Kernel Memory Tracer
+
+ by Eduard - Gabriel Munteanu
+ <eduard.munteanu@linux360.ro>
+
+I. Introduction
+===============
+
+kmemtrace helps kernel developers figure out two things:
+1) how different allocators (SLAB, SLUB etc.) perform
+2) how kernel code allocates memory and how much
+
+To do this, we trace every allocation and export information to the userspace
+through the relay interface. We export things such as the number of requested
+bytes, the number of bytes actually allocated (i.e. including internal
+fragmentation), whether this is a slab allocation or a plain kmalloc() and so
+on.
+
+The actual analysis is performed by a userspace tool (see section III for
+details on where to get it from). It logs the data exported by the kernel,
+processes it and (as of writing this) can provide the following information:
+- the total amount of memory allocated and fragmentation per call-site
+- the amount of memory allocated and fragmentation per allocation
+- total memory allocated and fragmentation in the collected dataset
+- number of cross-CPU allocation and frees (makes sense in NUMA environments)
+
+Moreover, it can potentially find inconsistent and erroneous behavior in
+kernel code, such as using slab free functions on kmalloc'ed memory or
+allocating less memory than requested (but not truly failed allocations).
+
+kmemtrace also makes provisions for tracing on some arch and analysing the
+data on another.
+
+II. Design and goals
+====================
+
+kmemtrace was designed to handle rather large amounts of data. Thus, it uses
+the relay interface to export whatever is logged to userspace, which then
+stores it. Analysis and reporting is done asynchronously, that is, after the
+data is collected and stored. By design, it allows one to log and analyse
+on different machines and different arches.
+
+As of writing this, the ABI is not considered stable, though it might not
+change much. However, no guarantees are made about compatibility yet. When
+deemed stable, the ABI should still allow easy extension while maintaining
+backward compatibility. This is described further in Documentation/ABI.
+
+Summary of design goals:
+ - allow logging and analysis to be done across different machines
+ - be fast and anticipate usage in high-load environments (*)
+ - be reasonably extensible
+ - make it possible for GNU/Linux distributions to have kmemtrace
+ included in their repositories
+
+(*) - one of the reasons Pekka Enberg's original userspace data analysis
+ tool's code was rewritten from Perl to C (although this is more than a
+ simple conversion)
+
+
+III. Quick usage guide
+======================
+
+1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
+CONFIG_KMEMTRACE).
+
+2) Get the userspace tool and build it:
+$ git-clone git://repo.or.cz/kmemtrace-user.git # current repository
+$ cd kmemtrace-user/
+$ ./autogen.sh
+$ ./configure
+$ make
+
+3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
+'single' runlevel (so that relay buffers don't fill up easily), and run
+kmemtrace:
+# '$' does not mean user, but root here.
+$ mount -t debugfs none /sys/kernel/debug
+$ mount -t proc none /proc
+$ cd path/to/kmemtrace-user/
+$ ./kmemtraced
+Wait a bit, then stop it with CTRL+C.
+$ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't
+ # overrun, should
+ # be zero.
+$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
+ check its correctness]
+$ ./kmemtrace-report
+
+Now you should have a nice and short summary of how the allocator performs.
+
+IV. FAQ and known issues
+========================
+
+Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
+this? Should I worry?
+A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
+large the number is. You can fix it by supplying a higher
+'kmemtrace.subbufs=N' kernel parameter.
+---
+
+Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
+A: This is a bug and should be reported. It can occur for a variety of
+reasons:
+ - possible bugs in relay code
+ - possible misuse of relay by kmemtrace
+ - timestamps being collected unorderly
+Or you may fix it yourself and send us a patch.
+---
+
+Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
+A: This is a known issue and I'm working on it. These might be true errors
+in kernel code, which may have inconsistent behavior (e.g. allocating memory
+with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
+out this behavior may work with SLAB, but may fail with other allocators.
+
+It may also be due to lack of tracing in some unusual allocator functions.
+
+We don't want bug reports regarding this issue yet.
+---
+
+V. See also
+===========
+
+Documentation/kernel-parameters.txt
+Documentation/ABI/testing/debugfs-kmemtrace
+
diff --git a/MAINTAINERS b/MAINTAINERS
index db65b4e6d13..4c2e406fab6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1905,10 +1905,10 @@ W: http://gigaset307x.sourceforge.net/
S: Maintained
HARD DRIVE ACTIVE PROTECTION SYSTEM (HDAPS) DRIVER
-P: Robert Love
-M: rlove@rlove.org
-M: linux-kernel@vger.kernel.org
-W: http://www.kernel.org/pub/linux/kernel/people/rml/hdaps/
+P: Frank Seidel
+M: frank@f-seidel.de
+L: lm-sensors@lm-sensors.org
+W: http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/
S: Maintained
GSPCA FINEPIX SUBDRIVER
@@ -2001,7 +2001,7 @@ S: Maintained
HIBERNATION (aka Software Suspend, aka swsusp)
P: Pavel Machek
-M: pavel@suse.cz
+M: pavel@ucw.cz
P: Rafael J. Wysocki
M: rjw@sisk.pl
L: linux-pm@lists.linux-foundation.org
@@ -2616,6 +2616,12 @@ M: jason.wessel@windriver.com
L: kgdb-bugreport@lists.sourceforge.net
S: Maintained
+KMEMTRACE
+P: Eduard - Gabriel Munteanu
+M: eduard.munteanu@linux360.ro
+L: linux-kernel@vger.kernel.org
+S: Maintained
+
KPROBES
P: Ananth N Mavinakayanahalli
M: ananth@in.ibm.com
@@ -4172,7 +4178,7 @@ SUSPEND TO RAM
P: Len Brown
M: len.brown@intel.com
P: Pavel Machek
-M: pavel@suse.cz
+M: pavel@ucw.cz
P: Rafael J. Wysocki
M: rjw@sisk.pl
L: linux-pm@lists.linux-foundation.org
@@ -4924,11 +4930,11 @@ L: zd1211-devs@lists.sourceforge.net (subscribers-only)
S: Maintained
ZR36067 VIDEO FOR LINUX DRIVER
-P: Ronald Bultje
-M: rbultje@ronald.bitfreak.net
L: mjpeg-users@lists.sourceforge.net
+L: linux-media@vger.kernel.org
W: http://mjpeg.sourceforge.net/driver-zoran/
-S: Maintained
+T: Mercurial http://linuxtv.org/hg/v4l-dvb
+S: Odd Fixes
ZS DECSTATION Z85C30 SERIAL DRIVER
P: Maciej W. Rozycki
diff --git a/Makefile b/Makefile
index 22d758495ad..59cf6dacec0 100644
--- a/Makefile
+++ b/Makefile
@@ -389,6 +389,7 @@ PHONY += outputmakefile
# output directory.
outputmakefile:
ifneq ($(KBUILD_SRC),)
+ $(Q)ln -fsn $(srctree) source
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile \
$(srctree) $(objtree) $(VERSION) $(PATCHLEVEL)
endif
@@ -946,7 +947,6 @@ ifneq ($(KBUILD_SRC),)
mkdir -p include2; \
ln -fsn $(srctree)/include/asm-$(SRCARCH) include2/asm; \
fi
- ln -fsn $(srctree) source
endif
# prepare2 creates a makefile if using a separate output directory
diff --git a/arch/alpha/include/asm/hardirq.h b/arch/alpha/include/asm/hardirq.h
index d953e234daa..88971460fa6 100644
--- a/arch/alpha/include/asm/hardirq.h
+++ b/arch/alpha/include/asm/hardirq.h
@@ -14,17 +14,4 @@ typedef struct {
void ack_bad_irq(unsigned int irq);
-#define HARDIRQ_BITS 12
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially nestable IRQ sources in the system
- * to nest on a single CPU. On Alpha, interrupts are masked at the CPU
- * by IPL as well as at the system level. We only have 8 IPLs (UNIX PALcode)
- * so we really only have 8 nestable IRQs, but allow some overhead
- */
-#if (1 << HARDIRQ_BITS) < 16
-#error HARDIRQ_BITS is too low!
-#endif
-
#endif /* _ALPHA_HARDIRQ_H */
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index f238370c907..8d0097f1020 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -93,8 +93,8 @@ common_shutdown_1(void *generic_ptr)
if (cpuid != boot_cpuid) {
flags |= 0x00040000UL; /* "remain halted" */
*pflags = flags;
- cpu_clear(cpuid, cpu_present_map);
- cpu_clear(cpuid, cpu_possible_map);
+ set_cpu_present(cpuid, false);
+ set_cpu_possible(cpuid, false);
halt();
}
#endif
@@ -120,8 +120,8 @@ common_shutdown_1(void *generic_ptr)
#ifdef CONFIG_SMP
/* Wait for the secondaries to halt. */
- cpu_clear(boot_cpuid, cpu_present_map);
- cpu_clear(boot_cpuid, cpu_possible_map);
+ set_cpu_present(boot_cpuid, false);
+ set_cpu_possible(boot_cpuid, false);
while (cpus_weight(cpu_present_map))
barrier();
#endif
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 00f1dc3dfd5..b1fe5674c3a 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -120,12 +120,12 @@ void __cpuinit
smp_callin(void)
{
int cpuid = hard_smp_processor_id();
- cpumask_t mask = cpu_online_map;
- if (cpu_test_and_set(cpuid, mask)) {
+ if (cpu_online(cpuid)) {
printk("??, cpu 0x%x already present??\n", cpuid);
BUG();
}
+ set_cpu_online(cpuid, true);
/* Turn on machine checks. */
wrmces(7);
@@ -436,8 +436,8 @@ setup_smp(void)
((char *)cpubase + i*hwrpb->processor_size);
if ((cpu->flags & 0x1cc) == 0x1cc) {
smp_num_probed++;
- cpu_set(i, cpu_possible_map);
- cpu_set(i, cpu_present_map);
+ set_cpu_possible(i, true);
+ set_cpu_present(i, true);
cpu->pal_revision = boot_cpu_palrev;
}
@@ -470,8 +470,8 @@ smp_prepare_cpus(unsigned int max_cpus)
/* Nothing to do on a UP box, or when told not to. */
if (smp_num_probed == 1 || max_cpus == 0) {
- cpu_possible_map = cpumask_of_cpu(boot_cpuid);
- cpu_present_map = cpumask_of_cpu(boot_cpuid);
+ init_cpu_possible(cpumask_of(boot_cpuid));
+ init_cpu_present(cpumask_of(boot_cpuid));
printk(KERN_INFO "SMP mode deactivated.\n");
return;
}
diff --git a/arch/avr32/include/asm/hardirq.h b/arch/avr32/include/asm/hardirq.h
index 267354356f6..015bc75ea79 100644
--- a/arch/avr32/include/asm/hardirq.h
+++ b/arch/avr32/include/asm/hardirq.h
@@ -20,15 +20,4 @@ void ack_bad_irq(unsigned int irq);
#endif /* __ASSEMBLY__ */
-#define HARDIRQ_BITS 12
-
-/*
- * The hardirq mask has to be large enough to have
- * space for potentially all IRQ sources in the system
- * nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
#endif /* __ASM_AVR32_HARDIRQ_H */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6183aeccecf..8b6a8a554af 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -22,6 +22,9 @@ config IA64
select HAVE_OPROFILE
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
+ select HAVE_FUNCTION_TRACER
select HAVE_DMA_ATTRS
select HAVE_KVM
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/ia64/include/asm/ftrace.h b/arch/ia64/include/asm/ftrace.h
new file mode 100644
index 00000000000..d20db3c2a65
--- /dev/null
+++ b/arch/ia64/include/asm/ftrace.h
@@ -0,0 +1,28 @@
+#ifndef _ASM_IA64_FTRACE_H
+#define _ASM_IA64_FTRACE_H
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define MCOUNT_INSN_SIZE 32 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0);
+#define mcount _mcount
+
+#include <asm/kprobes.h>
+/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */
+#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip)
+#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip)
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ /* second bundle, insn 2 */
+ return addr - 0x12;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* _ASM_IA64_FTRACE_H */
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
index 140e495b8e0..d514cd9edb4 100644
--- a/arch/ia64/include/asm/hardirq.h
+++ b/arch/ia64/include/asm/hardirq.h
@@ -20,16 +20,6 @@
#define local_softirq_pending() (local_cpu_data->softirq_pending)
-#define HARDIRQ_BITS 14
-
-/*
- * The hardirq mask has to be large enough to have space for potentially all IRQ sources
- * in the system nesting on a single CPU:
- */
-#if (1 << HARDIRQ_BITS) < NR_IRQS
-# error HARDIRQ_BITS is too low!
-#endif
-
extern void __iomem *ipi_base_addr;
void ack_bad_irq(unsigned int irq);
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index 68aa6da807c..bfa86b6af7c 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -25,6 +25,10 @@
#include <linux/ioctl.h>
+/* Select x86 specific features in <linux/kvm.h> */
+#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_DEVICE_ASSIGNMENT
+
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
diff --git a/arch/ia64/include/asm/mmzone.h b/arch/ia64/include/asm/mmzone.h
index 34efe88eb84..f2ca32069b3 100644
--- a/arch/ia64/include/asm/mmzone.h
+++ b/arch/ia64/include/asm/mmzone.h
@@ -31,10 +31,6 @@ static inline int pfn_to_nid(unsigned long pfn)
#endif
}
-#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
-extern int early_pfn_to_nid(unsigned long pfn);
-#endif
-
#ifdef CONFIG_IA64_DIG /* DIG systems are small */
# define MAX_PHYSNODE_ID 8
# define NR_NODE_MEMBLKS (MAX_NUMNODES * 8)
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index c381ea95489..ab6e7ec0bba 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -2,6 +2,10 @@
# Makefile for the linux kernel.
#
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
extra-y := head.o init_task.o vmlinux.lds
obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
@@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index e5341e2c117..7e3382b06d5 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -47,6 +47,7 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
+#include <asm/ftrace.h>
#include "minstate.h"
@@ -1404,6 +1405,105 @@ GLOBAL_ENTRY(unw_init_running)
br.ret.sptk.many rp
END(unw_init_running)
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+GLOBAL_ENTRY(_mcount)
+ br ftrace_stub
+END(_mcount)
+
+.here:
+ br.ret.sptk.many b0
+
+GLOBAL_ENTRY(ftrace_caller)
+ alloc out0 = ar.pfs, 8, 0, 4, 0
+ mov out3 = r0
+ ;;
+ mov out2 = b0
+ add r3 = 0x20, r3
+ mov out1 = r1;
+ br.call.sptk.many b0 = ftrace_patch_gp
+ //this might be called from module, so we must patch gp
+ftrace_patch_gp:
+ movl gp=__gp
+ mov b0 = r3
+ ;;
+.global ftrace_call;
+ftrace_call:
+{
+ .mlx
+ nop.m 0x0
+ movl r3 = .here;;
+}
+ alloc loc0 = ar.pfs, 4, 4, 2, 0
+ ;;
+ mov loc1 = b0
+ mov out0 = b0
+ mov loc2 = r8
+ mov loc3 = r15
+ ;;
+ adds out0 = -MCOUNT_INSN_SIZE, out0
+ mov out1 = in2
+ mov b6 = r3
+
+ br.call.sptk.many b0 = b6
+ ;;
+ mov ar.pfs = loc0
+ mov b0 = loc1
+ mov r8 = loc2
+ mov r15 = loc3
+ br ftrace_stub
+ ;;
+END(ftrace_caller)
+
+#else
+GLOBAL_ENTRY(_mcount)
+ movl r2 = ftrace_stub
+ movl r3 = ftrace_trace_function;;
+ ld8 r3 = [r3];;
+ ld8 r3 = [r3];;
+ cmp.eq p7,p0 = r2, r3
+(p7) br.sptk.many ftrace_stub
+ ;;
+
+ alloc loc0 = ar.pfs, 4, 4, 2, 0
+ ;;
+ mov loc1 = b0
+ mov out0 = b0
+ mov loc2 = r8
+ mov loc3 = r15
+ ;;
+ adds out0 = -MCOUNT_INSN_SIZE, out0
+ mov out1 = in2
+ mov b6 = r3
+
+ br.call.sptk.many b0 = b6
+ ;;
+ mov ar.pfs = loc0
+ mov b0 = loc1
+ mov r8 = loc2
+ mov r15 = loc3
+ br ftrace_stub
+ ;;
+END(_mcount)
+#endif
+
+GLOBAL_ENTRY(ftrace_stub)
+ mov r3 = b0
+ movl r2 = _mcount_ret_helper
+ ;;
+ mov b6 = r2
+ mov b7 = r3
+ br.ret.sptk.many b6
+
+_mcount_ret_helper:
+ mov b0 = r42
+ mov r1 = r41
+ mov ar.pfs = r40
+ br b7
+END(ftrace_stub)
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
.rodata
.align 8
.globl sys_call_table
diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
new file mode 100644
index 00000000000..7fc8c961b1f
--- /dev/null
+++ b/arch/ia64/kernel/ftrace.c
@@ -0,0 +1,206 @@
+/*
+ * Dynamic function tracing support.
+ *
+ * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
+ *
+ * For licencing details, see COPYING.
+ *
+ * Defines low-level handling of mcount calls when the kernel
+ * is compiled with the -pg flag. When using dynamic ftrace, the
+ * mcount call-sites get patched lazily with NOP till they are
+ * enabled. All code mutation routines here take effect atomically.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/patch.h>
+
+/* In IA64, each function will be added below two bundles with -pg option */
+static unsigned char __attribute__((aligned(8)))
+ftrace_orig_code[MCOUNT_INSN_SIZE] = {
+ 0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
+ 0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
+ 0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */
+ 0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
+ 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
+ 0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */
+};
+
+struct ftrace_orig_insn {
+ u64 dummy1, dummy2, dummy3;
+ u64 dummy4:64-41+13;
+ u64 imm20:20;
+ u64 dummy5:3;
+ u64 sign:1;
+ u64 dummy6:4;
+};
+
+/* mcount stub will be converted below for nop */
+static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+ 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+ 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
+ 0x00, 0x00, 0x04, 0x00
+};
+
+static unsigned char *ftrace_nop_replace(void)
+{
+ return ftrace_nop_code;
+}
+
+/*
+ * mcount stub will be converted below for call
+ * Note: Just the last instruction is changed against nop
+ * */
+static unsigned char __attribute__((aligned(8)))
+ftrace_call_code[MCOUNT_INSN_SIZE] = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+ 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+ 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+ 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
+ 0xf8, 0xff, 0xff, 0xc8
+};
+
+struct ftrace_call_insn {
+ u64 dummy1, dummy2;
+ u64 dummy3:48;
+ u64 imm39_l:16;
+ u64 imm39_h:23;
+ u64 dummy4:13;
+ u64 imm20:20;
+ u64 dummy5:3;
+ u64 i:1;
+ u64 dummy6:4;
+};
+
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+ struct ftrace_call_insn *code = (void *)ftrace_call_code;
+ unsigned long offset = addr - (ip + 0x10);
+
+ code->imm39_l = offset >> 24;
+ code->imm39_h = offset >> 40;
+ code->imm20 = offset >> 4;
+ code->i = offset >> 63;
+ return ftrace_call_code;
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+ unsigned char *new_code, int do_check)
+{
+ unsigned char replaced[MCOUNT_INSN_SIZE];
+
+ /*
+ * Note: Due to modules and __init, code can
+ * disappear and change, we need to protect against faulting
+ * as well as code changing. We do this by using the
+ * probe_kernel_* functions.
+ *
+ * No real locking needed, this code is run through
+ * kstop_machine, or before SMP starts.
+ */
+
+ if (!do_check)
+ goto skip_check;
+
+ /* read the text we want to modify */
+ if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* Make sure it is what we expect it to be */
+ if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+ return -EINVAL;
+
+skip_check:
+ /* replace the text with the new text */
+ if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE))
+ return -EPERM;
+ flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
+
+ return 0;
+}
+
+static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE];
+ unsigned long ip = rec->ip;
+
+ if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+ if (rec->flags & FTRACE_FL_CONVERTED) {
+ struct ftrace_call_insn *call_insn, *tmp_call;
+
+ call_insn = (void *)ftrace_call_code;
+ tmp_call = (void *)replaced;
+ call_insn->imm39_l = tmp_call->imm39_l;
+ call_insn->imm39_h = tmp_call->imm39_h;
+ call_insn->imm20 = tmp_call->imm20;
+ call_insn->i = tmp_call->i;
+ if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0)
+ return -EINVAL;
+ return 0;
+ } else {
+ struct ftrace_orig_insn *call_insn, *tmp_call;
+
+ call_insn = (void *)ftrace_orig_code;
+ tmp_call = (void *)replaced;
+ call_insn->sign = tmp_call->sign;
+ call_insn->imm20 = tmp_call->imm20;
+ if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0)
+ return -EINVAL;
+ return 0;
+ }
+}
+
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ int ret;
+ char *new;
+
+ ret = ftrace_make_nop_check(rec, addr);
+ if (ret)
+ return ret;
+ new = ftrace_nop_replace();
+ return ftrace_modify_code(rec->ip, NULL, new, 0);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ unsigned char *old, *new;
+
+ old= ftrace_nop_replace();
+ new = ftrace_call_replace(ip, addr);
+ return ftrace_modify_code(ip, old, new, 1);
+}
+
+/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip;
+ unsigned long addr = ((struct fnptr *)ftrace_call)->ip;
+
+ if (func == ftrace_stub)
+ return 0;
+ ip = ((struct fnptr *)func)->ip;
+
+ ia64_patch_imm64(addr + 2, ip);
+
+ flush_icache_range(addr, addr + 16);
+ return 0;
+}
+
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void *data)
+{
+ *(unsigned long *)data = 0;
+
+ return 0;
+}
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 6da1f20d737..2d311864e35 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -112,3 +112,9 @@ EXPORT_SYMBOL_GPL(esi_call_phys);
#endif
extern char ia64_ivt[];
EXPORT_SYMBOL(ia64_ivt);
+
+#include <asm/ftrace.h>
+#ifdef CONFIG_FUNCTION_TRACER
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(_mcount);
+#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 4e586f6110a..28f982045f2 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1337,6 +1337,10 @@ static void kvm_release_vm_pages(struct kvm *kvm)
}
}
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 552d0772420..230eae482f3 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -455,13 +455,18 @@ fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
if (!vmm_fpswa_interface)
return (fpswa_ret_t) {-1, 0, 0, 0};
- /*
- * Just let fpswa driver to use hardware fp registers.
- * No fp register is valid in memory.
- */
memset(&fp_state, 0, sizeof(fp_state_t));
/*
+ * compute fp_state. only FP registers f6 - f11 are used by the
+ * vmm, so set those bits in the mask and set the low volatile
+ * pointer to point to these registers.
+ */
+ fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */
+
+ fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
+
+ /*
* unsigned long (*EFI_FPSWA) (
* unsigned long trap_type,
* void *Bundle,
@@ -545,10 +550,6 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
status = vmm_handle_fpu_swa(0, regs, isr);
if (!status)
return ;
- else if (-EAGAIN == status) {
- vcpu_decrement_iip(vcpu);
- return ;
- }
break;
}
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index b73bf1838e5..3efea7d0a35 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -58,7 +58,7 @@ paddr_to_nid(unsigned long paddr)
* SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
* the section resides.
*/
-int early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn)
{
int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
@@ -70,7 +70,7 @@ int early_pfn_to_nid(unsigned long pfn)
return node_memblk[i].nid;
}
- return 0;
+ return -1;
}
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/powerpc/include/asm/pgtable-4k.h b/arch/powerpc/include/asm/pgtable-4k.h
index 6b18ba9d2d8..1dbca4e7de6 100644
--- a/arch/powerpc/include/asm/pgtable-4k.h
+++ b/arch/powerpc/include/asm/pgtable-4k.h
@@ -60,7 +60,7 @@
/* It should be preserving the high 48 bits and then specifically */
/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \
- _PAGE_HPTEFLAGS)
+ _PAGE_HPTEFLAGS | _PAGE_SPECIAL)
/* Bits to mask out from a PMD to get to the PTE page */
#define PMD_MASKED_BITS 0
diff --git a/arch/powerpc/include/asm/pgtable-64k.h b/arch/powerpc/include/asm/pgtable-64k.h
index 07b0d8f09cb..7389003349a 100644
--- a/arch/powerpc/include/asm/pgtable-64k.h
+++ b/arch/powerpc/include/asm/pgtable-64k.h
@@ -114,7 +114,7 @@ static inline struct subpage_prot_table *pgd_subpage_prot(pgd_t *pgd)
* pgprot changes
*/
#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
- _PAGE_ACCESSED)
+ _PAGE_ACCESSED | _PAGE_SPECIAL)
/* Bits to mask out from a PMD to get to the PTE page */
#define PMD_MASKED_BITS 0x1ff
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
index f69a4d97772..820b5f0a35c 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -429,7 +429,8 @@ extern int icache_44x_need_flush;
#define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE()
#endif
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \
+ _PAGE_SPECIAL)
#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 5af4e9b2dbe..ada06924a42 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -646,11 +646,16 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
unsigned int areg, struct pt_regs *regs,
unsigned int flags, unsigned int length)
{
- char *ptr = (char *) &current->thread.TS_FPR(reg);
+ char *ptr;
int ret = 0;
flush_vsx_to_thread(current);
+ if (reg < 32)
+ ptr = (char *) &current->thread.TS_FPR(reg);
+ else
+ ptr = (char *) &current->thread.vr[reg - 32];
+
if (flags & ST)
ret = __copy_to_user(addr, ptr, length);
else {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 2822c8ccfaa..5f81256287f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -125,6 +125,10 @@ static void kvmppc_free_vcpus(struct kvm *kvm)
}
}
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvmppc_free_vcpus(kvm);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 7393bd76d69..5ac08b8ab65 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -19,6 +19,7 @@
#include <linux/notifier.h>
#include <linux/lmb.h>
#include <linux/of.h>
+#include <linux/pfn.h>
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/system.h>
@@ -882,7 +883,7 @@ static void mark_reserved_regions_for_nid(int nid)
unsigned long physbase = lmb.reserved.region[i].base;
unsigned long size = lmb.reserved.region[i].size;
unsigned long start_pfn = physbase >> PAGE_SHIFT;
- unsigned long end_pfn = ((physbase + size) >> PAGE_SHIFT);
+ unsigned long end_pfn = PFN_UP(physbase + size);
struct node_active_region node_ar;
unsigned long node_end_pfn = node->node_start_pfn +
node->node_spanned_pages;
@@ -908,7 +909,7 @@ static void mark_reserved_regions_for_nid(int nid)
*/
if (end_pfn > node_ar.end_pfn)
reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
- - (start_pfn << PAGE_SHIFT);
+ - physbase;
/*
* Only worry about *this* node, others may not
* yet have valid NODE_DATA().
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 67de6bf3db3..d281cc0bca7 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -328,7 +328,7 @@ static int __init ps3_mm_add_memory(void)
return result;
}
-core_initcall(ps3_mm_add_memory);
+device_initcall(ps3_mm_add_memory);
/*============================================================================*/
/* dma routines */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index be8497186b9..0d33893e1e8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -212,6 +212,10 @@ static void kvm_free_vcpus(struct kvm *kvm)
}
}
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9c39095b33f..8fc9a847cf4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index d2e3bf3608a..886c9402ec4 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -9,6 +9,13 @@
#include <linux/types.h>
#include <linux/ioctl.h>
+/* Select x86 specific features in <linux/kvm.h> */
+#define __KVM_HAVE_PIT
+#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_DEVICE_ASSIGNMENT
+#define __KVM_HAVE_MSI
+#define __KVM_HAVE_USER_NMI
+
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 07f1af494ca..105fb90a063 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -32,8 +32,6 @@ static inline void get_memcfg_numa(void)
get_memcfg_numa_flat();
}
-extern int early_pfn_to_nid(unsigned long pfn);
-
extern void resume_map_numa_kva(pgd_t *pgd);
#else /* !CONFIG_NUMA */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index a5b3817d4b9..a29f48c2a32 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -40,8 +40,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
NODE_DATA(nid)->node_spanned_pages)
-extern int early_pfn_to_nid(unsigned long pfn);
-
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024)
#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index e9873a2e869..776579119a0 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t;
typedef struct { pgprotval_t pgprot; } pgprot_t;
extern int page_is_ram(unsigned long pagenr);
-extern int pagerange_is_ram(unsigned long start, unsigned long end);
extern int devmem_is_allowed(unsigned long pagenr);
extern void map_devmem(unsigned long pfn, unsigned long size,
pgprot_t vma_prot);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c09a1412758..e299287e8e3 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void)
PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
}
-static inline void arch_flush_lazy_cpu_mode(void)
-{
- if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
- arch_leave_lazy_cpu_mode();
- arch_enter_lazy_cpu_mode();
- }
-}
-
+void arch_flush_lazy_cpu_mode(void);
#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
@@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
}
-static inline void arch_flush_lazy_mmu_mode(void)
-{
- if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
- arch_leave_lazy_mmu_mode();
- arch_enter_lazy_mmu_mode();
- }
-}
+void arch_flush_lazy_mmu_mode(void);
static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
unsigned long phys, pgprot_t flags)
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 4b1c319d30c..c5d737cdb36 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,7 +33,7 @@
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
-#include <linux/ftrace.h>
+#include <trace/power.h>
#include <linux/acpi.h>
#include <acpi/processor.h>
@@ -70,6 +70,8 @@ struct acpi_cpufreq_data {
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+DEFINE_TRACE(power_mark);
+
/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance *acpi_perf_data;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index fb039cd345d..6428aa17b40 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1157,8 +1157,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
data->cpu = pol->cpu;
data->currpstate = HW_PSTATE_INVALID;
- rc = powernow_k8_cpu_init_acpi(data);
- if (rc) {
+ if (powernow_k8_cpu_init_acpi(data)) {
/*
* Use the PSB BIOS structure. This is only availabe on
* an UP version, and is deprecated by AMD.
@@ -1176,17 +1175,20 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
"ACPI maintainers and complain to your BIOS "
"vendor.\n");
#endif
- goto err_out;
+ kfree(data);
+ return -ENODEV;
}
if (pol->cpu != 0) {
printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
"CPU other than CPU0. Complain to your BIOS "
"vendor.\n");
- goto err_out;
+ kfree(data);
+ return -ENODEV;
}
rc = find_psb_table(data);
if (rc) {
- goto err_out;
+ kfree(data);
+ return -ENODEV;
}
/* Take a crude guess here.
* That guess was in microseconds, so multiply with 1000 */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6b1f6f6f866..077c9ea655f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -14,6 +14,7 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
+#include <linux/ftrace.h>
#include <asm/stacktrace.h>
@@ -195,6 +196,11 @@ unsigned __kprobes long oops_begin(void)
int cpu;
unsigned long flags;
+ /* notify the hw-branch tracer so it may disable tracing and
+ add the last trace to the trace buffer -
+ the earlier this happens, the more useful the trace. */
+ trace_hw_branch_oops();
+
oops_enter();
/* racy, but better than risking deadlock. */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 231bdd3c5b1..2f9c0c8cb4c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -82,7 +82,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
* are the same as what exists.
*/
-static atomic_t in_nmi = ATOMIC_INIT(0);
+static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status; /* holds return value of text write */
static int mod_code_write; /* set when NMI should do the write */
static void *mod_code_ip; /* holds the IP to write to */
@@ -115,8 +115,8 @@ static void ftrace_mod_code(void)
void ftrace_nmi_enter(void)
{
- atomic_inc(&in_nmi);
- /* Must have in_nmi seen before reading write flag */
+ atomic_inc(&nmi_running);
+ /* Must have nmi_running seen before reading write flag */
smp_mb();
if (mod_code_write) {
ftrace_mod_code();
@@ -126,22 +126,21 @@ void ftrace_nmi_enter(void)
void ftrace_nmi_exit(void)
{
- /* Finish all executions before clearing in_nmi */
+ /* Finish all executions before clearing nmi_running */
smp_wmb();
- atomic_dec(&in_nmi);
+ atomic_dec(&nmi_running);
}
static void wait_for_nmi(void)
{
- int waited = 0;
+ if (!atomic_read(&nmi_running))
+ return;
- while (atomic_read(&in_nmi)) {
- waited = 1;
+ do {
cpu_relax();
- }
+ } while (atomic_read(&nmi_running));
- if (waited)
- nmi_wait_count++;
+ nmi_wait_count++;
}
static int
@@ -368,25 +367,6 @@ int ftrace_disable_ftrace_graph_caller(void)
return ftrace_mod_jmp(ip, old_offset, new_offset);
}
-#else /* CONFIG_DYNAMIC_FTRACE */
-
-/*
- * These functions are picked from those used on
- * this page for dynamic ftrace. They have been
- * simplified to ignore all traces in NMI context.
- */
-static atomic_t in_nmi;
-
-void ftrace_nmi_enter(void)
-{
- atomic_inc(&in_nmi);
-}
-
-void ftrace_nmi_exit(void)
-{
- atomic_dec(&in_nmi);
-}
-
#endif /* !CONFIG_DYNAMIC_FTRACE */
/* Add a function return address to the trace stack on thread info.*/
@@ -476,7 +456,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
&return_to_handler;
/* Nmi's are currently unsupported */
- if (unlikely(atomic_read(&in_nmi)))
+ if (unlikely(in_nmi()))
return;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
@@ -512,13 +492,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}
- if (unlikely(!__kernel_text_address(old))) {
- ftrace_graph_stop();
- *parent = old;
- WARN_ON(1);
- return;
- }
-
calltime = cpu_clock(raw_smp_processor_id());
if (push_return_trace(old, calltime,
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 388254f69a2..a00545fe5cd 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode,
now = hpet_readl(HPET_COUNTER);
cmp = now + (unsigned long) delta;
cfg = hpet_readl(HPET_Tn_CFG(timer));
+ /* Make sure we use edge triggered interrupts */
+ cfg &= ~HPET_TN_LEVEL;
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
HPET_TN_SETVAL | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 7a13fac63a1..4006c522adc 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -203,7 +203,7 @@ static void __init platform_detect(void)
static void __init platform_detect(void)
{
/* stopgap until OFW support is added to the kernel */
- olpc_platform_info.boardrev = 0xc2;
+ olpc_platform_info.boardrev = olpc_board(0xc2);
}
#endif
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4c8fb60887..c6520a4e85d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -268,6 +268,32 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
return __get_cpu_var(paravirt_lazy_mode);
}
+void arch_flush_lazy_mmu_mode(void)
+{
+ preempt_disable();
+
+ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+ WARN_ON(preempt_count() == 1);
+ arch_leave_lazy_mmu_mode();
+ arch_enter_lazy_mmu_mode();
+ }
+
+ preempt_enable();
+}
+
+void arch_flush_lazy_cpu_mode(void)
+{
+ preempt_disable();
+
+ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
+ WARN_ON(preempt_count() == 1);
+ arch_leave_lazy_cpu_mode();
+ arch_enter_lazy_cpu_mode();
+ }
+
+ preempt_enable();
+}
+
struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6d12f7e37f8..23b328edc2b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -8,7 +8,7 @@
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
-#include <linux/ftrace.h>
+#include <trace/power.h>
#include <asm/system.h>
#include <asm/apic.h>
@@ -19,6 +19,9 @@ EXPORT_SYMBOL(idle_nomwait);
struct kmem_cache *task_xstate_cachep;
+DEFINE_TRACE(power_start);
+DEFINE_TRACE(power_end);
+
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
*dst = *src;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f82fb..5a4c23d8989 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child)
static void ptrace_bts_detach(struct task_struct *child)
{
- if (unlikely(child->bts)) {
- ds_release_bts(child->bts);
- child->bts = NULL;
-
- ptrace_bts_free_buffer(child);
- }
+ /*
+ * Ptrace_detach() races with ptrace_untrace() in case
+ * the child dies and is reaped by another thread.
+ *
+ * We only do the memory accounting at this point and
+ * leave the buffer deallocation and the bts tracer
+ * release to ptrace_bts_untrace() which will be called
+ * later on with tasklist_lock held.
+ */
+ release_locked_buffer(child->bts_buffer, child->bts_size);
}
#else
static inline void ptrace_bts_fork(struct task_struct *tsk) {}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7932338d7cb..a9e7548e179 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs)
local_irq_enable();
}
+static inline void conditional_cli(struct pt_regs *regs)
+{
+ if (regs->flags & X86_EFLAGS_IF)
+ local_irq_disable();
+}
+
static inline void preempt_conditional_cli(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
@@ -626,8 +632,10 @@ clear_dr7:
#ifdef CONFIG_X86_32
debug_vm86:
+ /* reenable preemption: handle_vm86_trap() might sleep */
+ dec_preempt_count();
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
- preempt_conditional_cli(regs);
+ conditional_cli(regs);
return;
#endif
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bde..c7da3683f4c 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -55,7 +55,8 @@ config KVM_AMD
config KVM_TRACE
bool "KVM trace support"
- depends on KVM && MARKERS && SYSFS
+ depends on KVM && SYSFS
+ select MARKERS
select RELAY
select DEBUG_FS
default n
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index e665d1c623c..72bd275a9b5 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
hrtimer_add_expires_ns(&pt->timer, pt->period);
pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
if (pt->period)
- ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer);
+ ps->channels[0].count_load_time = ktime_get();
return (pt->period == 0 ? 0 : 1);
}
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index c019b8edcdb..cf17ed52f6f 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
- kvm_apic_timer_intr_post(vcpu, vec);
- /* TODO: PIT, RTC etc. */
-}
-EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
-
void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
__kvm_migrate_apic_timer(vcpu);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2bf32a03cee..82579ee538d 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
void kvm_pic_reset(struct kvm_kpic_state *s);
-void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afac68c0815..f0b67f2cdd6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -35,6 +35,12 @@
#include "kvm_cache_regs.h"
#include "irq.h"
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
@@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic)
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
- u64 counter_passed;
- ktime_t passed, now;
+ ktime_t remaining;
+ s64 ns;
u32 tmcct;
ASSERT(apic != NULL);
- now = apic->timer.dev.base->get_time();
- tmcct = apic_get_reg(apic, APIC_TMICT);
-
/* if initial count is 0, current count should also be 0 */
- if (tmcct == 0)
+ if (apic_get_reg(apic, APIC_TMICT) == 0)
return 0;
- if (unlikely(ktime_to_ns(now) <=
- ktime_to_ns(apic->timer.last_update))) {
- /* Wrap around */
- passed = ktime_add(( {
- (ktime_t) {
- .tv64 = KTIME_MAX -
- (apic->timer.last_update).tv64}; }
- ), now);
- apic_debug("time elapsed\n");
- } else
- passed = ktime_sub(now, apic->timer.last_update);
-
- counter_passed = div64_u64(ktime_to_ns(passed),
- (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
-
- if (counter_passed > tmcct) {
- if (unlikely(!apic_lvtt_period(apic))) {
- /* one-shot timers stick at 0 until reset */
- tmcct = 0;
- } else {
- /*
- * periodic timers reset to APIC_TMICT when they
- * hit 0. The while loop simulates this happening N
- * times. (counter_passed %= tmcct) would also work,
- * but might be slower or not work on 32-bit??
- */
- while (counter_passed > tmcct)
- counter_passed -= tmcct;
- tmcct -= counter_passed;
- }
- } else {
- tmcct -= counter_passed;
- }
+ remaining = hrtimer_expires_remaining(&apic->timer.dev);
+ if (ktime_to_ns(remaining) < 0)
+ remaining = ktime_set(0, 0);
+
+ ns = mod_64(ktime_to_ns(remaining), apic->timer.period);
+ tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
return tmcct;
}
@@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic)
{
ktime_t now = apic->timer.dev.base->get_time();
- apic->timer.last_update = now;
-
apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
APIC_BUS_CYCLE_NS * apic->timer.divide_count;
atomic_set(&apic->timer.pending, 0);
@@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
}
}
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
- apic->timer.last_update = ktime_add_ns(
- apic->timer.last_update,
- apic->timer.period);
-}
-
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{
int vector = kvm_apic_has_interrupt(vcpu);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 81858881287..45ab6ee7120 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -12,7 +12,6 @@ struct kvm_lapic {
atomic_t pending;
s64 period; /* unit: ns */
u32 divide_count;
- ktime_t last_update;
struct hrtimer dev;
} timer;
struct kvm_vcpu *vcpu;
@@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
-void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 83f11c7474a..2d4477c7147 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
if (largepage)
spte |= PT_PAGE_SIZE_MASK;
if (mt_mask) {
- mt_mask = get_memory_type(vcpu, gfn) <<
- kvm_x86_ops->get_mt_mask_shift();
+ if (!kvm_is_mmio_pfn(pfn)) {
+ mt_mask = get_memory_type(vcpu, gfn) <<
+ kvm_x86_ops->get_mt_mask_shift();
+ mt_mask |= VMX_EPT_IGMT_BIT;
+ } else
+ mt_mask = MTRR_TYPE_UNCACHABLE <<
+ kvm_x86_ops->get_mt_mask_shift();
spte |= mt_mask;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1452851ae25..a9e769e4e25 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
/* Okay, we can deliver the interrupt: grab it and update PIC state. */
intr_vector = kvm_cpu_get_interrupt(vcpu);
svm_inject_irq(svm, intr_vector);
- kvm_timer_intr_post(vcpu, intr_vector);
out:
update_cr8_intercept(vcpu);
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6259d746764..7611af57682 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
default:
+ vmx_load_host_state(to_vmx(vcpu));
msr = find_msr_entry(to_vmx(vcpu), msr_index);
if (msr) {
data = msr->data;
@@ -3285,7 +3286,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
}
if (vcpu->arch.interrupt.pending) {
vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
- kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
if (kvm_cpu_has_interrupt(vcpu))
enable_irq_window(vcpu);
}
@@ -3687,8 +3687,7 @@ static int __init vmx_init(void)
if (vm_need_ept()) {
bypass_guest_pf = 0;
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
- VMX_EPT_WRITABLE_MASK |
- VMX_EPT_IGMT_BIT);
+ VMX_EPT_WRITABLE_MASK);
kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
VMX_EPT_EXECUTABLE_MASK,
VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cc17546a240..758b7a155ae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
- case KVM_CAP_CLOCKSOURCE:
case KVM_CAP_PIT:
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
@@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IOMMU:
r = iommu_found();
break;
+ case KVM_CAP_CLOCKSOURCE:
+ r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
+ break;
default:
r = 0;
break;
@@ -4127,9 +4129,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
}
-void kvm_arch_destroy_vm(struct kvm *kvm)
+void kvm_arch_sync_events(struct kvm *kvm)
{
kvm_free_all_assigned_devices(kvm);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
kvm_iommu_unmap_guest(kvm);
kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index af750ab973b..f45d5e29a72 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr)
return 0;
}
-int pagerange_is_ram(unsigned long start, unsigned long end)
-{
- int ram_page = 0, not_rampage = 0;
- unsigned long page_nr;
-
- for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
- ++page_nr) {
- if (page_is_ram(page_nr))
- ram_page = 1;
- else
- not_rampage = 1;
-
- if (ram_page == not_rampage)
- return -1;
- }
-
- return ram_page;
-}
-
/*
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 71a14f89f89..f3516da035d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -145,7 +145,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
return shift;
}
-int early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn)
{
return phys_to_nid(pfn << PAGE_SHIFT);
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 84ba74820ad..8ca0d8566fc 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -575,7 +575,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
address = cpa->vaddr[cpa->curpage];
else
address = *cpa->vaddr;
-
repeat:
kpte = lookup_address(address, &level);
if (!kpte)
@@ -812,6 +811,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
vm_unmap_aliases();
+ /*
+ * If we're called with lazy mmu updates enabled, the
+ * in-memory pte state may be stale. Flush pending updates to
+ * bring them up to date.
+ */
+ arch_flush_lazy_mmu_mode();
+
cpa.vaddr = addr;
cpa.numpages = numpages;
cpa.mask_set = mask_set;
@@ -854,6 +860,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
} else
cpa_flush_all(cache);
+ /*
+ * If we've been called with lazy mmu updates enabled, then
+ * make sure that everything gets flushed out before we
+ * return.
+ */
+ arch_flush_lazy_mmu_mode();
+
out:
return ret;
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7b61036427d..aebbf67a79d 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -211,6 +211,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
static struct memtype *cached_entry;
static u64 cached_start;
+static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
+{
+ int ram_page = 0, not_rampage = 0;
+ unsigned long page_nr;
+
+ for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
+ ++page_nr) {
+ /*
+ * For legacy reasons, physical address range in the legacy ISA
+ * region is tracked as non-RAM. This will allow users of
+ * /dev/mem to map portions of legacy ISA region, even when
+ * some of those portions are listed(or not even listed) with
+ * different e820 types(RAM/reserved/..)
+ */
+ if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
+ page_is_ram(page_nr))
+ ram_page = 1;
+ else
+ not_rampage = 1;
+
+ if (ram_page == not_rampage)
+ return -1;
+ }
+
+ return ram_page;
+}
+
/*
* For RAM pages, mark the pages as non WB memory type using
* PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
@@ -336,20 +363,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
if (new_type)
*new_type = actual_type;
- /*
- * For legacy reasons, some parts of the physical address range in the
- * legacy 1MB region is treated as non-RAM (even when listed as RAM in
- * the e820 tables). So we will track the memory attributes of this
- * legacy 1MB region using the linear memtype_list always.
- */
- if (end >= ISA_END_ADDRESS) {
- is_range_ram = pagerange_is_ram(start, end);
- if (is_range_ram == 1)
- return reserve_ram_pages_type(start, end, req_type,
- new_type);
- else if (is_range_ram < 0)
- return -EINVAL;
- }
+ is_range_ram = pat_pagerange_is_ram(start, end);
+ if (is_range_ram == 1)
+ return reserve_ram_pages_type(start, end, req_type,
+ new_type);
+ else if (is_range_ram < 0)
+ return -EINVAL;
new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
if (!new)
@@ -446,19 +465,11 @@ int free_memtype(u64 start, u64 end)
if (is_ISA_range(start, end - 1))
return 0;
- /*
- * For legacy reasons, some parts of the physical address range in the
- * legacy 1MB region is treated as non-RAM (even when listed as RAM in
- * the e820 tables). So we will track the memory attributes of this
- * legacy 1MB region using the linear memtype_list always.
- */
- if (end >= ISA_END_ADDRESS) {
- is_range_ram = pagerange_is_ram(start, end);
- if (is_range_ram == 1)
- return free_ram_pages_type(start, end);
- else if (is_range_ram < 0)
- return -EINVAL;
- }
+ is_range_ram = pat_pagerange_is_ram(start, end);
+ if (is_range_ram == 1)
+ return free_ram_pages_type(start, end);
+ else if (is_range_ram < 0)
+ return -EINVAL;
spin_lock(&memtype_lock);
list_for_each_entry(entry, &memtype_list, nd) {
@@ -626,17 +637,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
unsigned long flags;
unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
- is_ram = pagerange_is_ram(paddr, paddr + size);
+ is_ram = pat_pagerange_is_ram(paddr, paddr + size);
- if (is_ram != 0) {
- /*
- * For mapping RAM pages, drivers need to call
- * set_memory_[uc|wc|wb] directly, for reserve and free, before
- * setting up the PTE.
- */
- WARN_ON_ONCE(1);
- return 0;
- }
+ /*
+ * reserve_pfn_range() doesn't support RAM pages.
+ */
+ if (is_ram != 0)
+ return -EINVAL;
ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
if (ret)
@@ -693,7 +700,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
{
int is_ram;
- is_ram = pagerange_is_ram(paddr, paddr + size);
+ is_ram = pat_pagerange_is_ram(paddr, paddr + size);
if (is_ram == 0)
free_memtype(paddr, paddr + size);
}
diff --git a/block/Kconfig b/block/Kconfig
index 0cbb3b88b59..e7d12782bcf 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -44,22 +44,6 @@ config LBD
If unsure, say N.
-config BLK_DEV_IO_TRACE
- bool "Support for tracing block io actions"
- depends on SYSFS
- select RELAY
- select DEBUG_FS
- select TRACEPOINTS
- help
- Say Y here if you want to be able to trace the block layer actions
- on a given queue. Tracing allows you to see any traffic happening
- on a block device queue. For more information (and the userspace
- support tools needed), fetch the blktrace tools from:
-
- git://git.kernel.dk/blktrace.git
-
- If unsure, say N.
-
config BLK_DEV_BSG
bool "Block layer SG support v4 (EXPERIMENTAL)"
depends on EXPERIMENTAL
diff --git a/block/Makefile b/block/Makefile
index bfe73049f93..e9fa4dd690f 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -13,6 +13,5 @@ obj-$(CONFIG_IOSCHED_AS) += as-iosched.o
obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
-obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index a09535377a9..bbbdc4b8ccf 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -209,12 +209,19 @@ void blk_abort_queue(struct request_queue *q)
{
unsigned long flags;
struct request *rq, *tmp;
+ LIST_HEAD(list);
spin_lock_irqsave(q->queue_lock, flags);
elv_abort_queue(q);
- list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
+ /*
+ * Splice entries to local list, to avoid deadlocking if entries
+ * get readded to the timeout list by error handling
+ */
+ list_splice_init(&q->timeout_list, &list);
+
+ list_for_each_entry_safe(rq, tmp, &list, timeout_list)
blk_abort_request(rq);
spin_unlock_irqrestore(q->queue_lock, flags);
diff --git a/block/bsg.c b/block/bsg.c
index d414bb5607e..0ce8806dd0c 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -244,7 +244,8 @@ bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw)
* map sg_io_v4 to a request.
*/
static struct request *
-bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
+bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
+ u8 *sense)
{
struct request_queue *q = bd->queue;
struct request *rq, *next_rq = NULL;
@@ -306,6 +307,10 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
if (ret)
goto out;
}
+
+ rq->sense = sense;
+ rq->sense_len = 0;
+
return rq;
out:
if (rq->cmd != rq->__cmd)
@@ -348,9 +353,6 @@ static void bsg_rq_end_io(struct request *rq, int uptodate)
static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
struct bsg_command *bc, struct request *rq)
{
- rq->sense = bc->sense;
- rq->sense_len = 0;
-
/*
* add bc command to busy queue and submit rq for io
*/
@@ -419,7 +421,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
{
int ret = 0;
- dprintk("rq %p bio %p %u\n", rq, bio, rq->errors);
+ dprintk("rq %p bio %p 0x%x\n", rq, bio, rq->errors);
/*
* fill in all the output members
*/
@@ -635,7 +637,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf,
/*
* get a request, fill in the blanks, and add to request queue
*/
- rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm);
+ rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm, bc->sense);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
rq = NULL;
@@ -922,11 +924,12 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct request *rq;
struct bio *bio, *bidi_bio = NULL;
struct sg_io_v4 hdr;
+ u8 sense[SCSI_SENSE_BUFFERSIZE];
if (copy_from_user(&hdr, uarg, sizeof(hdr)))
return -EFAULT;
- rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE);
+ rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE, sense);
if (IS_ERR(rq))
return PTR_ERR(rq);
diff --git a/block/genhd.c b/block/genhd.c
index 397960cf26a..e1eadcc9546 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1087,6 +1087,14 @@ dev_t blk_lookup_devt(const char *name, int partno)
if (strcmp(dev_name(dev), name))
continue;
+ if (partno < disk->minors) {
+ /* We need to return the right devno, even
+ * if the partition doesn't exist yet.
+ */
+ devt = MKDEV(MAJOR(dev->devt),
+ MINOR(dev->devt) + partno);
+ break;
+ }
part = disk_get_part(disk, partno);
if (part) {
devt = part_devt(part);
diff --git a/crypto/lrw.c b/crypto/lrw.c
index 8ef664e3bcd..358f80be2bf 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -45,7 +45,13 @@ struct priv {
static inline void setbit128_bbe(void *b, int bit)
{
- __set_bit(bit ^ 0x78, b);
+ __set_bit(bit ^ (0x80 -
+#ifdef __BIG_ENDIAN
+ BITS_PER_LONG
+#else
+ BITS_PER_BYTE
+#endif
+ ), b);
}
static int setkey(struct crypto_tfm *parent, const u8 *key,
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 0b299b0f817..714cb046b59 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -773,18 +773,32 @@ unsigned int ata_sff_data_xfer32(struct ata_device *dev, unsigned char *buf,
else
iowrite32_rep(data_addr, buf, words);
+ /* Transfer trailing bytes, if any */
if (unlikely(slop)) {
- __le32 pad;
+ unsigned char pad[4];
+
+ /* Point buf to the tail of buffer */
+ buf += buflen - slop;
+
+ /*
+ * Use io*_rep() accessors here as well to avoid pointlessly
+ * swapping bytes to and fro on the big endian machines...
+ */
if (rw == READ) {
- pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr));
- memcpy(buf + buflen - slop, &pad, slop);
+ if (slop < 3)
+ ioread16_rep(data_addr, pad, 1);
+ else
+ ioread32_rep(data_addr, pad, 1);
+ memcpy(buf, pad, slop);
} else {
- memcpy(&pad, buf + buflen - slop, slop);
- iowrite32(le32_to_cpu(pad), ap->ioaddr.data_addr);
+ memcpy(pad, buf, slop);
+ if (slop < 3)
+ iowrite16_rep(data_addr, pad, 1);
+ else
+ iowrite32_rep(data_addr, pad, 1);
}
- words++;
}
- return words << 2;
+ return (buflen + 1) & ~1;
}
EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 79a6c9a0b72..ba556d3e696 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -110,7 +110,8 @@ static const struct via_isa_bridge {
{ "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
{ "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
{ "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_SATA_PATA },
- { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES},
+ { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES },
+ { "vt6415", PCI_DEVICE_ID_VIA_6415, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES },
{ "vt8237a", PCI_DEVICE_ID_VIA_8237A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
{ "vt8237", PCI_DEVICE_ID_VIA_8237, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
{ "vt8235", PCI_DEVICE_ID_VIA_8235, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
@@ -593,6 +594,7 @@ static int via_reinit_one(struct pci_dev *pdev)
#endif
static const struct pci_device_id via[] = {
+ { PCI_VDEVICE(VIA, 0x0415), },
{ PCI_VDEVICE(VIA, 0x0571), },
{ PCI_VDEVICE(VIA, 0x0581), },
{ PCI_VDEVICE(VIA, 0x1571), },
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 444af0415ca..55a8eed3f3a 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -421,19 +421,21 @@ static struct ata_port_operations nv_generic_ops = {
.hardreset = ATA_OP_NULL,
};
-/* OSDL bz3352 reports that nf2/3 controllers can't determine device
- * signature reliably. Also, the following thread reports detection
- * failure on cold boot with the standard debouncing timing.
+/* nf2 is ripe with hardreset related problems.
+ *
+ * kernel bz#3352 reports nf2/3 controllers can't determine device
+ * signature reliably. The following thread reports detection failure
+ * on cold boot with the standard debouncing timing.
*
* http://thread.gmane.org/gmane.linux.ide/34098
*
- * Debounce with hotplug timing and request follow-up SRST.
+ * And bz#12176 reports that hardreset simply doesn't work on nf2.
+ * Give up on it and just don't do hardreset.
*/
static struct ata_port_operations nv_nf2_ops = {
- .inherits = &nv_common_ops,
+ .inherits = &nv_generic_ops,
.freeze = nv_nf2_freeze,
.thaw = nv_nf2_thaw,
- .hardreset = nv_noclassify_hardreset,
};
/* For initial probing after boot and hot plugging, hardreset mostly
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index c237527b1aa..5e41e6dd657 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -18,6 +18,7 @@
enum {
AOECMD_ATA,
AOECMD_CFG,
+ AOECMD_VEND_MIN = 0xf0,
AOEFL_RSP = (1<<3),
AOEFL_ERR = (1<<2),
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index 30de5b1c647..c6099ba9a4b 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -142,6 +142,8 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt,
aoecmd_cfg_rsp(skb);
break;
default:
+ if (h->cmd >= AOECMD_VEND_MIN)
+ break; /* don't complain about vendor commands */
printk(KERN_INFO "aoe: unknown cmd %d\n", h->cmd);
}
exit:
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 01e69383d9c..d2cb67b6117 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3390,6 +3390,203 @@ static void free_hba(int i)
kfree(p);
}
+/* Send a message CDB to the firmware. */
+static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type)
+{
+ typedef struct {
+ CommandListHeader_struct CommandHeader;
+ RequestBlock_struct Request;
+ ErrDescriptor_struct ErrorDescriptor;
+ } Command;
+ static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
+ Command *cmd;
+ dma_addr_t paddr64;
+ uint32_t paddr32, tag;
+ void __iomem *vaddr;
+ int i, err;
+
+ vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
+ if (vaddr == NULL)
+ return -ENOMEM;
+
+ /* The Inbound Post Queue only accepts 32-bit physical addresses for the
+ CCISS commands, so they must be allocated from the lower 4GiB of
+ memory. */
+ err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ if (err) {
+ iounmap(vaddr);
+ return -ENOMEM;
+ }
+
+ cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
+ if (cmd == NULL) {
+ iounmap(vaddr);
+ return -ENOMEM;
+ }
+
+ /* This must fit, because of the 32-bit consistent DMA mask. Also,
+ although there's no guarantee, we assume that the address is at
+ least 4-byte aligned (most likely, it's page-aligned). */
+ paddr32 = paddr64;
+
+ cmd->CommandHeader.ReplyQueue = 0;
+ cmd->CommandHeader.SGList = 0;
+ cmd->CommandHeader.SGTotal = 0;
+ cmd->CommandHeader.Tag.lower = paddr32;
+ cmd->CommandHeader.Tag.upper = 0;
+ memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
+
+ cmd->Request.CDBLen = 16;
+ cmd->Request.Type.Type = TYPE_MSG;
+ cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
+ cmd->Request.Type.Direction = XFER_NONE;
+ cmd->Request.Timeout = 0; /* Don't time out */
+ cmd->Request.CDB[0] = opcode;
+ cmd->Request.CDB[1] = type;
+ memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
+
+ cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
+ cmd->ErrorDescriptor.Addr.upper = 0;
+ cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
+
+ writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
+
+ for (i = 0; i < 10; i++) {
+ tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
+ if ((tag & ~3) == paddr32)
+ break;
+ schedule_timeout_uninterruptible(HZ);
+ }
+
+ iounmap(vaddr);
+
+ /* we leak the DMA buffer here ... no choice since the controller could
+ still complete the command. */
+ if (i == 10) {
+ printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
+ opcode, type);
+ return -ETIMEDOUT;
+ }
+
+ pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
+
+ if (tag & 2) {
+ printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
+ opcode, type);
+ return -EIO;
+ }
+
+ printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
+ opcode, type);
+ return 0;
+}
+
+#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
+#define cciss_noop(p) cciss_message(p, 3, 0)
+
+static __devinit int cciss_reset_msi(struct pci_dev *pdev)
+{
+/* the #defines are stolen from drivers/pci/msi.h. */
+#define msi_control_reg(base) (base + PCI_MSI_FLAGS)
+#define PCI_MSIX_FLAGS_ENABLE (1 << 15)
+
+ int pos;
+ u16 control = 0;
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+ if (pos) {
+ pci_read_config_word(pdev, msi_control_reg(pos), &control);
+ if (control & PCI_MSI_FLAGS_ENABLE) {
+ printk(KERN_INFO "cciss: resetting MSI\n");
+ pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
+ }
+ }
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
+ if (pos) {
+ pci_read_config_word(pdev, msi_control_reg(pos), &control);
+ if (control & PCI_MSIX_FLAGS_ENABLE) {
+ printk(KERN_INFO "cciss: resetting MSI-X\n");
+ pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
+ }
+ }
+
+ return 0;
+}
+
+/* This does a hard reset of the controller using PCI power management
+ * states. */
+static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
+{
+ u16 pmcsr, saved_config_space[32];
+ int i, pos;
+
+ printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
+
+ /* This is very nearly the same thing as
+
+ pci_save_state(pci_dev);
+ pci_set_power_state(pci_dev, PCI_D3hot);
+ pci_set_power_state(pci_dev, PCI_D0);
+ pci_restore_state(pci_dev);
+
+ but we can't use these nice canned kernel routines on
+ kexec, because they also check the MSI/MSI-X state in PCI
+ configuration space and do the wrong thing when it is
+ set/cleared. Also, the pci_save/restore_state functions
+ violate the ordering requirements for restoring the
+ configuration space from the CCISS document (see the
+ comment below). So we roll our own .... */
+
+ for (i = 0; i < 32; i++)
+ pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
+ if (pos == 0) {
+ printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
+ return -ENODEV;
+ }
+
+ /* Quoting from the Open CISS Specification: "The Power
+ * Management Control/Status Register (CSR) controls the power
+ * state of the device. The normal operating state is D0,
+ * CSR=00h. The software off state is D3, CSR=03h. To reset
+ * the controller, place the interface device in D3 then to
+ * D0, this causes a secondary PCI reset which will reset the
+ * controller." */
+
+ /* enter the D3hot power management state */
+ pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
+ pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+ pmcsr |= PCI_D3hot;
+ pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+ schedule_timeout_uninterruptible(HZ >> 1);
+
+ /* enter the D0 power management state */
+ pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+ pmcsr |= PCI_D0;
+ pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+ schedule_timeout_uninterruptible(HZ >> 1);
+
+ /* Restore the PCI configuration space. The Open CISS
+ * Specification says, "Restore the PCI Configuration
+ * Registers, offsets 00h through 60h. It is important to
+ * restore the command register, 16-bits at offset 04h,
+ * last. Do not restore the configuration status register,
+ * 16-bits at offset 06h." Note that the offset is 2*i. */
+ for (i = 0; i < 32; i++) {
+ if (i == 2 || i == 3)
+ continue;
+ pci_write_config_word(pdev, 2*i, saved_config_space[i]);
+ }
+ wmb();
+ pci_write_config_word(pdev, 4, saved_config_space[2]);
+
+ return 0;
+}
+
/*
* This is it. Find all the controllers and register them. I really hate
* stealing all these major device numbers.
@@ -3404,6 +3601,24 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
int dac, return_code;
InquiryData_struct *inq_buff = NULL;
+ if (reset_devices) {
+ /* Reset the controller with a PCI power-cycle */
+ if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
+ return -ENODEV;
+
+ /* Some devices (notably the HP Smart Array 5i Controller)
+ need a little pause here */
+ schedule_timeout_uninterruptible(30*HZ);
+
+ /* Now try to get the controller to respond to a no-op */
+ for (i=0; i<12; i++) {
+ if (cciss_noop(pdev) == 0)
+ break;
+ else
+ printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : ""));
+ }
+ }
+
i = alloc_cciss_hba();
if (i < 0)
return -1;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index cf29cc4e6ab..83d8ed39433 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -558,6 +558,8 @@ static void process_fd_request(void);
static void recalibrate_floppy(void);
static void floppy_shutdown(unsigned long);
+static int floppy_request_regions(int);
+static void floppy_release_regions(int);
static int floppy_grab_irq_and_dma(void);
static void floppy_release_irq_and_dma(void);
@@ -4274,8 +4276,7 @@ static int __init floppy_init(void)
FDCS->rawcmd = 2;
if (user_reset_fdc(-1, FD_RESET_ALWAYS, 0)) {
/* free ioports reserved by floppy_grab_irq_and_dma() */
- release_region(FDCS->address + 2, 4);
- release_region(FDCS->address + 7, 1);
+ floppy_release_regions(fdc);
FDCS->address = -1;
FDCS->version = FDC_NONE;
continue;
@@ -4284,8 +4285,7 @@ static int __init floppy_init(void)
FDCS->version = get_fdc_version();
if (FDCS->version == FDC_NONE) {
/* free ioports reserved by floppy_grab_irq_and_dma() */
- release_region(FDCS->address + 2, 4);
- release_region(FDCS->address + 7, 1);
+ floppy_release_regions(fdc);
FDCS->address = -1;
continue;
}
@@ -4358,6 +4358,47 @@ out_put_disk:
static DEFINE_SPINLOCK(floppy_usage_lock);
+static const struct io_region {
+ int offset;
+ int size;
+} io_regions[] = {
+ { 2, 1 },
+ /* address + 3 is sometimes reserved by pnp bios for motherboard */
+ { 4, 2 },
+ /* address + 6 is reserved, and may be taken by IDE.
+ * Unfortunately, Adaptec doesn't know this :-(, */
+ { 7, 1 },
+};
+
+static void floppy_release_allocated_regions(int fdc, const struct io_region *p)
+{
+ while (p != io_regions) {
+ p--;
+ release_region(FDCS->address + p->offset, p->size);
+ }
+}
+
+#define ARRAY_END(X) (&((X)[ARRAY_SIZE(X)]))
+
+static int floppy_request_regions(int fdc)
+{
+ const struct io_region *p;
+
+ for (p = io_regions; p < ARRAY_END(io_regions); p++) {
+ if (!request_region(FDCS->address + p->offset, p->size, "floppy")) {
+ DPRINT("Floppy io-port 0x%04lx in use\n", FDCS->address + p->offset);
+ floppy_release_allocated_regions(fdc, p);
+ return -EBUSY;
+ }
+ }
+ return 0;
+}
+
+static void floppy_release_regions(int fdc)
+{
+ floppy_release_allocated_regions(fdc, ARRAY_END(io_regions));
+}
+
static int floppy_grab_irq_and_dma(void)
{
unsigned long flags;
@@ -4399,18 +4440,8 @@ static int floppy_grab_irq_and_dma(void)
for (fdc = 0; fdc < N_FDC; fdc++) {
if (FDCS->address != -1) {
- if (!request_region(FDCS->address + 2, 4, "floppy")) {
- DPRINT("Floppy io-port 0x%04lx in use\n",
- FDCS->address + 2);
- goto cleanup1;
- }
- if (!request_region(FDCS->address + 7, 1, "floppy DIR")) {
- DPRINT("Floppy io-port 0x%04lx in use\n",
- FDCS->address + 7);
- goto cleanup2;
- }
- /* address + 6 is reserved, and may be taken by IDE.
- * Unfortunately, Adaptec doesn't know this :-(, */
+ if (floppy_request_regions(fdc))
+ goto cleanup;
}
}
for (fdc = 0; fdc < N_FDC; fdc++) {
@@ -4432,15 +4463,11 @@ static int floppy_grab_irq_and_dma(void)
fdc = 0;
irqdma_allocated = 1;
return 0;
-cleanup2:
- release_region(FDCS->address + 2, 4);
-cleanup1:
+cleanup:
fd_free_irq();
fd_free_dma();
- while (--fdc >= 0) {
- release_region(FDCS->address + 2, 4);
- release_region(FDCS->address + 7, 1);
- }
+ while (--fdc >= 0)
+ floppy_release_regions(fdc);
spin_lock_irqsave(&floppy_usage_lock, flags);
usage_count--;
spin_unlock_irqrestore(&floppy_usage_lock, flags);
@@ -4501,10 +4528,8 @@ static void floppy_release_irq_and_dma(void)
#endif
old_fdc = fdc;
for (fdc = 0; fdc < N_FDC; fdc++)
- if (FDCS->address != -1) {
- release_region(FDCS->address + 2, 4);
- release_region(FDCS->address + 7, 1);
- }
+ if (FDCS->address != -1)
+ floppy_release_regions(fdc);
fdc = old_fdc;
}
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index 9dfa2716300..c397b3ddba9 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -422,7 +422,7 @@ static void xs(char *buf, char *targ, int len)
for (k = 0; k < len; k++) {
char c = *buf++;
- if (c != ' ' || c != l)
+ if (c != ' ' && c != l)
l = *targ++ = c;
}
if (l == ' ')
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 33a9351c896..30659ce9bcf 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -283,7 +283,7 @@ static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
}
static struct sysrq_key_op sysrq_ftrace_dump_op = {
.handler = sysrq_ftrace_dump,
- .help_msg = "dumpZ-ftrace-buffer",
+ .help_msg = "dump-ftrace-buffer(Z)",
.action_msg = "Dump ftrace buffer",
.enable_mask = SYSRQ_ENABLE_DUMP,
};
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index a58993011ed..280a9d263eb 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -518,6 +518,7 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v
dma_chan_name(chan), err);
else
break;
+ chan->private = NULL;
chan = NULL;
}
}
@@ -536,6 +537,7 @@ void dma_release_channel(struct dma_chan *chan)
WARN_ONCE(chan->client_count != 1,
"chan reference count %d != 1\n", chan->client_count);
dma_chan_put(chan);
+ chan->private = NULL;
mutex_unlock(&dma_list_mutex);
}
EXPORT_SYMBOL_GPL(dma_release_channel);
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 6b702cc46b3..a97c07eef7e 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -560,7 +560,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
unsigned long flags)
{
struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
- struct dw_dma_slave *dws = dwc->dws;
+ struct dw_dma_slave *dws = chan->private;
struct dw_desc *prev;
struct dw_desc *first;
u32 ctllo;
@@ -790,7 +790,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
cfghi = DWC_CFGH_FIFO_MODE;
cfglo = 0;
- dws = dwc->dws;
+ dws = chan->private;
if (dws) {
/*
* We need controller-specific data to set up slave
@@ -866,7 +866,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
spin_lock_bh(&dwc->lock);
list_splice_init(&dwc->free_list, &list);
dwc->descs_allocated = 0;
- dwc->dws = NULL;
/* Disable interrupts */
channel_clear_bit(dw, MASK.XFER, dwc->mask);
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index 00fdd187bb0..b252b202c5c 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -139,8 +139,6 @@ struct dw_dma_chan {
struct list_head queue;
struct list_head free_list;
- struct dw_dma_slave *dws;
-
unsigned int descs_allocated;
};
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 261b9aa3f24..05aa2d406ac 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -1,7 +1,7 @@
/*
* linux/drivers/firmware/memmap.c
* Copyright (C) 2008 SUSE LINUX Products GmbH
- * by Bernhard Walle <bwalle@suse.de>
+ * by Bernhard Walle <bernhard.walle@gmx.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License v2.0 as published by
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 4be3acbaaf9..3a22eb9be37 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -80,18 +80,17 @@ config DRM_I915
XFree86 4.4 and above. If unsure, build this and i830 as modules and
the X server will load the correct one.
-endchoice
-
config DRM_I915_KMS
bool "Enable modesetting on intel by default"
depends on DRM_I915
help
- Choose this option if you want kernel modesetting enabled by default,
- and you have a new enough userspace to support this. Running old
- userspaces with this enabled will cause pain. Note that this causes
- the driver to bind to PCI devices, which precludes loading things
- like intelfb.
+ Choose this option if you want kernel modesetting enabled by default,
+ and you have a new enough userspace to support this. Running old
+ userspaces with this enabled will cause pain. Note that this causes
+ the driver to bind to PCI devices, which precludes loading things
+ like intelfb.
+endchoice
config DRM_MGA
tristate "Matrox g200/g400"
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 6cad69ed21c..1cc967448f4 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1300,7 +1300,13 @@ static const struct hid_device_id hid_blacklist[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS3_CONTROLLER) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGX_MOUSE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb651) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) },
{ HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, 0x030c) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT) },
@@ -1605,6 +1611,7 @@ static const struct hid_device_id hid_ignore_list[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0002) },
{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0003) },
{ HID_USB_DEVICE(USB_VENDOR_ID_PANJIT, 0x0004) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_POWERCOM, USB_DEVICE_ID_POWERCOM_UPS) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD2) },
{ HID_USB_DEVICE(USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD3) },
@@ -1612,10 +1619,6 @@ static const struct hid_device_id hid_ignore_list[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_SOUNDGRAPH, USB_DEVICE_ID_SOUNDGRAPH_IMON_LCD5) },
{ HID_USB_DEVICE(USB_VENDOR_ID_TENX, USB_DEVICE_ID_TENX_IBUDDY1) },
{ HID_USB_DEVICE(USB_VENDOR_ID_TENX, USB_DEVICE_ID_TENX_IBUDDY2) },
- { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) },
- { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) },
- { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb651) },
- { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654) },
{ HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_LABPRO) },
{ HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_GOTEMP) },
{ HID_USB_DEVICE(USB_VENDOR_ID_VERNIER, USB_DEVICE_ID_VERNIER_SKIP) },
@@ -1626,8 +1629,6 @@ static const struct hid_device_id hid_ignore_list[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_1_PHIDGETSERVO_20) },
{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_8_8_4_IF_KIT) },
{ HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) },
- { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) },
- { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) },
{ }
};
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index e899f510ebe..88511970508 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -348,6 +348,9 @@
#define USB_VENDOR_ID_PLAYDOTCOM 0x0b43
#define USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII 0x0003
+#define USB_VENDOR_ID_POWERCOM 0x0d9f
+#define USB_DEVICE_ID_POWERCOM_UPS 0x0002
+
#define USB_VENDOR_ID_SAITEK 0x06a3
#define USB_DEVICE_ID_SAITEK_RUMBLEPAD 0xff17
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 73244962897..02b19db5442 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -267,8 +267,10 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd,
default:
{
struct hid_device *hid = dev->hid;
- if (_IOC_TYPE(cmd) != 'H' || _IOC_DIR(cmd) != _IOC_READ)
- return -EINVAL;
+ if (_IOC_TYPE(cmd) != 'H' || _IOC_DIR(cmd) != _IOC_READ) {
+ ret = -EINVAL;
+ break;
+ }
if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWNAME(0))) {
int len;
@@ -277,8 +279,9 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd,
len = strlen(hid->name) + 1;
if (len > _IOC_SIZE(cmd))
len = _IOC_SIZE(cmd);
- return copy_to_user(user_arg, hid->name, len) ?
+ ret = copy_to_user(user_arg, hid->name, len) ?
-EFAULT : len;
+ break;
}
if (_IOC_NR(cmd) == _IOC_NR(HIDIOCGRAWPHYS(0))) {
@@ -288,12 +291,13 @@ static long hidraw_ioctl(struct file *file, unsigned int cmd,
len = strlen(hid->phys) + 1;
if (len > _IOC_SIZE(cmd))
len = _IOC_SIZE(cmd);
- return copy_to_user(user_arg, hid->phys, len) ?
+ ret = copy_to_user(user_arg, hid->phys, len) ?
-EFAULT : len;
+ break;
}
}
- ret = -ENOTTY;
+ ret = -ENOTTY;
}
unlock_kernel();
return ret;
diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c
index 609cafff86b..5f81ddf7150 100644
--- a/drivers/hwmon/f71882fg.c
+++ b/drivers/hwmon/f71882fg.c
@@ -1872,7 +1872,7 @@ static int __init f71882fg_find(int sioaddr, unsigned short *address,
devid = superio_inw(sioaddr, SIO_REG_MANID);
if (devid != SIO_FINTEK_ID) {
- printk(KERN_INFO DRVNAME ": Not a Fintek device\n");
+ pr_debug(DRVNAME ": Not a Fintek device\n");
goto exit;
}
@@ -1932,7 +1932,7 @@ static int __init f71882fg_device_add(unsigned short address,
res.name = f71882fg_pdev->name;
err = acpi_check_resource_conflict(&res);
if (err)
- return err;
+ goto exit_device_put;
err = platform_device_add_resources(f71882fg_pdev, &res, 1);
if (err) {
diff --git a/drivers/hwmon/hp_accel.c b/drivers/hwmon/hp_accel.c
index abf4dfc8ec2..29c83b5b969 100644
--- a/drivers/hwmon/hp_accel.c
+++ b/drivers/hwmon/hp_accel.c
@@ -166,6 +166,18 @@ static struct axis_conversion lis3lv02d_axis_xy_swap_yz_inverted = {2, -1, -3};
}, \
.driver_data = &lis3lv02d_axis_##_axis \
}
+
+#define AXIS_DMI_MATCH2(_ident, _class1, _name1, \
+ _class2, _name2, \
+ _axis) { \
+ .ident = _ident, \
+ .callback = lis3lv02d_dmi_matched, \
+ .matches = { \
+ DMI_MATCH(DMI_##_class1, _name1), \
+ DMI_MATCH(DMI_##_class2, _name2), \
+ }, \
+ .driver_data = &lis3lv02d_axis_##_axis \
+}
static struct dmi_system_id lis3lv02d_dmi_ids[] = {
/* product names are truncated to match all kinds of a same model */
AXIS_DMI_MATCH("NC64x0", "HP Compaq nc64", x_inverted),
@@ -179,6 +191,16 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = {
AXIS_DMI_MATCH("NC673x", "HP Compaq 673", xy_rotated_left_usd),
AXIS_DMI_MATCH("NC651xx", "HP Compaq 651", xy_rotated_right),
AXIS_DMI_MATCH("NC671xx", "HP Compaq 671", xy_swap_yz_inverted),
+ /* Intel-based HP Pavilion dv5 */
+ AXIS_DMI_MATCH2("HPDV5_I",
+ PRODUCT_NAME, "HP Pavilion dv5",
+ BOARD_NAME, "3603",
+ x_inverted),
+ /* AMD-based HP Pavilion dv5 */
+ AXIS_DMI_MATCH2("HPDV5_A",
+ PRODUCT_NAME, "HP Pavilion dv5",
+ BOARD_NAME, "3600",
+ y_inverted),
{ NULL, }
/* Laptop models without axis info (yet):
* "NC6910" "HP Compaq 6910"
@@ -213,9 +235,49 @@ static struct delayed_led_classdev hpled_led = {
.set_brightness = hpled_set,
};
+static acpi_status
+lis3lv02d_get_resource(struct acpi_resource *resource, void *context)
+{
+ if (resource->type == ACPI_RESOURCE_TYPE_EXTENDED_IRQ) {
+ struct acpi_resource_extended_irq *irq;
+ u32 *device_irq = context;
+
+ irq = &resource->data.extended_irq;
+ *device_irq = irq->interrupts[0];
+ }
+
+ return AE_OK;
+}
+
+static void lis3lv02d_enum_resources(struct acpi_device *device)
+{
+ acpi_status status;
+
+ status = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
+ lis3lv02d_get_resource, &adev.irq);
+ if (ACPI_FAILURE(status))
+ printk(KERN_DEBUG DRIVER_NAME ": Error getting resources\n");
+}
+
+static s16 lis3lv02d_read_16(acpi_handle handle, int reg)
+{
+ u8 lo, hi;
+
+ adev.read(handle, reg - 1, &lo);
+ adev.read(handle, reg, &hi);
+ /* In "12 bit right justified" mode, bit 6, bit 7, bit 8 = bit 5 */
+ return (s16)((hi << 8) | lo);
+}
+
+static s16 lis3lv02d_read_8(acpi_handle handle, int reg)
+{
+ s8 lo;
+ adev.read(handle, reg, &lo);
+ return lo;
+}
+
static int lis3lv02d_add(struct acpi_device *device)
{
- u8 val;
int ret;
if (!device)
@@ -229,10 +291,22 @@ static int lis3lv02d_add(struct acpi_device *device)
strcpy(acpi_device_class(device), ACPI_MDPS_CLASS);
device->driver_data = &adev;
- lis3lv02d_acpi_read(device->handle, WHO_AM_I, &val);
- if ((val != LIS3LV02DL_ID) && (val != LIS302DL_ID)) {
+ lis3lv02d_acpi_read(device->handle, WHO_AM_I, &adev.whoami);
+ switch (adev.whoami) {
+ case LIS_DOUBLE_ID:
+ printk(KERN_INFO DRIVER_NAME ": 2-byte sensor found\n");
+ adev.read_data = lis3lv02d_read_16;
+ adev.mdps_max_val = 2048;
+ break;
+ case LIS_SINGLE_ID:
+ printk(KERN_INFO DRIVER_NAME ": 1-byte sensor found\n");
+ adev.read_data = lis3lv02d_read_8;
+ adev.mdps_max_val = 128;
+ break;
+ default:
printk(KERN_ERR DRIVER_NAME
- ": Accelerometer chip not LIS3LV02D{L,Q}\n");
+ ": unknown sensor type 0x%X\n", adev.whoami);
+ return -EINVAL;
}
/* If possible use a "standard" axes order */
@@ -247,6 +321,9 @@ static int lis3lv02d_add(struct acpi_device *device)
if (ret)
return ret;
+ /* obtain IRQ number of our device from ACPI */
+ lis3lv02d_enum_resources(adev.device);
+
ret = lis3lv02d_init_device(&adev);
if (ret) {
flush_work(&hpled_led.work);
diff --git a/drivers/hwmon/lis3lv02d.c b/drivers/hwmon/lis3lv02d.c
index 219d2d0d5a6..8bb2158f045 100644
--- a/drivers/hwmon/lis3lv02d.c
+++ b/drivers/hwmon/lis3lv02d.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 2007-2008 Yan Burman
* Copyright (C) 2008 Eric Piel
- * Copyright (C) 2008 Pavel Machek
+ * Copyright (C) 2008-2009 Pavel Machek
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -35,6 +35,7 @@
#include <linux/poll.h>
#include <linux/freezer.h>
#include <linux/uaccess.h>
+#include <linux/miscdevice.h>
#include <acpi/acpi_drivers.h>
#include <asm/atomic.h>
#include "lis3lv02d.h"
@@ -52,24 +53,14 @@
* joystick.
*/
-/* Maximum value our axis may get for the input device (signed 12 bits) */
-#define MDPS_MAX_VAL 2048
+struct acpi_lis3lv02d adev = {
+ .misc_wait = __WAIT_QUEUE_HEAD_INITIALIZER(adev.misc_wait),
+};
-struct acpi_lis3lv02d adev;
EXPORT_SYMBOL_GPL(adev);
static int lis3lv02d_add_fs(struct acpi_device *device);
-static s16 lis3lv02d_read_16(acpi_handle handle, int reg)
-{
- u8 lo, hi;
-
- adev.read(handle, reg, &lo);
- adev.read(handle, reg + 1, &hi);
- /* In "12 bit right justified" mode, bit 6, bit 7, bit 8 = bit 5 */
- return (s16)((hi << 8) | lo);
-}
-
/**
* lis3lv02d_get_axis - For the given axis, give the value converted
* @axis: 1,2,3 - can also be negative
@@ -98,9 +89,9 @@ static void lis3lv02d_get_xyz(acpi_handle handle, int *x, int *y, int *z)
{
int position[3];
- position[0] = lis3lv02d_read_16(handle, OUTX_L);
- position[1] = lis3lv02d_read_16(handle, OUTY_L);
- position[2] = lis3lv02d_read_16(handle, OUTZ_L);
+ position[0] = adev.read_data(handle, OUTX);
+ position[1] = adev.read_data(handle, OUTY);
+ position[2] = adev.read_data(handle, OUTZ);
*x = lis3lv02d_get_axis(adev.ac.x, position);
*y = lis3lv02d_get_axis(adev.ac.y, position);
@@ -110,26 +101,13 @@ static void lis3lv02d_get_xyz(acpi_handle handle, int *x, int *y, int *z)
void lis3lv02d_poweroff(acpi_handle handle)
{
adev.is_on = 0;
- /* disable X,Y,Z axis and power down */
- adev.write(handle, CTRL_REG1, 0x00);
}
EXPORT_SYMBOL_GPL(lis3lv02d_poweroff);
void lis3lv02d_poweron(acpi_handle handle)
{
- u8 val;
-
adev.is_on = 1;
adev.init(handle);
- adev.write(handle, FF_WU_CFG, 0);
- /*
- * BDU: LSB and MSB values are not updated until both have been read.
- * So the value read will always be correct.
- * IEN: Interrupt for free-fall and DD, not for data-ready.
- */
- adev.read(handle, CTRL_REG2, &val);
- val |= CTRL2_BDU | CTRL2_IEN;
- adev.write(handle, CTRL_REG2, val);
}
EXPORT_SYMBOL_GPL(lis3lv02d_poweron);
@@ -162,6 +140,140 @@ static void lis3lv02d_decrease_use(struct acpi_lis3lv02d *dev)
mutex_unlock(&dev->lock);
}
+static irqreturn_t lis302dl_interrupt(int irq, void *dummy)
+{
+ /*
+ * Be careful: on some HP laptops the bios force DD when on battery and
+ * the lid is closed. This leads to interrupts as soon as a little move
+ * is done.
+ */
+ atomic_inc(&adev.count);
+
+ wake_up_interruptible(&adev.misc_wait);
+ kill_fasync(&adev.async_queue, SIGIO, POLL_IN);
+ return IRQ_HANDLED;
+}
+
+static int lis3lv02d_misc_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ if (test_and_set_bit(0, &adev.misc_opened))
+ return -EBUSY; /* already open */
+
+ atomic_set(&adev.count, 0);
+
+ /*
+ * The sensor can generate interrupts for free-fall and direction
+ * detection (distinguishable with FF_WU_SRC and DD_SRC) but to keep
+ * the things simple and _fast_ we activate it only for free-fall, so
+ * no need to read register (very slow with ACPI). For the same reason,
+ * we forbid shared interrupts.
+ *
+ * IRQF_TRIGGER_RISING seems pointless on HP laptops because the
+ * io-apic is not configurable (and generates a warning) but I keep it
+ * in case of support for other hardware.
+ */
+ ret = request_irq(adev.irq, lis302dl_interrupt, IRQF_TRIGGER_RISING,
+ DRIVER_NAME, &adev);
+
+ if (ret) {
+ clear_bit(0, &adev.misc_opened);
+ printk(KERN_ERR DRIVER_NAME ": IRQ%d allocation failed\n", adev.irq);
+ return -EBUSY;
+ }
+ lis3lv02d_increase_use(&adev);
+ printk("lis3: registered interrupt %d\n", adev.irq);
+ return 0;
+}
+
+static int lis3lv02d_misc_release(struct inode *inode, struct file *file)
+{
+ fasync_helper(-1, file, 0, &adev.async_queue);
+ lis3lv02d_decrease_use(&adev);
+ free_irq(adev.irq, &adev);
+ clear_bit(0, &adev.misc_opened); /* release the device */
+ return 0;
+}
+
+static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ u32 data;
+ unsigned char byte_data;
+ ssize_t retval = 1;
+
+ if (count < 1)
+ return -EINVAL;
+
+ add_wait_queue(&adev.misc_wait, &wait);
+ while (true) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ data = atomic_xchg(&adev.count, 0);
+ if (data)
+ break;
+
+ if (file->f_flags & O_NONBLOCK) {
+ retval = -EAGAIN;
+ goto out;
+ }
+
+ if (signal_pending(current)) {
+ retval = -ERESTARTSYS;
+ goto out;
+ }
+
+ schedule();
+ }
+
+ if (data < 255)
+ byte_data = data;
+ else
+ byte_data = 255;
+
+ /* make sure we are not going into copy_to_user() with
+ * TASK_INTERRUPTIBLE state */
+ set_current_state(TASK_RUNNING);
+ if (copy_to_user(buf, &byte_data, sizeof(byte_data)))
+ retval = -EFAULT;
+
+out:
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&adev.misc_wait, &wait);
+
+ return retval;
+}
+
+static unsigned int lis3lv02d_misc_poll(struct file *file, poll_table *wait)
+{
+ poll_wait(file, &adev.misc_wait, wait);
+ if (atomic_read(&adev.count))
+ return POLLIN | POLLRDNORM;
+ return 0;
+}
+
+static int lis3lv02d_misc_fasync(int fd, struct file *file, int on)
+{
+ return fasync_helper(fd, file, on, &adev.async_queue);
+}
+
+static const struct file_operations lis3lv02d_misc_fops = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .read = lis3lv02d_misc_read,
+ .open = lis3lv02d_misc_open,
+ .release = lis3lv02d_misc_release,
+ .poll = lis3lv02d_misc_poll,
+ .fasync = lis3lv02d_misc_fasync,
+};
+
+static struct miscdevice lis3lv02d_misc_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "freefall",
+ .fops = &lis3lv02d_misc_fops,
+};
+
/**
* lis3lv02d_joystick_kthread - Kthread polling function
* @data: unused - here to conform to threadfn prototype
@@ -203,7 +315,6 @@ static void lis3lv02d_joystick_close(struct input_dev *input)
lis3lv02d_decrease_use(&adev);
}
-
static inline void lis3lv02d_calibrate_joystick(void)
{
lis3lv02d_get_xyz(adev.device->handle, &adev.xcalib, &adev.ycalib, &adev.zcalib);
@@ -231,9 +342,9 @@ int lis3lv02d_joystick_enable(void)
adev.idev->close = lis3lv02d_joystick_close;
set_bit(EV_ABS, adev.idev->evbit);
- input_set_abs_params(adev.idev, ABS_X, -MDPS_MAX_VAL, MDPS_MAX_VAL, 3, 3);
- input_set_abs_params(adev.idev, ABS_Y, -MDPS_MAX_VAL, MDPS_MAX_VAL, 3, 3);
- input_set_abs_params(adev.idev, ABS_Z, -MDPS_MAX_VAL, MDPS_MAX_VAL, 3, 3);
+ input_set_abs_params(adev.idev, ABS_X, -adev.mdps_max_val, adev.mdps_max_val, 3, 3);
+ input_set_abs_params(adev.idev, ABS_Y, -adev.mdps_max_val, adev.mdps_max_val, 3, 3);
+ input_set_abs_params(adev.idev, ABS_Z, -adev.mdps_max_val, adev.mdps_max_val, 3, 3);
err = input_register_device(adev.idev);
if (err) {
@@ -250,6 +361,7 @@ void lis3lv02d_joystick_disable(void)
if (!adev.idev)
return;
+ misc_deregister(&lis3lv02d_misc_device);
input_unregister_device(adev.idev);
adev.idev = NULL;
}
@@ -268,6 +380,19 @@ int lis3lv02d_init_device(struct acpi_lis3lv02d *dev)
if (lis3lv02d_joystick_enable())
printk(KERN_ERR DRIVER_NAME ": joystick initialization failed\n");
+ printk("lis3_init_device: irq %d\n", dev->irq);
+
+ /* if we did not get an IRQ from ACPI - we have nothing more to do */
+ if (!dev->irq) {
+ printk(KERN_ERR DRIVER_NAME
+ ": No IRQ in ACPI. Disabling /dev/freefall\n");
+ goto out;
+ }
+
+ printk("lis3: registering device\n");
+ if (misc_register(&lis3lv02d_misc_device))
+ printk(KERN_ERR DRIVER_NAME ": misc_register failed\n");
+out:
lis3lv02d_decrease_use(dev);
return 0;
}
@@ -351,6 +476,6 @@ int lis3lv02d_remove_fs(void)
EXPORT_SYMBOL_GPL(lis3lv02d_remove_fs);
MODULE_DESCRIPTION("ST LIS3LV02Dx three-axis digital accelerometer driver");
-MODULE_AUTHOR("Yan Burman and Eric Piel");
+MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek");
MODULE_LICENSE("GPL");
diff --git a/drivers/hwmon/lis3lv02d.h b/drivers/hwmon/lis3lv02d.h
index 223f1c0763b..75972bf372f 100644
--- a/drivers/hwmon/lis3lv02d.h
+++ b/drivers/hwmon/lis3lv02d.h
@@ -22,12 +22,15 @@
/*
* The actual chip is STMicroelectronics LIS3LV02DL or LIS3LV02DQ that seems to
* be connected via SPI. There exists also several similar chips (such as LIS302DL or
- * LIS3L02DQ) but not in the HP laptops and they have slightly different registers.
+ * LIS3L02DQ) and they have slightly different registers, but we can provide a
+ * common interface for all of them.
* They can also be connected via I²C.
*/
-#define LIS3LV02DL_ID 0x3A /* Also the LIS3LV02DQ */
-#define LIS302DL_ID 0x3B /* Also the LIS202DL! */
+/* 2-byte registers */
+#define LIS_DOUBLE_ID 0x3A /* LIS3LV02D[LQ] */
+/* 1-byte registers */
+#define LIS_SINGLE_ID 0x3B /* LIS[32]02DL and others */
enum lis3lv02d_reg {
WHO_AM_I = 0x0F,
@@ -44,10 +47,13 @@ enum lis3lv02d_reg {
STATUS_REG = 0x27,
OUTX_L = 0x28,
OUTX_H = 0x29,
+ OUTX = 0x29,
OUTY_L = 0x2A,
OUTY_H = 0x2B,
+ OUTY = 0x2B,
OUTZ_L = 0x2C,
OUTZ_H = 0x2D,
+ OUTZ = 0x2D,
FF_WU_CFG = 0x30,
FF_WU_SRC = 0x31,
FF_WU_ACK = 0x32,
@@ -159,6 +165,10 @@ struct acpi_lis3lv02d {
acpi_status (*write) (acpi_handle handle, int reg, u8 val);
acpi_status (*read) (acpi_handle handle, int reg, u8 *ret);
+ u8 whoami; /* 3Ah: 2-byte registries, 3Bh: 1-byte registries */
+ s16 (*read_data) (acpi_handle handle, int reg);
+ int mdps_max_val;
+
struct input_dev *idev; /* input device */
struct task_struct *kthread; /* kthread for input */
struct mutex lock;
@@ -170,6 +180,11 @@ struct acpi_lis3lv02d {
unsigned char is_on; /* whether the device is on or off */
unsigned char usage; /* usage counter */
struct axis_conversion ac; /* hw -> logical axis */
+
+ u32 irq; /* IRQ number */
+ struct fasync_struct *async_queue; /* queue for the misc device */
+ wait_queue_head_t misc_wait; /* Wait queue for the misc device */
+ unsigned long misc_opened; /* bit0: whether the device is open */
};
int lis3lv02d_init_device(struct acpi_lis3lv02d *dev);
diff --git a/drivers/hwmon/vt1211.c b/drivers/hwmon/vt1211.c
index b0ce3785228..73f77a9b8b1 100644
--- a/drivers/hwmon/vt1211.c
+++ b/drivers/hwmon/vt1211.c
@@ -1262,7 +1262,7 @@ static int __init vt1211_device_add(unsigned short address)
res.name = pdev->name;
err = acpi_check_resource_conflict(&res);
if (err)
- goto EXIT;
+ goto EXIT_DEV_PUT;
err = platform_device_add_resources(pdev, &res, 1);
if (err) {
diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c
index cb808d01536..feae743ba99 100644
--- a/drivers/hwmon/w83627ehf.c
+++ b/drivers/hwmon/w83627ehf.c
@@ -1548,7 +1548,7 @@ static int __init sensors_w83627ehf_init(void)
err = acpi_check_resource_conflict(&res);
if (err)
- goto exit;
+ goto exit_device_put;
err = platform_device_add_resources(pdev, &res, 1);
if (err) {
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index a34338567a2..f14813be4ef 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -328,7 +328,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
struct dpages old_pages = *dp;
if (sync)
- rw |= (1 << BIO_RW_SYNC);
+ rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
/*
* For multiple regions we need to be careful to rewind
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 3073618269e..0a225da2127 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -344,7 +344,7 @@ static int run_io_job(struct kcopyd_job *job)
{
int r;
struct dm_io_request io_req = {
- .bi_rw = job->rw | (1 << BIO_RW_SYNC),
+ .bi_rw = job->rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG),
.mem.type = DM_IO_PAGE_LIST,
.mem.ptr.pl = job->pages,
.mem.offset = job->offset,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4495104f6c9..03b4cd0a634 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -474,7 +474,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
* causes ENOTSUPP, we allocate a spare bio...
*/
struct bio *bio = bio_alloc(GFP_NOIO, 1);
- int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNC);
+ int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNCIO) | (1<<BIO_RW_UNPLUG);
bio->bi_bdev = rdev->bdev;
bio->bi_sector = sector;
@@ -531,7 +531,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size,
struct completion event;
int ret;
- rw |= (1 << BIO_RW_SYNC);
+ rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
bio->bi_bdev = bdev;
bio->bi_sector = sector;
diff --git a/drivers/media/common/tuners/tuner-simple.c b/drivers/media/common/tuners/tuner-simple.c
index de7adaf5fa5..78412c9c424 100644
--- a/drivers/media/common/tuners/tuner-simple.c
+++ b/drivers/media/common/tuners/tuner-simple.c
@@ -318,7 +318,6 @@ static int simple_std_setup(struct dvb_frontend *fe,
u8 *config, u8 *cb)
{
struct tuner_simple_priv *priv = fe->tuner_priv;
- u8 tuneraddr;
int rc;
/* tv norm specific stuff for multi-norm tuners */
@@ -387,6 +386,7 @@ static int simple_std_setup(struct dvb_frontend *fe,
case TUNER_PHILIPS_TUV1236D:
{
+ struct tuner_i2c_props i2c = priv->i2c_props;
/* 0x40 -> ATSC antenna input 1 */
/* 0x48 -> ATSC antenna input 2 */
/* 0x00 -> NTSC antenna input 1 */
@@ -398,17 +398,15 @@ static int simple_std_setup(struct dvb_frontend *fe,
buffer[1] = 0x04;
}
/* set to the correct mode (analog or digital) */
- tuneraddr = priv->i2c_props.addr;
- priv->i2c_props.addr = 0x0a;
- rc = tuner_i2c_xfer_send(&priv->i2c_props, &buffer[0], 2);
+ i2c.addr = 0x0a;
+ rc = tuner_i2c_xfer_send(&i2c, &buffer[0], 2);
if (2 != rc)
tuner_warn("i2c i/o error: rc == %d "
"(should be 2)\n", rc);
- rc = tuner_i2c_xfer_send(&priv->i2c_props, &buffer[2], 2);
+ rc = tuner_i2c_xfer_send(&i2c, &buffer[2], 2);
if (2 != rc)
tuner_warn("i2c i/o error: rc == %d "
"(should be 2)\n", rc);
- priv->i2c_props.addr = tuneraddr;
break;
}
}
diff --git a/drivers/media/dvb/dvb-core/dmxdev.c b/drivers/media/dvb/dvb-core/dmxdev.c
index 0c733c66a44..069d847ba88 100644
--- a/drivers/media/dvb/dvb-core/dmxdev.c
+++ b/drivers/media/dvb/dvb-core/dmxdev.c
@@ -364,16 +364,15 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len,
enum dmx_success success)
{
struct dmxdev_filter *dmxdevfilter = filter->priv;
- unsigned long flags;
int ret;
if (dmxdevfilter->buffer.error) {
wake_up(&dmxdevfilter->buffer.queue);
return 0;
}
- spin_lock_irqsave(&dmxdevfilter->dev->lock, flags);
+ spin_lock(&dmxdevfilter->dev->lock);
if (dmxdevfilter->state != DMXDEV_STATE_GO) {
- spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
+ spin_unlock(&dmxdevfilter->dev->lock);
return 0;
}
del_timer(&dmxdevfilter->timer);
@@ -392,7 +391,7 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len,
}
if (dmxdevfilter->params.sec.flags & DMX_ONESHOT)
dmxdevfilter->state = DMXDEV_STATE_DONE;
- spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
+ spin_unlock(&dmxdevfilter->dev->lock);
wake_up(&dmxdevfilter->buffer.queue);
return 0;
}
@@ -404,12 +403,11 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len,
{
struct dmxdev_filter *dmxdevfilter = feed->priv;
struct dvb_ringbuffer *buffer;
- unsigned long flags;
int ret;
- spin_lock_irqsave(&dmxdevfilter->dev->lock, flags);
+ spin_lock(&dmxdevfilter->dev->lock);
if (dmxdevfilter->params.pes.output == DMX_OUT_DECODER) {
- spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
+ spin_unlock(&dmxdevfilter->dev->lock);
return 0;
}
@@ -419,7 +417,7 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len,
else
buffer = &dmxdevfilter->dev->dvr_buffer;
if (buffer->error) {
- spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
+ spin_unlock(&dmxdevfilter->dev->lock);
wake_up(&buffer->queue);
return 0;
}
@@ -430,7 +428,7 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len,
dvb_ringbuffer_flush(buffer);
buffer->error = ret;
}
- spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags);
+ spin_unlock(&dmxdevfilter->dev->lock);
wake_up(&buffer->queue);
return 0;
}
diff --git a/drivers/media/dvb/dvb-core/dvb_demux.c b/drivers/media/dvb/dvb-core/dvb_demux.c
index a2c1fd5d2f6..e2eca0b1fe7 100644
--- a/drivers/media/dvb/dvb-core/dvb_demux.c
+++ b/drivers/media/dvb/dvb-core/dvb_demux.c
@@ -399,9 +399,7 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf)
void dvb_dmx_swfilter_packets(struct dvb_demux *demux, const u8 *buf,
size_t count)
{
- unsigned long flags;
-
- spin_lock_irqsave(&demux->lock, flags);
+ spin_lock(&demux->lock);
while (count--) {
if (buf[0] == 0x47)
@@ -409,17 +407,16 @@ void dvb_dmx_swfilter_packets(struct dvb_demux *demux, const u8 *buf,
buf += 188;
}
- spin_unlock_irqrestore(&demux->lock, flags);
+ spin_unlock(&demux->lock);
}
EXPORT_SYMBOL(dvb_dmx_swfilter_packets);
void dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count)
{
- unsigned long flags;
int p = 0, i, j;
- spin_lock_irqsave(&demux->lock, flags);
+ spin_lock(&demux->lock);
if (demux->tsbufp) {
i = demux->tsbufp;
@@ -452,18 +449,17 @@ void dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count)
}
bailout:
- spin_unlock_irqrestore(&demux->lock, flags);
+ spin_unlock(&demux->lock);
}
EXPORT_SYMBOL(dvb_dmx_swfilter);
void dvb_dmx_swfilter_204(struct dvb_demux *demux, const u8 *buf, size_t count)
{
- unsigned long flags;
int p = 0, i, j;
u8 tmppack[188];
- spin_lock_irqsave(&demux->lock, flags);
+ spin_lock(&demux->lock);
if (demux->tsbufp) {
i = demux->tsbufp;
@@ -504,7 +500,7 @@ void dvb_dmx_swfilter_204(struct dvb_demux *demux, const u8 *buf, size_t count)
}
bailout:
- spin_unlock_irqrestore(&demux->lock, flags);
+ spin_unlock(&demux->lock);
}
EXPORT_SYMBOL(dvb_dmx_swfilter_204);
diff --git a/drivers/media/radio/radio-si470x.c b/drivers/media/radio/radio-si470x.c
index 67cbce82cb9..4dfed6aa2db 100644
--- a/drivers/media/radio/radio-si470x.c
+++ b/drivers/media/radio/radio-si470x.c
@@ -98,11 +98,16 @@
* - blacklisted KWorld radio in hid-core.c and hid-ids.h
* 2008-12-03 Mark Lord <mlord@pobox.com>
* - add support for DealExtreme USB Radio
+ * 2009-01-31 Bob Ross <pigiron@gmx.com>
+ * - correction of stereo detection/setting
+ * - correction of signal strength indicator scaling
+ * 2009-01-31 Rick Bronson <rick@efn.org>
+ * Tobias Lorenz <tobias.lorenz@gmx.net>
+ * - add LED status output
*
* ToDo:
* - add firmware download/update support
* - RDS support: interrupt mode, instead of polling
- * - add LED status output (check if that's not already done in firmware)
*/
@@ -882,6 +887,30 @@ static int si470x_rds_on(struct si470x_device *radio)
/**************************************************************************
+ * General Driver Functions - LED_REPORT
+ **************************************************************************/
+
+/*
+ * si470x_set_led_state - sets the led state
+ */
+static int si470x_set_led_state(struct si470x_device *radio,
+ unsigned char led_state)
+{
+ unsigned char buf[LED_REPORT_SIZE];
+ int retval;
+
+ buf[0] = LED_REPORT;
+ buf[1] = LED_COMMAND;
+ buf[2] = led_state;
+
+ retval = si470x_set_report(radio, (void *) &buf, sizeof(buf));
+
+ return (retval < 0) ? -EINVAL : 0;
+}
+
+
+
+/**************************************************************************
* RDS Driver Functions
**************************************************************************/
@@ -1385,20 +1414,22 @@ static int si470x_vidioc_g_tuner(struct file *file, void *priv,
};
/* stereo indicator == stereo (instead of mono) */
- if ((radio->registers[STATUSRSSI] & STATUSRSSI_ST) == 1)
- tuner->rxsubchans = V4L2_TUNER_SUB_MONO | V4L2_TUNER_SUB_STEREO;
- else
+ if ((radio->registers[STATUSRSSI] & STATUSRSSI_ST) == 0)
tuner->rxsubchans = V4L2_TUNER_SUB_MONO;
+ else
+ tuner->rxsubchans = V4L2_TUNER_SUB_MONO | V4L2_TUNER_SUB_STEREO;
/* mono/stereo selector */
- if ((radio->registers[POWERCFG] & POWERCFG_MONO) == 1)
- tuner->audmode = V4L2_TUNER_MODE_MONO;
- else
+ if ((radio->registers[POWERCFG] & POWERCFG_MONO) == 0)
tuner->audmode = V4L2_TUNER_MODE_STEREO;
+ else
+ tuner->audmode = V4L2_TUNER_MODE_MONO;
/* min is worst, max is best; signal:0..0xffff; rssi: 0..0xff */
- tuner->signal = (radio->registers[STATUSRSSI] & STATUSRSSI_RSSI)
- * 0x0101;
+ /* measured in units of dbµV in 1 db increments (max at ~75 dbµV) */
+ tuner->signal = (radio->registers[STATUSRSSI] & STATUSRSSI_RSSI);
+ /* the ideal factor is 0xffff/75 = 873,8 */
+ tuner->signal = (tuner->signal * 873) + (8 * tuner->signal / 10);
/* automatic frequency control: -1: freq to low, 1 freq to high */
/* AFCRL does only indicate that freq. differs, not if too low/high */
@@ -1632,6 +1663,9 @@ static int si470x_usb_driver_probe(struct usb_interface *intf,
/* set initial frequency */
si470x_set_freq(radio, 87.5 * FREQ_MUL); /* available in all regions */
+ /* set led to connect state */
+ si470x_set_led_state(radio, BLINK_GREEN_LED);
+
/* rds buffer allocation */
radio->buf_size = rds_buf * 3;
radio->buffer = kmalloc(radio->buf_size, GFP_KERNEL);
@@ -1715,6 +1749,9 @@ static void si470x_usb_driver_disconnect(struct usb_interface *intf)
cancel_delayed_work_sync(&radio->work);
usb_set_intfdata(intf, NULL);
if (radio->users == 0) {
+ /* set led to disconnect state */
+ si470x_set_led_state(radio, BLINK_ORANGE_LED);
+
video_unregister_device(radio->videodev);
kfree(radio->buffer);
kfree(radio);
diff --git a/drivers/media/video/gspca/gspca.c b/drivers/media/video/gspca/gspca.c
index 2ed24527ecd..65e4901f4db 100644
--- a/drivers/media/video/gspca/gspca.c
+++ b/drivers/media/video/gspca/gspca.c
@@ -422,6 +422,7 @@ static void destroy_urbs(struct gspca_dev *gspca_dev)
if (urb == NULL)
break;
+ BUG_ON(!gspca_dev->dev);
gspca_dev->urb[i] = NULL;
if (!gspca_dev->present)
usb_kill_urb(urb);
@@ -1950,8 +1951,12 @@ void gspca_disconnect(struct usb_interface *intf)
{
struct gspca_dev *gspca_dev = usb_get_intfdata(intf);
+ mutex_lock(&gspca_dev->usb_lock);
gspca_dev->present = 0;
+ mutex_unlock(&gspca_dev->usb_lock);
+ destroy_urbs(gspca_dev);
+ gspca_dev->dev = NULL;
usb_set_intfdata(intf, NULL);
/* release the device */
diff --git a/drivers/media/video/ivtv/ivtv-ioctl.c b/drivers/media/video/ivtv/ivtv-ioctl.c
index f6b3ef6e691..c13bd2aa0be 100644
--- a/drivers/media/video/ivtv/ivtv-ioctl.c
+++ b/drivers/media/video/ivtv/ivtv-ioctl.c
@@ -393,7 +393,7 @@ static int ivtv_g_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_fo
return 0;
}
- v4l2_subdev_call(itv->sd_video, video, s_fmt, fmt);
+ v4l2_subdev_call(itv->sd_video, video, g_fmt, fmt);
vbifmt->service_set = ivtv_get_service_set(vbifmt);
return 0;
}
@@ -1748,6 +1748,18 @@ static long ivtv_default(struct file *file, void *fh, int cmd, void *arg)
break;
}
+ case IVTV_IOC_DMA_FRAME:
+ case VIDEO_GET_PTS:
+ case VIDEO_GET_FRAME_COUNT:
+ case VIDEO_GET_EVENT:
+ case VIDEO_PLAY:
+ case VIDEO_STOP:
+ case VIDEO_FREEZE:
+ case VIDEO_CONTINUE:
+ case VIDEO_COMMAND:
+ case VIDEO_TRY_COMMAND:
+ return ivtv_decoder_ioctls(file, cmd, (void *)arg);
+
default:
return -EINVAL;
}
@@ -1790,18 +1802,6 @@ static long ivtv_serialized_ioctl(struct ivtv *itv, struct file *filp,
ivtv_vapi(itv, CX2341X_DEC_SET_AUDIO_MODE, 2, itv->audio_bilingual_mode, itv->audio_stereo_mode);
return 0;
- case IVTV_IOC_DMA_FRAME:
- case VIDEO_GET_PTS:
- case VIDEO_GET_FRAME_COUNT:
- case VIDEO_GET_EVENT:
- case VIDEO_PLAY:
- case VIDEO_STOP:
- case VIDEO_FREEZE:
- case VIDEO_CONTINUE:
- case VIDEO_COMMAND:
- case VIDEO_TRY_COMMAND:
- return ivtv_decoder_ioctls(filp, cmd, (void *)arg);
-
default:
break;
}
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c
index 1a4d04664d6..aa266e1f69b 100644
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -286,7 +286,7 @@ static int __init egpio_probe(struct platform_device *pdev)
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
goto fail;
- ei->base_addr = ioremap_nocache(res->start, res->end - res->start);
+ ei->base_addr = ioremap_nocache(res->start, resource_size(res));
if (!ei->base_addr)
goto fail;
pr_debug("EGPIO phys=%08x virt=%p\n", (u32)res->start, ei->base_addr);
@@ -307,7 +307,7 @@ static int __init egpio_probe(struct platform_device *pdev)
ei->nchips = pdata->num_chips;
ei->chip = kzalloc(sizeof(struct egpio_chip) * ei->nchips, GFP_KERNEL);
- if (!ei) {
+ if (!ei->chip) {
ret = -ENOMEM;
goto fail;
}
diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c
index ea9488e7ad6..2e36057659e 100644
--- a/drivers/mfd/pcf50633-core.c
+++ b/drivers/mfd/pcf50633-core.c
@@ -678,6 +678,7 @@ static int __devexit pcf50633_remove(struct i2c_client *client)
static struct i2c_device_id pcf50633_id_table[] = {
{"pcf50633", 0x73},
+ {/* end of list */}
};
static struct i2c_driver pcf50633_driver = {
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 0e5761f1263..4c7b7962f6b 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -1050,7 +1050,7 @@ static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm,
return gpiochip_add(gchip);
}
-static int sm501_register_gpio(struct sm501_devdata *sm)
+static int __devinit sm501_register_gpio(struct sm501_devdata *sm)
{
struct sm501_gpio *gpio = &sm->gpio;
resource_size_t iobase = sm->io_res->start + SM501_GPIO;
@@ -1321,7 +1321,7 @@ static unsigned int sm501_mem_local[] = {
* Common init code for an SM501
*/
-static int sm501_init_dev(struct sm501_devdata *sm)
+static int __devinit sm501_init_dev(struct sm501_devdata *sm)
{
struct sm501_initdata *idata;
struct sm501_platdata *pdata;
@@ -1397,7 +1397,7 @@ static int sm501_init_dev(struct sm501_devdata *sm)
return 0;
}
-static int sm501_plat_probe(struct platform_device *dev)
+static int __devinit sm501_plat_probe(struct platform_device *dev)
{
struct sm501_devdata *sm;
int ret;
@@ -1586,8 +1586,8 @@ static struct sm501_platdata sm501_pci_platdata = {
.gpio_base = -1,
};
-static int sm501_pci_probe(struct pci_dev *dev,
- const struct pci_device_id *id)
+static int __devinit sm501_pci_probe(struct pci_dev *dev,
+ const struct pci_device_id *id)
{
struct sm501_devdata *sm;
int err;
@@ -1693,7 +1693,7 @@ static void sm501_dev_remove(struct sm501_devdata *sm)
sm501_gpio_remove(sm);
}
-static void sm501_pci_remove(struct pci_dev *dev)
+static void __devexit sm501_pci_remove(struct pci_dev *dev)
{
struct sm501_devdata *sm = pci_get_drvdata(dev);
@@ -1727,16 +1727,16 @@ static struct pci_device_id sm501_pci_tbl[] = {
MODULE_DEVICE_TABLE(pci, sm501_pci_tbl);
-static struct pci_driver sm501_pci_drv = {
+static struct pci_driver sm501_pci_driver = {
.name = "sm501",
.id_table = sm501_pci_tbl,
.probe = sm501_pci_probe,
- .remove = sm501_pci_remove,
+ .remove = __devexit_p(sm501_pci_remove),
};
MODULE_ALIAS("platform:sm501");
-static struct platform_driver sm501_plat_drv = {
+static struct platform_driver sm501_plat_driver = {
.driver = {
.name = "sm501",
.owner = THIS_MODULE,
@@ -1749,14 +1749,14 @@ static struct platform_driver sm501_plat_drv = {
static int __init sm501_base_init(void)
{
- platform_driver_register(&sm501_plat_drv);
- return pci_register_driver(&sm501_pci_drv);
+ platform_driver_register(&sm501_plat_driver);
+ return pci_register_driver(&sm501_pci_driver);
}
static void __exit sm501_base_exit(void)
{
- platform_driver_unregister(&sm501_plat_drv);
- pci_unregister_driver(&sm501_pci_drv);
+ platform_driver_unregister(&sm501_plat_driver);
+ pci_unregister_driver(&sm501_pci_driver);
}
module_init(sm501_base_init);
diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index e7ab0035d30..68826f1e36b 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -38,7 +38,7 @@
#include <linux/i2c.h>
#include <linux/i2c/twl4030.h>
-#ifdef CONFIG_ARM
+#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
#include <mach/cpu.h>
#endif
diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index f92595c8f16..84d5ea1ec17 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -1111,7 +1111,7 @@ int wm8350_read_auxadc(struct wm8350 *wm8350, int channel, int scale, int vref)
do {
schedule_timeout_interruptible(1);
reg = wm8350_reg_read(wm8350, WM8350_DIGITISER_CONTROL_1);
- } while (tries-- && (reg & WM8350_AUXADC_POLL));
+ } while (--tries && (reg & WM8350_AUXADC_POLL));
if (!tries)
dev_err(wm8350->dev, "adc chn %d read timeout\n", channel);
@@ -1297,14 +1297,29 @@ static void wm8350_client_dev_register(struct wm8350 *wm8350,
int wm8350_device_init(struct wm8350 *wm8350, int irq,
struct wm8350_platform_data *pdata)
{
- int ret = -EINVAL;
+ int ret;
u16 id1, id2, mask_rev;
u16 cust_id, mode, chip_rev;
/* get WM8350 revision and config mode */
- wm8350->read_dev(wm8350, WM8350_RESET_ID, sizeof(id1), &id1);
- wm8350->read_dev(wm8350, WM8350_ID, sizeof(id2), &id2);
- wm8350->read_dev(wm8350, WM8350_REVISION, sizeof(mask_rev), &mask_rev);
+ ret = wm8350->read_dev(wm8350, WM8350_RESET_ID, sizeof(id1), &id1);
+ if (ret != 0) {
+ dev_err(wm8350->dev, "Failed to read ID: %d\n", ret);
+ goto err;
+ }
+
+ ret = wm8350->read_dev(wm8350, WM8350_ID, sizeof(id2), &id2);
+ if (ret != 0) {
+ dev_err(wm8350->dev, "Failed to read ID: %d\n", ret);
+ goto err;
+ }
+
+ ret = wm8350->read_dev(wm8350, WM8350_REVISION, sizeof(mask_rev),
+ &mask_rev);
+ if (ret != 0) {
+ dev_err(wm8350->dev, "Failed to read revision: %d\n", ret);
+ goto err;
+ }
id1 = be16_to_cpu(id1);
id2 = be16_to_cpu(id2);
@@ -1404,14 +1419,12 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
return ret;
}
- if (pdata && pdata->init) {
- ret = pdata->init(wm8350);
- if (ret != 0) {
- dev_err(wm8350->dev, "Platform init() failed: %d\n",
- ret);
- goto err;
- }
- }
+ wm8350_reg_write(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK, 0xFFFF);
+ wm8350_reg_write(wm8350, WM8350_INT_STATUS_1_MASK, 0xFFFF);
+ wm8350_reg_write(wm8350, WM8350_INT_STATUS_2_MASK, 0xFFFF);
+ wm8350_reg_write(wm8350, WM8350_UNDER_VOLTAGE_INT_STATUS_MASK, 0xFFFF);
+ wm8350_reg_write(wm8350, WM8350_GPIO_INT_STATUS_MASK, 0xFFFF);
+ wm8350_reg_write(wm8350, WM8350_COMPARATOR_INT_STATUS_MASK, 0xFFFF);
mutex_init(&wm8350->auxadc_mutex);
mutex_init(&wm8350->irq_mutex);
@@ -1430,6 +1443,15 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq,
}
wm8350->chip_irq = irq;
+ if (pdata && pdata->init) {
+ ret = pdata->init(wm8350);
+ if (ret != 0) {
+ dev_err(wm8350->dev, "Platform init() failed: %d\n",
+ ret);
+ goto err;
+ }
+ }
+
wm8350_reg_write(wm8350, WM8350_SYSTEM_INTERRUPTS_MASK, 0x0);
wm8350_client_dev_register(wm8350, "wm8350-codec",
diff --git a/drivers/mfd/wm8350-regmap.c b/drivers/mfd/wm8350-regmap.c
index 68887b817d1..9a4cc954cb7 100644
--- a/drivers/mfd/wm8350-regmap.c
+++ b/drivers/mfd/wm8350-regmap.c
@@ -3188,7 +3188,7 @@ const struct wm8350_reg_access wm8350_reg_io_map[] = {
{ 0x7CFF, 0x0C00, 0x7FFF }, /* R1 - ID */
{ 0x0000, 0x0000, 0x0000 }, /* R2 */
{ 0xBE3B, 0xBE3B, 0x8000 }, /* R3 - System Control 1 */
- { 0xFCF7, 0xFCF7, 0xF800 }, /* R4 - System Control 2 */
+ { 0xFEF7, 0xFEF7, 0xF800 }, /* R4 - System Control 2 */
{ 0x80FF, 0x80FF, 0x8000 }, /* R5 - System Hibernate */
{ 0xFB0E, 0xFB0E, 0x0000 }, /* R6 - Interface Control */
{ 0x0000, 0x0000, 0x0000 }, /* R7 */
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 45b1f430685..513eb09a638 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -584,7 +584,7 @@ static int mmc_blk_probe(struct mmc_card *card)
if (err)
goto out;
- string_get_size(get_capacity(md->disk) << 9, STRING_UNITS_2,
+ string_get_size((u64)get_capacity(md->disk) << 9, STRING_UNITS_2,
cap_str, sizeof(cap_str));
printk(KERN_INFO "%s: %s %s %s %s\n",
md->disk->disk_name, mmc_card_id(card), mmc_card_name(card),
diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index b92b172074e..b9f1e84897c 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -494,7 +494,7 @@ static int mmc_test_basic_read(struct mmc_test_card *test)
sg_init_one(&sg, test->buffer, 512);
- ret = mmc_test_simple_transfer(test, &sg, 1, 0, 1, 512, 1);
+ ret = mmc_test_simple_transfer(test, &sg, 1, 0, 1, 512, 0);
if (ret)
return ret;
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 76bfe16c09b..2b1196e6142 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -1548,9 +1548,10 @@ static bool filter(struct dma_chan *chan, void *slave)
{
struct dw_dma_slave *dws = slave;
- if (dws->dma_dev == chan->device->dev)
+ if (dws->dma_dev == chan->device->dev) {
+ chan->private = dws;
return true;
- else
+ } else
return false;
}
#endif
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index db37490f67e..a631c81dce1 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -55,6 +55,7 @@
#define VS30 (1 << 25)
#define SDVS18 (0x5 << 9)
#define SDVS30 (0x6 << 9)
+#define SDVS33 (0x7 << 9)
#define SDVSCLR 0xFFFFF1FF
#define SDVSDET 0x00000400
#define AUTOIDLE 0x1
@@ -375,6 +376,32 @@ static void mmc_omap_report_irq(struct mmc_omap_host *host, u32 status)
}
#endif /* CONFIG_MMC_DEBUG */
+/*
+ * MMC controller internal state machines reset
+ *
+ * Used to reset command or data internal state machines, using respectively
+ * SRC or SRD bit of SYSCTL register
+ * Can be called from interrupt context
+ */
+static inline void mmc_omap_reset_controller_fsm(struct mmc_omap_host *host,
+ unsigned long bit)
+{
+ unsigned long i = 0;
+ unsigned long limit = (loops_per_jiffy *
+ msecs_to_jiffies(MMC_TIMEOUT_MS));
+
+ OMAP_HSMMC_WRITE(host->base, SYSCTL,
+ OMAP_HSMMC_READ(host->base, SYSCTL) | bit);
+
+ while ((OMAP_HSMMC_READ(host->base, SYSCTL) & bit) &&
+ (i++ < limit))
+ cpu_relax();
+
+ if (OMAP_HSMMC_READ(host->base, SYSCTL) & bit)
+ dev_err(mmc_dev(host->mmc),
+ "Timeout waiting on controller reset in %s\n",
+ __func__);
+}
/*
* MMC controller IRQ handler
@@ -403,21 +430,17 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
(status & CMD_CRC)) {
if (host->cmd) {
if (status & CMD_TIMEOUT) {
- OMAP_HSMMC_WRITE(host->base, SYSCTL,
- OMAP_HSMMC_READ(host->base,
- SYSCTL) | SRC);
- while (OMAP_HSMMC_READ(host->base,
- SYSCTL) & SRC)
- ;
-
+ mmc_omap_reset_controller_fsm(host, SRC);
host->cmd->error = -ETIMEDOUT;
} else {
host->cmd->error = -EILSEQ;
}
end_cmd = 1;
}
- if (host->data)
+ if (host->data) {
mmc_dma_cleanup(host);
+ mmc_omap_reset_controller_fsm(host, SRD);
+ }
}
if ((status & DATA_TIMEOUT) ||
(status & DATA_CRC)) {
@@ -426,12 +449,7 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
mmc_dma_cleanup(host);
else
host->data->error = -EILSEQ;
- OMAP_HSMMC_WRITE(host->base, SYSCTL,
- OMAP_HSMMC_READ(host->base,
- SYSCTL) | SRD);
- while (OMAP_HSMMC_READ(host->base,
- SYSCTL) & SRD)
- ;
+ mmc_omap_reset_controller_fsm(host, SRD);
end_trans = 1;
}
}
@@ -456,13 +474,20 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
}
/*
- * Switch MMC operating voltage
+ * Switch MMC interface voltage ... only relevant for MMC1.
+ *
+ * MMC2 and MMC3 use fixed 1.8V levels, and maybe a transceiver.
+ * The MMC2 transceiver controls are used instead of DAT4..DAT7.
+ * Some chips, like eMMC ones, use internal transceivers.
*/
static int omap_mmc_switch_opcond(struct mmc_omap_host *host, int vdd)
{
u32 reg_val = 0;
int ret;
+ if (host->id != OMAP_MMC1_DEVID)
+ return 0;
+
/* Disable the clocks */
clk_disable(host->fclk);
clk_disable(host->iclk);
@@ -485,19 +510,26 @@ static int omap_mmc_switch_opcond(struct mmc_omap_host *host, int vdd)
OMAP_HSMMC_WRITE(host->base, HCTL,
OMAP_HSMMC_READ(host->base, HCTL) & SDVSCLR);
reg_val = OMAP_HSMMC_READ(host->base, HCTL);
+
/*
* If a MMC dual voltage card is detected, the set_ios fn calls
* this fn with VDD bit set for 1.8V. Upon card removal from the
* slot, omap_mmc_set_ios sets the VDD back to 3V on MMC_POWER_OFF.
*
- * Only MMC1 supports 3.0V. MMC2 will not function if SDVS30 is
- * set in HCTL.
+ * Cope with a bit of slop in the range ... per data sheets:
+ * - "1.8V" for vdds_mmc1/vdds_mmc1a can be up to 2.45V max,
+ * but recommended values are 1.71V to 1.89V
+ * - "3.0V" for vdds_mmc1/vdds_mmc1a can be up to 3.5V max,
+ * but recommended values are 2.7V to 3.3V
+ *
+ * Board setup code shouldn't permit anything very out-of-range.
+ * TWL4030-family VMMC1 and VSIM regulators are fine (avoiding the
+ * middle range) but VSIM can't power DAT4..DAT7 at more than 3V.
*/
- if (host->id == OMAP_MMC1_DEVID && (((1 << vdd) == MMC_VDD_32_33) ||
- ((1 << vdd) == MMC_VDD_33_34)))
- reg_val |= SDVS30;
- if ((1 << vdd) == MMC_VDD_165_195)
+ if ((1 << vdd) <= MMC_VDD_23_24)
reg_val |= SDVS18;
+ else
+ reg_val |= SDVS30;
OMAP_HSMMC_WRITE(host->base, HCTL, reg_val);
@@ -517,16 +549,15 @@ static void mmc_omap_detect(struct work_struct *work)
{
struct mmc_omap_host *host = container_of(work, struct mmc_omap_host,
mmc_carddetect_work);
+ struct omap_mmc_slot_data *slot = &mmc_slot(host);
+
+ host->carddetect = slot->card_detect(slot->card_detect_irq);
sysfs_notify(&host->mmc->class_dev.kobj, NULL, "cover_switch");
if (host->carddetect) {
mmc_detect_change(host->mmc, (HZ * 200) / 1000);
} else {
- OMAP_HSMMC_WRITE(host->base, SYSCTL,
- OMAP_HSMMC_READ(host->base, SYSCTL) | SRD);
- while (OMAP_HSMMC_READ(host->base, SYSCTL) & SRD)
- ;
-
+ mmc_omap_reset_controller_fsm(host, SRD);
mmc_detect_change(host->mmc, (HZ * 50) / 1000);
}
}
@@ -538,7 +569,6 @@ static irqreturn_t omap_mmc_cd_handler(int irq, void *dev_id)
{
struct mmc_omap_host *host = (struct mmc_omap_host *)dev_id;
- host->carddetect = mmc_slot(host).card_detect(irq);
schedule_work(&host->mmc_carddetect_work);
return IRQ_HANDLED;
@@ -757,10 +787,14 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
case MMC_POWER_OFF:
mmc_slot(host).set_power(host->dev, host->slot_id, 0, 0);
/*
- * Reset bus voltage to 3V if it got set to 1.8V earlier.
+ * Reset interface voltage to 3V if it's 1.8V now;
+ * only relevant on MMC-1, the others always use 1.8V.
+ *
* REVISIT: If we are able to detect cards after unplugging
* a 1.8V card, this code should not be needed.
*/
+ if (host->id != OMAP_MMC1_DEVID)
+ break;
if (!(OMAP_HSMMC_READ(host->base, HCTL) & SDVSDET)) {
int vdd = fls(host->mmc->ocr_avail) - 1;
if (omap_mmc_switch_opcond(host, vdd) != 0)
@@ -784,7 +818,9 @@ static void omap_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
}
if (host->id == OMAP_MMC1_DEVID) {
- /* Only MMC1 can operate at 3V/1.8V */
+ /* Only MMC1 can interface at 3V without some flavor
+ * of external transceiver; but they all handle 1.8V.
+ */
if ((OMAP_HSMMC_READ(host->base, HCTL) & SDVSDET) &&
(ios->vdd == DUAL_VOLT_OCR_BIT)) {
/*
@@ -1137,7 +1173,9 @@ static int omap_mmc_suspend(struct platform_device *pdev, pm_message_t state)
" level suspend\n");
}
- if (!(OMAP_HSMMC_READ(host->base, HCTL) & SDVSDET)) {
+ if (host->id == OMAP_MMC1_DEVID
+ && !(OMAP_HSMMC_READ(host->base, HCTL)
+ & SDVSDET)) {
OMAP_HSMMC_WRITE(host->base, HCTL,
OMAP_HSMMC_READ(host->base, HCTL)
& SDVSCLR);
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 35a98eec741..f4a67c65d30 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -329,7 +329,7 @@ static void do_pio_write(struct s3cmci_host *host)
to_ptr = host->base + host->sdidata;
- while ((fifo = fifo_free(host))) {
+ while ((fifo = fifo_free(host)) > 3) {
if (!host->pio_bytes) {
res = get_data_buffer(host, &host->pio_bytes,
&host->pio_ptr);
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index f07255cb17e..8cff5f5e7f8 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -144,8 +144,7 @@ static int jmicron_probe(struct sdhci_pci_chip *chip)
SDHCI_QUIRK_32BIT_DMA_SIZE |
SDHCI_QUIRK_32BIT_ADMA_SIZE |
SDHCI_QUIRK_RESET_AFTER_REQUEST |
- SDHCI_QUIRK_BROKEN_SMALL_PIO |
- SDHCI_QUIRK_FORCE_HIGHSPEED;
+ SDHCI_QUIRK_BROKEN_SMALL_PIO;
}
/*
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 6b2d1f99af6..f52f3053ed9 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1636,8 +1636,7 @@ int sdhci_add_host(struct sdhci_host *host)
mmc->f_max = host->max_clk;
mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ;
- if ((caps & SDHCI_CAN_DO_HISPD) ||
- (host->quirks & SDHCI_QUIRK_FORCE_HIGHSPEED))
+ if (caps & SDHCI_CAN_DO_HISPD)
mmc->caps |= MMC_CAP_SD_HIGHSPEED;
mmc->ocr_avail = 0;
@@ -1723,7 +1722,9 @@ int sdhci_add_host(struct sdhci_host *host)
#endif
#ifdef SDHCI_USE_LEDS_CLASS
- host->led.name = mmc_hostname(mmc);
+ snprintf(host->led_name, sizeof(host->led_name),
+ "%s::", mmc_hostname(mmc));
+ host->led.name = host->led_name;
host->led.brightness = LED_OFF;
host->led.default_trigger = mmc_hostname(mmc);
host->led.brightness_set = sdhci_led_control;
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 3efba236394..ebb83657e27 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -208,8 +208,6 @@ struct sdhci_host {
#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL (1<<12)
/* Controller has an issue with buffer bits for small transfers */
#define SDHCI_QUIRK_BROKEN_SMALL_PIO (1<<13)
-/* Controller supports high speed but doesn't have the caps bit set */
-#define SDHCI_QUIRK_FORCE_HIGHSPEED (1<<14)
int irq; /* Device IRQ */
void __iomem * ioaddr; /* Mapped address */
@@ -222,6 +220,7 @@ struct sdhci_host {
#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
struct led_classdev led; /* LED control */
+ char led_name[32];
#endif
spinlock_t lock; /* Mutex */
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index e76d715e434..f0e99d4c066 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -161,7 +161,7 @@ struct op_sample
{
entry->event = ring_buffer_lock_reserve
(op_ring_buffer_write, sizeof(struct op_sample) +
- size * sizeof(entry->sample->data[0]), &entry->irq_flags);
+ size * sizeof(entry->sample->data[0]));
if (entry->event)
entry->sample = ring_buffer_event_data(entry->event);
else
@@ -178,8 +178,7 @@ struct op_sample
int op_cpu_buffer_write_commit(struct op_entry *entry)
{
- return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event,
- entry->irq_flags);
+ return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event);
}
struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index f4b7c79023f..f3f686581a9 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -61,6 +61,8 @@
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;
+static int rwbf_quirk;
+
/*
* 0: Present
* 1-11: Reserved
@@ -785,7 +787,7 @@ static void iommu_flush_write_buffer(struct intel_iommu *iommu)
u32 val;
unsigned long flag;
- if (!cap_rwbf(iommu->cap))
+ if (!rwbf_quirk && !cap_rwbf(iommu->cap))
return;
val = iommu->gcmd | DMA_GCMD_WBF;
@@ -3137,3 +3139,15 @@ static struct iommu_ops intel_iommu_ops = {
.unmap = intel_iommu_unmap_range,
.iova_to_phys = intel_iommu_iova_to_phys,
};
+
+static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
+{
+ /*
+ * Mobile 4 Series Chipset neglects to set RWBF capability,
+ * but needs it:
+ */
+ printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
+ rwbf_quirk = 1;
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 44f15ff70c1..baba2eb5367 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -103,14 +103,12 @@ static void msix_set_enable(struct pci_dev *dev, int enable)
}
}
-/*
- * Essentially, this is ((1 << (1 << x)) - 1), but without the
- * undefinedness of a << 32.
- */
static inline __attribute_const__ u32 msi_mask(unsigned x)
{
- static const u32 mask[] = { 1, 2, 4, 0xf, 0xff, 0xffff, 0xffffffff };
- return mask[x];
+ /* Don't shift by >= width of type */
+ if (x >= 5)
+ return 0xffffffff;
+ return (1 << (1 << x)) - 1;
}
static void msix_flush_writes(struct irq_desc *desc)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e3efe6b19ee..6d6120007af 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1540,16 +1540,21 @@ void pci_release_region(struct pci_dev *pdev, int bar)
}
/**
- * pci_request_region - Reserved PCI I/O and memory resource
+ * __pci_request_region - Reserved PCI I/O and memory resource
* @pdev: PCI device whose resources are to be reserved
* @bar: BAR to be reserved
* @res_name: Name to be associated with resource.
+ * @exclusive: whether the region access is exclusive or not
*
* Mark the PCI region associated with PCI device @pdev BR @bar as
* being reserved by owner @res_name. Do not access any
* address inside the PCI regions unless this call returns
* successfully.
*
+ * If @exclusive is set, then the region is marked so that userspace
+ * is explicitly not allowed to map the resource via /dev/mem or
+ * sysfs MMIO access.
+ *
* Returns 0 on success, or %EBUSY on error. A warning
* message is also printed on failure.
*/
@@ -1588,12 +1593,12 @@ err_out:
}
/**
- * pci_request_region - Reserved PCI I/O and memory resource
+ * pci_request_region - Reserve PCI I/O and memory resource
* @pdev: PCI device whose resources are to be reserved
* @bar: BAR to be reserved
- * @res_name: Name to be associated with resource.
+ * @res_name: Name to be associated with resource
*
- * Mark the PCI region associated with PCI device @pdev BR @bar as
+ * Mark the PCI region associated with PCI device @pdev BAR @bar as
* being reserved by owner @res_name. Do not access any
* address inside the PCI regions unless this call returns
* successfully.
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 26ddf78ac30..07c0aa5275e 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -16,21 +16,21 @@ extern int pci_mmap_fits(struct pci_dev *pdev, int resno,
#endif
/**
- * Firmware PM callbacks
+ * struct pci_platform_pm_ops - Firmware PM callbacks
*
- * @is_manageable - returns 'true' if given device is power manageable by the
- * platform firmware
+ * @is_manageable: returns 'true' if given device is power manageable by the
+ * platform firmware
*
- * @set_state - invokes the platform firmware to set the device's power state
+ * @set_state: invokes the platform firmware to set the device's power state
*
- * @choose_state - returns PCI power state of given device preferred by the
- * platform; to be used during system-wide transitions from a
- * sleeping state to the working state and vice versa
+ * @choose_state: returns PCI power state of given device preferred by the
+ * platform; to be used during system-wide transitions from a
+ * sleeping state to the working state and vice versa
*
- * @can_wakeup - returns 'true' if given device is capable of waking up the
- * system from a sleeping state
+ * @can_wakeup: returns 'true' if given device is capable of waking up the
+ * system from a sleeping state
*
- * @sleep_wake - enables/disables the system wake up capability of given device
+ * @sleep_wake: enables/disables the system wake up capability of given device
*
* If given platform is generally capable of power managing PCI devices, all of
* these callbacks are mandatory.
diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
index 29cbe47f219..36864a935d6 100644
--- a/drivers/pci/rom.c
+++ b/drivers/pci/rom.c
@@ -55,6 +55,7 @@ void pci_disable_rom(struct pci_dev *pdev)
/**
* pci_get_rom_size - obtain the actual size of the ROM image
+ * @pdev: target PCI device
* @rom: kernel virtual pointer to image of ROM
* @size: size of PCI window
* return: size of actual ROM image
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 94363115a42..b3866ad5022 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -62,6 +62,7 @@ config DELL_LAPTOP
depends on EXPERIMENTAL
depends on BACKLIGHT_CLASS_DEVICE
depends on RFKILL
+ depends on POWER_SUPPLY
default n
---help---
This driver adds support for rfkill and backlight control to Dell
@@ -301,6 +302,7 @@ config INTEL_MENLOW
config EEEPC_LAPTOP
tristate "Eee PC Hotkey Driver (EXPERIMENTAL)"
depends on ACPI
+ depends on INPUT
depends on EXPERIMENTAL
select BACKLIGHT_CLASS_DEVICE
select HWMON
diff --git a/drivers/serial/atmel_serial.c b/drivers/serial/atmel_serial.c
index 89362d733d6..8f58f7ff0dd 100644
--- a/drivers/serial/atmel_serial.c
+++ b/drivers/serial/atmel_serial.c
@@ -877,6 +877,10 @@ static int atmel_startup(struct uart_port *port)
}
}
+ /* Save current CSR for comparison in atmel_tasklet_func() */
+ atmel_port->irq_status_prev = UART_GET_CSR(port);
+ atmel_port->irq_status = atmel_port->irq_status_prev;
+
/*
* Finally, enable the serial port
*/
diff --git a/drivers/serial/jsm/jsm_driver.c b/drivers/serial/jsm/jsm_driver.c
index 92187e28608..ac79cbe4c2c 100644
--- a/drivers/serial/jsm/jsm_driver.c
+++ b/drivers/serial/jsm/jsm_driver.c
@@ -84,6 +84,8 @@ static int jsm_probe_one(struct pci_dev *pdev, const struct pci_device_id *ent)
brd->pci_dev = pdev;
if (pdev->device == PCIE_DEVICE_ID_NEO_4_IBM)
brd->maxports = 4;
+ else if (pdev->device == PCI_DEVICE_ID_DIGI_NEO_8)
+ brd->maxports = 8;
else
brd->maxports = 2;
@@ -212,6 +214,7 @@ static struct pci_device_id jsm_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_DIGI, PCI_DEVICE_ID_NEO_2RJ45), 0, 0, 2 },
{ PCI_DEVICE(PCI_VENDOR_ID_DIGI, PCI_DEVICE_ID_NEO_2RJ45PRI), 0, 0, 3 },
{ PCI_DEVICE(PCI_VENDOR_ID_DIGI, PCIE_DEVICE_ID_NEO_4_IBM), 0, 0, 4 },
+ { PCI_DEVICE(PCI_VENDOR_ID_DIGI, PCI_DEVICE_ID_DIGI_NEO_8), 0, 0, 5 },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, jsm_pci_tbl);
diff --git a/drivers/spi/spi_gpio.c b/drivers/spi/spi_gpio.c
index 49698cabc30..f5ed9721aab 100644
--- a/drivers/spi/spi_gpio.c
+++ b/drivers/spi/spi_gpio.c
@@ -114,7 +114,7 @@ static inline void setmosi(const struct spi_device *spi, int is_on)
static inline int getmiso(const struct spi_device *spi)
{
- return gpio_get_value(SPI_MISO_GPIO);
+ return !!gpio_get_value(SPI_MISO_GPIO);
}
#undef pdata
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index c54fc40458b..a4301dc02d2 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -298,19 +298,6 @@ int usb_hcd_pci_suspend(struct pci_dev *dev, pm_message_t message)
EXPORT_SYMBOL_GPL(usb_hcd_pci_suspend);
/**
- * usb_hcd_pci_resume_early - resume a PCI-based HCD before IRQs are enabled
- * @dev: USB Host Controller being resumed
- *
- * Store this function in the HCD's struct pci_driver as .resume_early.
- */
-int usb_hcd_pci_resume_early(struct pci_dev *dev)
-{
- pci_restore_state(dev);
- return 0;
-}
-EXPORT_SYMBOL_GPL(usb_hcd_pci_resume_early);
-
-/**
* usb_hcd_pci_resume - power management resume of a PCI-based HCD
* @dev: USB Host Controller being resumed
*
@@ -333,6 +320,8 @@ int usb_hcd_pci_resume(struct pci_dev *dev)
}
#endif
+ pci_restore_state(dev);
+
hcd = pci_get_drvdata(dev);
if (hcd->state != HC_STATE_SUSPENDED) {
dev_dbg(hcd->self.controller,
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
index 5b94a56bec2..f750eb1ab59 100644
--- a/drivers/usb/core/hcd.h
+++ b/drivers/usb/core/hcd.h
@@ -257,7 +257,6 @@ extern void usb_hcd_pci_remove(struct pci_dev *dev);
#ifdef CONFIG_PM
extern int usb_hcd_pci_suspend(struct pci_dev *dev, pm_message_t msg);
-extern int usb_hcd_pci_resume_early(struct pci_dev *dev);
extern int usb_hcd_pci_resume(struct pci_dev *dev);
#endif /* CONFIG_PM */
diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index bb21fb0a496..abb9a7706ec 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c
@@ -432,7 +432,6 @@ static struct pci_driver ehci_pci_driver = {
#ifdef CONFIG_PM
.suspend = usb_hcd_pci_suspend,
- .resume_early = usb_hcd_pci_resume_early,
.resume = usb_hcd_pci_resume,
#endif
.shutdown = usb_hcd_pci_shutdown,
diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c
index 5d625c3fd42..f9961b4c0da 100644
--- a/drivers/usb/host/ohci-pci.c
+++ b/drivers/usb/host/ohci-pci.c
@@ -487,7 +487,6 @@ static struct pci_driver ohci_pci_driver = {
#ifdef CONFIG_PM
.suspend = usb_hcd_pci_suspend,
- .resume_early = usb_hcd_pci_resume_early,
.resume = usb_hcd_pci_resume,
#endif
diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
index 944f7e0ca4d..cf5e4cf7ea4 100644
--- a/drivers/usb/host/uhci-hcd.c
+++ b/drivers/usb/host/uhci-hcd.c
@@ -942,7 +942,6 @@ static struct pci_driver uhci_pci_driver = {
#ifdef CONFIG_PM
.suspend = usb_hcd_pci_suspend,
- .resume_early = usb_hcd_pci_resume_early,
.resume = usb_hcd_pci_resume,
#endif /* PM */
};
diff --git a/drivers/usb/host/whci/asl.c b/drivers/usb/host/whci/asl.c
index 2291c5f5af5..958751ccea4 100644
--- a/drivers/usb/host/whci/asl.c
+++ b/drivers/usb/host/whci/asl.c
@@ -227,13 +227,13 @@ void scan_async_work(struct work_struct *work)
* Now that the ASL is updated, complete the removal of any
* removed qsets.
*/
- spin_lock(&whc->lock);
+ spin_lock_irq(&whc->lock);
list_for_each_entry_safe(qset, t, &whc->async_removed_list, list_node) {
qset_remove_complete(whc, qset);
}
- spin_unlock(&whc->lock);
+ spin_unlock_irq(&whc->lock);
}
/**
diff --git a/drivers/usb/host/whci/pzl.c b/drivers/usb/host/whci/pzl.c
index 7dc85a0bee7..df8b85f0709 100644
--- a/drivers/usb/host/whci/pzl.c
+++ b/drivers/usb/host/whci/pzl.c
@@ -255,13 +255,13 @@ void scan_periodic_work(struct work_struct *work)
* Now that the PZL is updated, complete the removal of any
* removed qsets.
*/
- spin_lock(&whc->lock);
+ spin_lock_irq(&whc->lock);
list_for_each_entry_safe(qset, t, &whc->periodic_removed_list, list_node) {
qset_remove_complete(whc, qset);
}
- spin_unlock(&whc->lock);
+ spin_unlock_irq(&whc->lock);
}
/**
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index bf0af660df8..fb19803060c 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1054,10 +1054,7 @@ config FB_RIVA_BACKLIGHT
config FB_I810
tristate "Intel 810/815 support (EXPERIMENTAL)"
- depends on EXPERIMENTAL && PCI && X86_32
- select AGP
- select AGP_INTEL
- select FB
+ depends on EXPERIMENTAL && FB && PCI && X86_32 && AGP_INTEL
select FB_MODE_HELPERS
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
@@ -1120,10 +1117,7 @@ config FB_CARILLO_RANCH
config FB_INTEL
tristate "Intel 830M/845G/852GM/855GM/865G/915G/945G/945GM/965G/965GM support (EXPERIMENTAL)"
- depends on EXPERIMENTAL && PCI && X86
- select FB
- select AGP
- select AGP_INTEL
+ depends on EXPERIMENTAL && FB && PCI && X86 && AGP_INTEL
select FB_MODE_HELPERS
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 09a3d5522b4..325c10ff6a2 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -406,7 +406,7 @@ config ITCO_WDT
---help---
Hardware driver for the intel TCO timer based watchdog devices.
These drivers are included in the Intel 82801 I/O Controller
- Hub family (from ICH0 up to ICH8) and in the Intel 6300ESB
+ Hub family (from ICH0 up to ICH10) and in the Intel 63xxESB
controller hub.
The TCO (Total Cost of Ownership) timer is a watchdog timer
diff --git a/drivers/watchdog/iTCO_vendor_support.c b/drivers/watchdog/iTCO_vendor_support.c
index 2474ebca88f..d8264ad0be4 100644
--- a/drivers/watchdog/iTCO_vendor_support.c
+++ b/drivers/watchdog/iTCO_vendor_support.c
@@ -1,7 +1,7 @@
/*
* intel TCO vendor specific watchdog driver support
*
- * (c) Copyright 2006-2008 Wim Van Sebroeck <wim@iguana.be>.
+ * (c) Copyright 2006-2009 Wim Van Sebroeck <wim@iguana.be>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -19,7 +19,7 @@
/* Module and version information */
#define DRV_NAME "iTCO_vendor_support"
-#define DRV_VERSION "1.02"
+#define DRV_VERSION "1.03"
#define PFX DRV_NAME ": "
/* Includes */
@@ -77,6 +77,26 @@ MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default=0 (n
* 20.6 seconds.
*/
+static void supermicro_old_pre_start(unsigned long acpibase)
+{
+ unsigned long val32;
+
+ /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */
+ val32 = inl(SMI_EN);
+ val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */
+ outl(val32, SMI_EN); /* Needed to activate watchdog */
+}
+
+static void supermicro_old_pre_stop(unsigned long acpibase)
+{
+ unsigned long val32;
+
+ /* Bit 13: TCO_EN -> 1 = Enables the TCO logic to generate SMI# */
+ val32 = inl(SMI_EN);
+ val32 |= 0x00002000; /* Turn on SMI clearing watchdog */
+ outl(val32, SMI_EN); /* Needed to deactivate watchdog */
+}
+
static void supermicro_old_pre_keepalive(unsigned long acpibase)
{
/* Reload TCO Timer (done in iTCO_wdt_keepalive) + */
@@ -228,14 +248,18 @@ static void supermicro_new_pre_set_heartbeat(unsigned int heartbeat)
void iTCO_vendor_pre_start(unsigned long acpibase,
unsigned int heartbeat)
{
- if (vendorsupport == SUPERMICRO_NEW_BOARD)
+ if (vendorsupport == SUPERMICRO_OLD_BOARD)
+ supermicro_old_pre_start(acpibase);
+ else if (vendorsupport == SUPERMICRO_NEW_BOARD)
supermicro_new_pre_start(heartbeat);
}
EXPORT_SYMBOL(iTCO_vendor_pre_start);
void iTCO_vendor_pre_stop(unsigned long acpibase)
{
- if (vendorsupport == SUPERMICRO_NEW_BOARD)
+ if (vendorsupport == SUPERMICRO_OLD_BOARD)
+ supermicro_old_pre_stop(acpibase);
+ else if (vendorsupport == SUPERMICRO_NEW_BOARD)
supermicro_new_pre_stop();
}
EXPORT_SYMBOL(iTCO_vendor_pre_stop);
diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index 5b395a4ddfd..352334947ea 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -1,7 +1,7 @@
/*
- * intel TCO Watchdog Driver (Used in i82801 and i6300ESB chipsets)
+ * intel TCO Watchdog Driver (Used in i82801 and i63xxESB chipsets)
*
- * (c) Copyright 2006-2008 Wim Van Sebroeck <wim@iguana.be>.
+ * (c) Copyright 2006-2009 Wim Van Sebroeck <wim@iguana.be>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -63,7 +63,7 @@
/* Module and version information */
#define DRV_NAME "iTCO_wdt"
-#define DRV_VERSION "1.04"
+#define DRV_VERSION "1.05"
#define PFX DRV_NAME ": "
/* Includes */
@@ -236,16 +236,16 @@ MODULE_DEVICE_TABLE(pci, iTCO_wdt_pci_tbl);
/* Address definitions for the TCO */
/* TCO base address */
-#define TCOBASE iTCO_wdt_private.ACPIBASE + 0x60
+#define TCOBASE iTCO_wdt_private.ACPIBASE + 0x60
/* SMI Control and Enable Register */
-#define SMI_EN iTCO_wdt_private.ACPIBASE + 0x30
+#define SMI_EN iTCO_wdt_private.ACPIBASE + 0x30
#define TCO_RLD TCOBASE + 0x00 /* TCO Timer Reload and Curr. Value */
#define TCOv1_TMR TCOBASE + 0x01 /* TCOv1 Timer Initial Value */
-#define TCO_DAT_IN TCOBASE + 0x02 /* TCO Data In Register */
-#define TCO_DAT_OUT TCOBASE + 0x03 /* TCO Data Out Register */
-#define TCO1_STS TCOBASE + 0x04 /* TCO1 Status Register */
-#define TCO2_STS TCOBASE + 0x06 /* TCO2 Status Register */
+#define TCO_DAT_IN TCOBASE + 0x02 /* TCO Data In Register */
+#define TCO_DAT_OUT TCOBASE + 0x03 /* TCO Data Out Register */
+#define TCO1_STS TCOBASE + 0x04 /* TCO1 Status Register */
+#define TCO2_STS TCOBASE + 0x06 /* TCO2 Status Register */
#define TCO1_CNT TCOBASE + 0x08 /* TCO1 Control Register */
#define TCO2_CNT TCOBASE + 0x0a /* TCO2 Control Register */
#define TCOv2_TMR TCOBASE + 0x12 /* TCOv2 Timer Initial Value */
@@ -338,7 +338,6 @@ static int iTCO_wdt_unset_NO_REBOOT_bit(void)
static int iTCO_wdt_start(void)
{
unsigned int val;
- unsigned long val32;
spin_lock(&iTCO_wdt_private.io_lock);
@@ -351,11 +350,6 @@ static int iTCO_wdt_start(void)
return -EIO;
}
- /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */
- val32 = inl(SMI_EN);
- val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */
- outl(val32, SMI_EN);
-
/* Force the timer to its reload value by writing to the TCO_RLD
register */
if (iTCO_wdt_private.iTCO_version == 2)
@@ -378,7 +372,6 @@ static int iTCO_wdt_start(void)
static int iTCO_wdt_stop(void)
{
unsigned int val;
- unsigned long val32;
spin_lock(&iTCO_wdt_private.io_lock);
@@ -390,11 +383,6 @@ static int iTCO_wdt_stop(void)
outw(val, TCO1_CNT);
val = inw(TCO1_CNT);
- /* Bit 13: TCO_EN -> 1 = Enables the TCO logic to generate SMI# */
- val32 = inl(SMI_EN);
- val32 |= 0x00002000;
- outl(val32, SMI_EN);
-
/* Set the NO_REBOOT bit to prevent later reboots, just for sure */
iTCO_wdt_set_NO_REBOOT_bit();
@@ -649,6 +637,7 @@ static int __devinit iTCO_wdt_init(struct pci_dev *pdev,
int ret;
u32 base_address;
unsigned long RCBA;
+ unsigned long val32;
/*
* Find the ACPI/PM base I/O address which is the base
@@ -695,6 +684,10 @@ static int __devinit iTCO_wdt_init(struct pci_dev *pdev,
ret = -EIO;
goto out;
}
+ /* Bit 13: TCO_EN -> 0 = Disables TCO logic generating an SMI# */
+ val32 = inl(SMI_EN);
+ val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */
+ outl(val32, SMI_EN);
/* The TCO I/O registers reside in a 32-byte range pointed to
by the TCOBASE value */
diff --git a/fs/bio.c b/fs/bio.c
index 062299acbcc..72ab251cdb9 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -302,9 +302,10 @@ void bio_init(struct bio *bio)
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
struct bio *bio = NULL;
+ void *p;
if (bs) {
- void *p = mempool_alloc(bs->bio_pool, gfp_mask);
+ p = mempool_alloc(bs->bio_pool, gfp_mask);
if (p)
bio = p + bs->front_pad;
@@ -329,7 +330,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
}
if (unlikely(!bvl)) {
if (bs)
- mempool_free(bio, bs->bio_pool);
+ mempool_free(p, bs->bio_pool);
else
kfree(bio);
bio = NULL;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 35443cc4b9a..42491d728e9 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -38,19 +38,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int level, int slot);
-inline void btrfs_init_path(struct btrfs_path *p)
-{
- memset(p, 0, sizeof(*p));
-}
-
struct btrfs_path *btrfs_alloc_path(void)
{
struct btrfs_path *path;
- path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
- if (path) {
- btrfs_init_path(path);
+ path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
+ if (path)
path->reada = 1;
- }
return path;
}
@@ -69,14 +62,38 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
/*
* reset all the locked nodes in the patch to spinning locks.
+ *
+ * held is used to keep lockdep happy, when lockdep is enabled
+ * we set held to a blocking lock before we go around and
+ * retake all the spinlocks in the path. You can safely use NULL
+ * for held
*/
-noinline void btrfs_clear_path_blocking(struct btrfs_path *p)
+noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
+ struct extent_buffer *held)
{
int i;
- for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /* lockdep really cares that we take all of these spinlocks
+ * in the right order. If any of the locks in the path are not
+ * currently blocking, it is going to complain. So, make really
+ * really sure by forcing the path to blocking before we clear
+ * the path blocking.
+ */
+ if (held)
+ btrfs_set_lock_blocking(held);
+ btrfs_set_path_blocking(p);
+#endif
+
+ for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
if (p->nodes[i] && p->locks[i])
btrfs_clear_lock_blocking(p->nodes[i]);
}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ if (held)
+ btrfs_clear_lock_blocking(held);
+#endif
}
/* this also releases the path */
@@ -286,7 +303,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
trans->transid, level, &ins);
BUG_ON(ret);
cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
- buf->len);
+ buf->len, level);
} else {
cow = btrfs_alloc_free_block(trans, root, buf->len,
parent_start,
@@ -917,9 +934,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* promote the child to a root */
child = read_node_slot(root, mid, 0);
+ BUG_ON(!child);
btrfs_tree_lock(child);
btrfs_set_lock_blocking(child);
- BUG_ON(!child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
BUG_ON(ret);
@@ -1566,7 +1583,7 @@ cow_done:
if (!p->skip_locking)
p->locks[level] = 1;
- btrfs_clear_path_blocking(p);
+ btrfs_clear_path_blocking(p, NULL);
/*
* we have a lock on b and as long as we aren't changing
@@ -1605,7 +1622,7 @@ cow_done:
btrfs_set_path_blocking(p);
sret = split_node(trans, root, p, level);
- btrfs_clear_path_blocking(p);
+ btrfs_clear_path_blocking(p, NULL);
BUG_ON(sret > 0);
if (sret) {
@@ -1625,7 +1642,7 @@ cow_done:
btrfs_set_path_blocking(p);
sret = balance_level(trans, root, p, level);
- btrfs_clear_path_blocking(p);
+ btrfs_clear_path_blocking(p, NULL);
if (sret) {
ret = sret;
@@ -1688,13 +1705,13 @@ cow_done:
if (!p->skip_locking) {
int lret;
- btrfs_clear_path_blocking(p);
+ btrfs_clear_path_blocking(p, NULL);
lret = btrfs_try_spin_lock(b);
if (!lret) {
btrfs_set_path_blocking(p);
btrfs_tree_lock(b);
- btrfs_clear_path_blocking(p);
+ btrfs_clear_path_blocking(p, b);
}
}
} else {
@@ -1706,7 +1723,7 @@ cow_done:
btrfs_set_path_blocking(p);
sret = split_leaf(trans, root, key,
p, ins_len, ret == 0);
- btrfs_clear_path_blocking(p);
+ btrfs_clear_path_blocking(p, NULL);
BUG_ON(sret > 0);
if (sret) {
@@ -3926,7 +3943,6 @@ find_next_key:
btrfs_release_path(root, path);
goto again;
} else {
- btrfs_clear_path_blocking(path);
goto out;
}
}
@@ -3946,7 +3962,7 @@ find_next_key:
path->locks[level - 1] = 1;
path->nodes[level - 1] = cur;
unlock_up(path, level, 1);
- btrfs_clear_path_blocking(path);
+ btrfs_clear_path_blocking(path, NULL);
}
out:
if (ret == 0)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 531db112c8b..766b31ae318 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -43,11 +43,7 @@ struct btrfs_ordered_sum;
#define BTRFS_ACL_NOT_CACHED ((void *)-1)
-#ifdef CONFIG_LOCKDEP
-# define BTRFS_MAX_LEVEL 7
-#else
-# define BTRFS_MAX_LEVEL 8
-#endif
+#define BTRFS_MAX_LEVEL 8
/* holds pointers to all of the tree roots */
#define BTRFS_ROOT_TREE_OBJECTID 1ULL
@@ -1715,7 +1711,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
u64 empty_size);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u32 blocksize);
+ u64 bytenr, u32 blocksize,
+ int level);
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 num_bytes, u64 parent, u64 min_bytes,
@@ -1834,9 +1831,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);
-void btrfs_init_path(struct btrfs_path *p);
void btrfs_set_path_blocking(struct btrfs_path *p);
-void btrfs_clear_path_blocking(struct btrfs_path *p);
void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5aebddd7119..adda739a021 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -75,6 +75,40 @@ struct async_submit_bio {
struct btrfs_work work;
};
+/* These are used to set the lockdep class on the extent buffer locks.
+ * The class is set by the readpage_end_io_hook after the buffer has
+ * passed csum validation but before the pages are unlocked.
+ *
+ * The lockdep class is also set by btrfs_init_new_buffer on freshly
+ * allocated blocks.
+ *
+ * The class is based on the level in the tree block, which allows lockdep
+ * to know that lower nodes nest inside the locks of higher nodes.
+ *
+ * We also add a check to make sure the highest level of the tree is
+ * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
+ * code needs update as well.
+ */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# if BTRFS_MAX_LEVEL != 8
+# error
+# endif
+static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
+static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
+ /* leaf */
+ "btrfs-extent-00",
+ "btrfs-extent-01",
+ "btrfs-extent-02",
+ "btrfs-extent-03",
+ "btrfs-extent-04",
+ "btrfs-extent-05",
+ "btrfs-extent-06",
+ "btrfs-extent-07",
+ /* highest possible level */
+ "btrfs-extent-08",
+};
+#endif
+
/*
* extents on the btree inode are pretty simple, there's one extent
* that covers the entire device
@@ -347,6 +381,15 @@ static int check_tree_block_fsid(struct btrfs_root *root,
return ret;
}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
+{
+ lockdep_set_class_and_name(&eb->lock,
+ &btrfs_eb_class[level],
+ btrfs_eb_name[level]);
+}
+#endif
+
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
{
@@ -392,6 +435,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
}
found_level = btrfs_header_level(eb);
+ btrfs_set_buffer_lockdep_class(eb, found_level);
+
ret = csum_tree_block(root, eb, 1);
if (ret)
ret = -EIO;
@@ -1777,7 +1822,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
ret = find_and_setup_root(tree_root, fs_info,
BTRFS_DEV_TREE_OBJECTID, dev_root);
dev_root->track_dirty = 1;
-
if (ret)
goto fail_extent_root;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 494a56eb298..95029db227b 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -101,4 +101,14 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btree_lock_page_hook(struct page *page);
+
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level);
+#else
+static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb,
+ int level)
+{
+}
+#endif
#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7527523c2d2..0a5d796c9f7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1323,8 +1323,25 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- finish_current_insert(trans, root->fs_info->extent_root, 1);
- del_pending_extents(trans, root->fs_info->extent_root, 1);
+ u64 start;
+ u64 end;
+ int ret;
+
+ while(1) {
+ finish_current_insert(trans, root->fs_info->extent_root, 1);
+ del_pending_extents(trans, root->fs_info->extent_root, 1);
+
+ /* is there more work to do? */
+ ret = find_first_extent_bit(&root->fs_info->pending_del,
+ 0, &start, &end, EXTENT_WRITEBACK);
+ if (!ret)
+ continue;
+ ret = find_first_extent_bit(&root->fs_info->extent_ins,
+ 0, &start, &end, EXTENT_WRITEBACK);
+ if (!ret)
+ continue;
+ break;
+ }
return 0;
}
@@ -2211,13 +2228,12 @@ static int finish_current_insert(struct btrfs_trans_handle *trans,
u64 end;
u64 priv;
u64 search = 0;
- u64 skipped = 0;
struct btrfs_fs_info *info = extent_root->fs_info;
struct btrfs_path *path;
struct pending_extent_op *extent_op, *tmp;
struct list_head insert_list, update_list;
int ret;
- int num_inserts = 0, max_inserts;
+ int num_inserts = 0, max_inserts, restart = 0;
path = btrfs_alloc_path();
INIT_LIST_HEAD(&insert_list);
@@ -2233,19 +2249,19 @@ again:
ret = find_first_extent_bit(&info->extent_ins, search, &start,
&end, EXTENT_WRITEBACK);
if (ret) {
- if (skipped && all && !num_inserts &&
+ if (restart && !num_inserts &&
list_empty(&update_list)) {
- skipped = 0;
+ restart = 0;
search = 0;
continue;
}
- mutex_unlock(&info->extent_ins_mutex);
break;
}
ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
if (!ret) {
- skipped = 1;
+ if (all)
+ restart = 1;
search = end + 1;
if (need_resched()) {
mutex_unlock(&info->extent_ins_mutex);
@@ -2264,7 +2280,7 @@ again:
list_add_tail(&extent_op->list, &insert_list);
search = end + 1;
if (num_inserts == max_inserts) {
- mutex_unlock(&info->extent_ins_mutex);
+ restart = 1;
break;
}
} else if (extent_op->type == PENDING_BACKREF_UPDATE) {
@@ -2280,7 +2296,6 @@ again:
* somebody marked this thing for deletion then just unlock it and be
* done, the free_extents will handle it
*/
- mutex_lock(&info->extent_ins_mutex);
list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
clear_extent_bits(&info->extent_ins, extent_op->bytenr,
extent_op->bytenr + extent_op->num_bytes - 1,
@@ -2302,6 +2317,10 @@ again:
if (!list_empty(&update_list)) {
ret = update_backrefs(trans, extent_root, path, &update_list);
BUG_ON(ret);
+
+ /* we may have COW'ed new blocks, so lets start over */
+ if (all)
+ restart = 1;
}
/*
@@ -2309,9 +2328,9 @@ again:
* need to make sure everything is cleaned then reset everything and
* go back to the beginning
*/
- if (!num_inserts && all && skipped) {
+ if (!num_inserts && restart) {
search = 0;
- skipped = 0;
+ restart = 0;
INIT_LIST_HEAD(&update_list);
INIT_LIST_HEAD(&insert_list);
goto again;
@@ -2368,27 +2387,19 @@ again:
BUG_ON(ret);
/*
- * if we broke out of the loop in order to insert stuff because we hit
- * the maximum number of inserts at a time we can handle, then loop
- * back and pick up where we left off
+ * if restart is set for whatever reason we need to go back and start
+ * searching through the pending list again.
+ *
+ * We just inserted some extents, which could have resulted in new
+ * blocks being allocated, which would result in new blocks needing
+ * updates, so if all is set we _must_ restart to get the updated
+ * blocks.
*/
- if (num_inserts == max_inserts) {
- INIT_LIST_HEAD(&insert_list);
- INIT_LIST_HEAD(&update_list);
- num_inserts = 0;
- goto again;
- }
-
- /*
- * again, if we need to make absolutely sure there are no more pending
- * extent operations left and we know that we skipped some, go back to
- * the beginning and do it all again
- */
- if (all && skipped) {
+ if (restart || all) {
INIT_LIST_HEAD(&insert_list);
INIT_LIST_HEAD(&update_list);
search = 0;
- skipped = 0;
+ restart = 0;
num_inserts = 0;
goto again;
}
@@ -2709,6 +2720,8 @@ again:
goto again;
}
+ if (!err)
+ finish_current_insert(trans, extent_root, 0);
return err;
}
@@ -2859,7 +2872,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
if (data & BTRFS_BLOCK_GROUP_METADATA) {
last_ptr = &root->fs_info->last_alloc;
- empty_cluster = 64 * 1024;
+ if (!btrfs_test_opt(root, SSD))
+ empty_cluster = 64 * 1024;
}
if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
@@ -3402,7 +3416,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u32 blocksize)
+ u64 bytenr, u32 blocksize,
+ int level)
{
struct extent_buffer *buf;
@@ -3410,6 +3425,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
if (!buf)
return ERR_PTR(-ENOMEM);
btrfs_set_header_generation(buf, trans->transid);
+ btrfs_set_buffer_lockdep_class(buf, level);
btrfs_tree_lock(buf);
clean_tree_block(trans, root, buf);
@@ -3453,7 +3469,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
return ERR_PTR(ret);
}
- buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
+ buf = btrfs_init_new_buffer(trans, root, ins.objectid,
+ blocksize, level);
return buf;
}
@@ -5641,7 +5658,9 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
prev_block = block_start;
}
+ mutex_lock(&extent_root->fs_info->trans_mutex);
btrfs_record_root_in_trans(found_root);
+ mutex_unlock(&extent_root->fs_info->trans_mutex);
if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
/*
* try to update data extent references while
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 37d43b516b7..ebe6b29e606 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -415,8 +415,6 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
if (node) {
- struct extent_state *found;
- found = rb_entry(node, struct extent_state, rb_node);
free_extent_state(prealloc);
return -EEXIST;
}
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 2aa79873eb4..cc7334d833c 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -84,7 +84,6 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
search_key.type = 0;
search_key.offset = 0;
- btrfs_init_path(path);
start_found = 0;
ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0);
if (ret < 0)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8f0706210a4..3cee77ae03c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2531,8 +2531,6 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
key.offset = (u64)-1;
key.type = (u8)-1;
- btrfs_init_path(path);
-
search_again:
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
@@ -4263,7 +4261,7 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
if (PageWriteback(page) || PageDirty(page))
return 0;
- return __btrfs_releasepage(page, gfp_flags);
+ return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
}
static void btrfs_invalidatepage(struct page *page, unsigned long offset)
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 9ebe9385129..85506c4a3af 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -25,21 +25,10 @@
#include "extent_io.h"
#include "locking.h"
-/*
- * btrfs_header_level() isn't free, so don't call it when lockdep isn't
- * on
- */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-static inline void spin_nested(struct extent_buffer *eb)
-{
- spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb));
-}
-#else
static inline void spin_nested(struct extent_buffer *eb)
{
spin_lock(&eb->lock);
}
-#endif
/*
* Setting a lock to blocking will drop the spinlock and set the
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f3fd7e2cbc3..19a4daf03cc 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -379,7 +379,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
btrfs_start_delalloc_inodes(root);
btrfs_wait_ordered_extents(root, 0);
- btrfs_clean_old_snapshots(root);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
sb->s_dirt = 0;
@@ -511,6 +510,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
struct btrfs_root *root = btrfs_sb(sb);
int ret;
+ ret = btrfs_parse_options(root, data);
+ if (ret)
+ return -EINVAL;
+
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 919172de5c9..4112d53d4f4 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -688,7 +688,9 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
num_bytes -= btrfs_root_used(&dirty->root->root_item);
bytes_used = btrfs_root_used(&root->root_item);
if (num_bytes) {
+ mutex_lock(&root->fs_info->trans_mutex);
btrfs_record_root_in_trans(root);
+ mutex_unlock(&root->fs_info->trans_mutex);
btrfs_set_root_used(&root->root_item,
bytes_used - num_bytes);
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 20794290256..9c462fbd60f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2832,7 +2832,9 @@ again:
BUG_ON(!wc.replay_dest);
wc.replay_dest->log_root = log;
+ mutex_lock(&fs_info->trans_mutex);
btrfs_record_root_in_trans(wc.replay_dest);
+ mutex_unlock(&fs_info->trans_mutex);
ret = walk_log_tree(trans, log, &wc);
BUG_ON(ret);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bcd14ebccae..1316139bf9e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2894,10 +2894,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
free_extent_map(em);
}
- map = kzalloc(sizeof(*map), GFP_NOFS);
- if (!map)
- return -ENOMEM;
-
em = alloc_extent_map(GFP_NOFS);
if (!em)
return -ENOMEM;
@@ -3106,6 +3102,8 @@ int btrfs_read_sys_array(struct btrfs_root *root)
if (!sb)
return -ENOMEM;
btrfs_set_buffer_uptodate(sb);
+ btrfs_set_buffer_lockdep_class(sb, 0);
+
write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
array_size = btrfs_super_sys_array_size(super_copy);
diff --git a/fs/buffer.c b/fs/buffer.c
index 665d446b25b..9f697419ed8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -777,6 +777,7 @@ static int __set_page_dirty(struct page *page,
__inc_zone_page_state(page, NR_FILE_DIRTY);
__inc_bdi_stat(mapping->backing_dev_info,
BDI_RECLAIMABLE);
+ task_dirty_inc(current);
task_io_account_write(PAGE_CACHE_SIZE);
}
radix_tree_tag_set(&mapping->page_tree,
@@ -3108,7 +3109,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
if (test_clear_buffer_dirty(bh)) {
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
- ret = submit_bh(WRITE_SYNC, bh);
+ ret = submit_bh(WRITE, bh);
wait_on_buffer(bh);
if (buffer_eopnotsupp(bh)) {
clear_buffer_eopnotsupp(bh);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 9c6d815dd19..39bd4d38e88 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1938,6 +1938,8 @@ ULONG_IOCTL(SET_BITMAP_FILE)
/* Big K */
COMPATIBLE_IOCTL(PIO_FONT)
COMPATIBLE_IOCTL(GIO_FONT)
+COMPATIBLE_IOCTL(PIO_CMAP)
+COMPATIBLE_IOCTL(GIO_CMAP)
ULONG_IOCTL(KDSIGACCEPT)
COMPATIBLE_IOCTL(KDGETKEYCODE)
COMPATIBLE_IOCTL(KDSETKEYCODE)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index aafc9eba1c2..b0c87dce66a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -868,7 +868,7 @@ static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
{
unsigned len = le16_to_cpu(dlen);
- if (len == EXT4_MAX_REC_LEN)
+ if (len == EXT4_MAX_REC_LEN || len == 0)
return 1 << 16;
return len;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 03ba20be132..cbd2ca99d11 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -47,8 +47,10 @@
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
- return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
- new_size);
+ return jbd2_journal_begin_ordered_truncate(
+ EXT4_SB(inode->i_sb)->s_journal,
+ &EXT4_I(inode)->jinode,
+ new_size);
}
static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -2437,6 +2439,7 @@ static int ext4_da_writepages(struct address_space *mapping,
int no_nrwrite_index_update;
int pages_written = 0;
long pages_skipped;
+ int range_cyclic, cycled = 1, io_done = 0;
int needed_blocks, ret = 0, nr_to_writebump = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
@@ -2488,9 +2491,15 @@ static int ext4_da_writepages(struct address_space *mapping,
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
- if (wbc->range_cyclic)
+ range_cyclic = wbc->range_cyclic;
+ if (wbc->range_cyclic) {
index = mapping->writeback_index;
- else
+ if (index)
+ cycled = 0;
+ wbc->range_start = index << PAGE_CACHE_SHIFT;
+ wbc->range_end = LLONG_MAX;
+ wbc->range_cyclic = 0;
+ } else
index = wbc->range_start >> PAGE_CACHE_SHIFT;
mpd.wbc = wbc;
@@ -2504,6 +2513,7 @@ static int ext4_da_writepages(struct address_space *mapping,
wbc->no_nrwrite_index_update = 1;
pages_skipped = wbc->pages_skipped;
+retry:
while (!ret && wbc->nr_to_write > 0) {
/*
@@ -2546,6 +2556,7 @@ static int ext4_da_writepages(struct address_space *mapping,
pages_written += mpd.pages_written;
wbc->pages_skipped = pages_skipped;
ret = 0;
+ io_done = 1;
} else if (wbc->nr_to_write)
/*
* There is no more writeout needed
@@ -2554,6 +2565,13 @@ static int ext4_da_writepages(struct address_space *mapping,
*/
break;
}
+ if (!io_done && !cycled) {
+ cycled = 1;
+ index = 0;
+ wbc->range_start = index << PAGE_CACHE_SHIFT;
+ wbc->range_end = mapping->writeback_index - 1;
+ goto retry;
+ }
if (pages_skipped != wbc->pages_skipped)
printk(KERN_EMERG "This should not happen leaving %s "
"with nr_to_write = %ld ret = %d\n",
@@ -2561,6 +2579,7 @@ static int ext4_da_writepages(struct address_space *mapping,
/* Update index */
index += pages_written;
+ wbc->range_cyclic = range_cyclic;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
/*
* set the writeback_index so that range_cyclic
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index deba54f6cbe..4415beeb0b6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3693,6 +3693,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
pa->pa_free = pa->pa_len;
atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock);
+ INIT_LIST_HEAD(&pa->pa_inode_list);
+ INIT_LIST_HEAD(&pa->pa_group_list);
pa->pa_deleted = 0;
pa->pa_linear = 0;
@@ -3755,6 +3757,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock);
INIT_LIST_HEAD(&pa->pa_inode_list);
+ INIT_LIST_HEAD(&pa->pa_group_list);
pa->pa_deleted = 0;
pa->pa_linear = 1;
@@ -4476,23 +4479,26 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
pa->pa_free -= ac->ac_b_ex.fe_len;
pa->pa_len -= ac->ac_b_ex.fe_len;
spin_unlock(&pa->pa_lock);
- /*
- * We want to add the pa to the right bucket.
- * Remove it from the list and while adding
- * make sure the list to which we are adding
- * doesn't grow big.
- */
- if (likely(pa->pa_free)) {
- spin_lock(pa->pa_obj_lock);
- list_del_rcu(&pa->pa_inode_list);
- spin_unlock(pa->pa_obj_lock);
- ext4_mb_add_n_trim(ac);
- }
}
- ext4_mb_put_pa(ac, ac->ac_sb, pa);
}
if (ac->alloc_semp)
up_read(ac->alloc_semp);
+ if (pa) {
+ /*
+ * We want to add the pa to the right bucket.
+ * Remove it from the list and while adding
+ * make sure the list to which we are adding
+ * doesn't grow big. We need to release
+ * alloc_semp before calling ext4_mb_add_n_trim()
+ */
+ if (pa->pa_linear && likely(pa->pa_free)) {
+ spin_lock(pa->pa_obj_lock);
+ list_del_rcu(&pa->pa_inode_list);
+ spin_unlock(pa->pa_obj_lock);
+ ext4_mb_add_n_trim(ac);
+ }
+ ext4_mb_put_pa(ac, ac->ac_sb, pa);
+ }
if (ac->ac_bitmap_page)
page_cache_release(ac->ac_bitmap_page);
if (ac->ac_buddy_page)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 734abca25e3..fe64d9f7985 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -481,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode)
+ 1);
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
- goto err_out;
+ return retval;
}
tmp_inode = ext4_new_inode(handle,
inode->i_sb->s_root->d_inode,
@@ -489,8 +489,7 @@ int ext4_ext_migrate(struct inode *inode)
if (IS_ERR(tmp_inode)) {
retval = -ENOMEM;
ext4_journal_stop(handle);
- tmp_inode = NULL;
- goto err_out;
+ return retval;
}
i_size_write(tmp_inode, i_size_read(inode));
/*
@@ -618,8 +617,7 @@ err_out:
ext4_journal_stop(handle);
- if (tmp_inode)
- iput(tmp_inode);
+ iput(tmp_inode);
return retval;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e5f06a5f045..a5732c58f67 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3046,14 +3046,17 @@ static void ext4_write_super(struct super_block *sb)
static int ext4_sync_fs(struct super_block *sb, int wait)
{
int ret = 0;
+ tid_t target;
trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
sb->s_dirt = 0;
if (EXT4_SB(sb)->s_journal) {
- if (wait)
- ret = ext4_force_commit(sb);
- else
- jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
+ if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
+ &target)) {
+ if (wait)
+ jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
+ target);
+ }
} else {
ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index eb343008ede..58144102bf2 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal)
}
/*
- * Called under j_state_lock. Returns true if a transaction was started.
+ * Called under j_state_lock. Returns true if a transaction commit was started.
*/
int __jbd2_log_start_commit(journal_t *journal, tid_t target)
{
@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
/*
* Start a commit of the current running transaction (if any). Returns true
- * if a transaction was started, and fills its tid in at *ptid
+ * if a transaction is going to be committed (or is currently already
+ * committing), and fills its tid in at *ptid
*/
int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
{
@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
if (journal->j_running_transaction) {
tid_t tid = journal->j_running_transaction->t_tid;
- ret = __jbd2_log_start_commit(journal, tid);
- if (ret && ptid)
+ __jbd2_log_start_commit(journal, tid);
+ /* There's a running transaction and we've just made sure
+ * it's commit has been scheduled. */
+ if (ptid)
*ptid = tid;
- } else if (journal->j_committing_transaction && ptid) {
+ ret = 1;
+ } else if (journal->j_committing_transaction) {
/*
* If ext3_write_super() recently started a commit, then we
* have to wait for completion of that transaction
*/
- *ptid = journal->j_committing_transaction->t_tid;
+ if (ptid)
+ *ptid = journal->j_committing_transaction->t_tid;
ret = 1;
}
spin_unlock(&journal->j_state_lock);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 46b4e347ed7..28ce21d8598 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2129,26 +2129,46 @@ done:
}
/*
- * This function must be called when inode is journaled in ordered mode
- * before truncation happens. It starts writeout of truncated part in
- * case it is in the committing transaction so that we stand to ordered
- * mode consistency guarantees.
+ * File truncate and transaction commit interact with each other in a
+ * non-trivial way. If a transaction writing data block A is
+ * committing, we cannot discard the data by truncate until we have
+ * written them. Otherwise if we crashed after the transaction with
+ * write has committed but before the transaction with truncate has
+ * committed, we could see stale data in block A. This function is a
+ * helper to solve this problem. It starts writeout of the truncated
+ * part in case it is in the committing transaction.
+ *
+ * Filesystem code must call this function when inode is journaled in
+ * ordered mode before truncation happens and after the inode has been
+ * placed on orphan list with the new inode size. The second condition
+ * avoids the race that someone writes new data and we start
+ * committing the transaction after this function has been called but
+ * before a transaction for truncate is started (and furthermore it
+ * allows us to optimize the case where the addition to orphan list
+ * happens in the same transaction as write --- we don't have to write
+ * any data in such case).
*/
-int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
+int jbd2_journal_begin_ordered_truncate(journal_t *journal,
+ struct jbd2_inode *jinode,
loff_t new_size)
{
- journal_t *journal;
- transaction_t *commit_trans;
+ transaction_t *inode_trans, *commit_trans;
int ret = 0;
- if (!inode->i_transaction && !inode->i_next_transaction)
+ /* This is a quick check to avoid locking if not necessary */
+ if (!jinode->i_transaction)
goto out;
- journal = inode->i_transaction->t_journal;
+ /* Locks are here just to force reading of recent values, it is
+ * enough that the transaction was not committing before we started
+ * a transaction adding the inode to orphan list */
spin_lock(&journal->j_state_lock);
commit_trans = journal->j_committing_transaction;
spin_unlock(&journal->j_state_lock);
- if (inode->i_transaction == commit_trans) {
- ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
+ spin_lock(&journal->j_list_lock);
+ inode_trans = jinode->i_transaction;
+ spin_unlock(&journal->j_list_lock);
+ if (inode_trans == commit_trans) {
+ ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
new_size, LLONG_MAX);
if (ret)
jbd2_journal_abort(journal, ret);
diff --git a/fs/namespace.c b/fs/namespace.c
index 228d8c4bfd1..06f8e63f6cb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -614,9 +614,11 @@ static inline void __mntput(struct vfsmount *mnt)
*/
for_each_possible_cpu(cpu) {
struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
- if (cpu_writer->mnt != mnt)
- continue;
spin_lock(&cpu_writer->lock);
+ if (cpu_writer->mnt != mnt) {
+ spin_unlock(&cpu_writer->lock);
+ continue;
+ }
atomic_add(cpu_writer->count, &mnt->__mnt_writers);
cpu_writer->count = 0;
/*
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index dae3f28f30d..331f2e88e28 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -156,7 +156,7 @@ static int inotify_handle_get_wd(struct inotify_handle *ih,
int ret;
do {
- if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL)))
+ if (unlikely(!idr_pre_get(&ih->idr, GFP_NOFS)))
return -ENOSPC;
ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
} while (ret == -EAGAIN);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3c3532e1307..172850a9a12 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
- return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode,
- new_size);
+ return jbd2_journal_begin_ordered_truncate(
+ OCFS2_SB(inode->i_sb)->journal->j_journal,
+ &OCFS2_I(inode)->ip_jinode,
+ new_size);
}
#endif /* OCFS2_JOURNAL_H */
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6d720243f5f..8a17f7edcc7 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -19,6 +19,7 @@
#include <linux/kmod.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
+#include <linux/blktrace_api.h>
#include "check.h"
@@ -294,6 +295,9 @@ static struct attribute_group part_attr_group = {
static struct attribute_group *part_attr_groups[] = {
&part_attr_group,
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+ &blk_trace_attr_group,
+#endif
NULL
};
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 5267098532b..a1a4cfe1921 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -48,8 +48,16 @@ int seq_open(struct file *file, const struct seq_operations *op)
*/
file->f_version = 0;
- /* SEQ files support lseek, but not pread/pwrite */
- file->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE);
+ /*
+ * seq_files support lseek() and pread(). They do not implement
+ * write() at all, but we clear FMODE_PWRITE here for historical
+ * reasons.
+ *
+ * If a client of seq_files a) implements file.write() and b) wishes to
+ * support pwrite() then that client will need to implement its own
+ * file.open() which calls seq_open() and then sets FMODE_PWRITE.
+ */
+ file->f_mode &= ~FMODE_PWRITE;
return 0;
}
EXPORT_SYMBOL(seq_open);
@@ -131,6 +139,22 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
int err = 0;
mutex_lock(&m->lock);
+
+ /* Don't assume *ppos is where we left it */
+ if (unlikely(*ppos != m->read_pos)) {
+ m->read_pos = *ppos;
+ while ((err = traverse(m, *ppos)) == -EAGAIN)
+ ;
+ if (err) {
+ /* With prejudice... */
+ m->read_pos = 0;
+ m->version = 0;
+ m->index = 0;
+ m->count = 0;
+ goto Done;
+ }
+ }
+
/*
* seq_file->op->..m_start/m_stop/m_next may do special actions
* or optimisations based on the file->f_version, so we want to
@@ -230,8 +254,10 @@ Fill:
Done:
if (!copied)
copied = err;
- else
+ else {
*ppos += copied;
+ m->read_pos += copied;
+ }
file->f_version = m->version;
mutex_unlock(&m->lock);
return copied;
@@ -266,16 +292,18 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin)
if (offset < 0)
break;
retval = offset;
- if (offset != file->f_pos) {
+ if (offset != m->read_pos) {
while ((retval=traverse(m, offset)) == -EAGAIN)
;
if (retval) {
/* with extreme prejudice... */
file->f_pos = 0;
+ m->read_pos = 0;
m->version = 0;
m->index = 0;
m->count = 0;
} else {
+ m->read_pos = offset;
retval = file->f_pos = offset;
}
}
diff --git a/fs/super.c b/fs/super.c
index 61dce001dd5..8349ed6b141 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -82,7 +82,22 @@ static struct super_block *alloc_super(struct file_system_type *type)
* lock ordering than usbfs:
*/
lockdep_set_class(&s->s_lock, &type->s_lock_key);
- down_write(&s->s_umount);
+ /*
+ * sget() can have s_umount recursion.
+ *
+ * When it cannot find a suitable sb, it allocates a new
+ * one (this one), and tries again to find a suitable old
+ * one.
+ *
+ * In case that succeeds, it will acquire the s_umount
+ * lock of the old one. Since these are clearly distrinct
+ * locks, and this object isn't exposed yet, there's no
+ * risk of deadlocks.
+ *
+ * Annotate this by putting this lock in a different
+ * subclass.
+ */
+ down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
s->s_count = S_BIAS;
atomic_set(&s->s_active, 1);
mutex_init(&s->s_vfs_rename_mutex);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 6a123b8ff3f..b042bd7034b 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -186,10 +186,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
- if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK))
- return -EINVAL;
- if (clockid != CLOCK_MONOTONIC &&
- clockid != CLOCK_REALTIME)
+ if ((flags & ~TFD_CREATE_FLAGS) ||
+ (clockid != CLOCK_MONOTONIC &&
+ clockid != CLOCK_REALTIME))
return -EINVAL;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -201,7 +200,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
- flags & (O_CLOEXEC | O_NONBLOCK));
+ flags & TFD_SHARED_FCNTL_FLAGS);
if (ufd < 0)
kfree(ctx);
@@ -219,7 +218,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
return -EFAULT;
- if (!timespec_valid(&ktmr.it_value) ||
+ if ((flags & ~TFD_SETTIME_FLAGS) ||
+ !timespec_valid(&ktmr.it_value) ||
!timespec_valid(&ktmr.it_interval))
return -EINVAL;
diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h
index 83c51aba534..e16fdb1f4f4 100644
--- a/include/asm-frv/pgtable.h
+++ b/include/asm-frv/pgtable.h
@@ -478,7 +478,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define __swp_type(x) (((x).val >> 2) & 0x1f)
#define __swp_offset(x) ((x).val >> 8)
#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 2) | ((offset) << 8) })
-#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte })
+#define __pte_to_swp_entry(_pte) ((swp_entry_t) { (_pte).pte })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
static inline int pte_file(pte_t pte)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 2aa283ab062..1b16108a541 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -171,8 +171,6 @@ struct bio {
#define BIO_RW_FAILFAST_TRANSPORT 8
#define BIO_RW_FAILFAST_DRIVER 9
-#define BIO_RW_SYNC (BIO_RW_SYNCIO | BIO_RW_UNPLUG)
-
#define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag)))
/*
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 25379cba237..d960889e92e 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -15,6 +15,7 @@ enum blktrace_cat {
BLK_TC_WRITE = 1 << 1, /* writes */
BLK_TC_BARRIER = 1 << 2, /* barrier */
BLK_TC_SYNC = 1 << 3, /* sync IO */
+ BLK_TC_SYNCIO = BLK_TC_SYNC,
BLK_TC_QUEUE = 1 << 4, /* queueing/merging */
BLK_TC_REQUEUE = 1 << 5, /* requeueing */
BLK_TC_ISSUE = 1 << 6, /* issue */
@@ -143,6 +144,9 @@ struct blk_user_trace_setup {
#ifdef __KERNEL__
#if defined(CONFIG_BLK_DEV_IO_TRACE)
+
+#include <linux/sysfs.h>
+
struct blk_trace {
int trace_state;
struct rchan *rchan;
@@ -193,6 +197,8 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
extern int blk_trace_startstop(struct request_queue *q, int start);
extern int blk_trace_remove(struct request_queue *q);
+extern struct attribute_group blk_trace_attr_group;
+
#else /* !CONFIG_BLK_DEV_IO_TRACE */
#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
#define blk_trace_shutdown(q) do { } while (0)
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 3e68469c188..f0413845f20 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -121,6 +121,7 @@ struct dma_chan_percpu {
* @local: per-cpu pointer to a struct dma_chan_percpu
* @client-count: how many clients are using this channel
* @table_count: number of appearances in the mem-to-mem allocation table
+ * @private: private data for certain client-channel associations
*/
struct dma_chan {
struct dma_device *device;
@@ -134,6 +135,7 @@ struct dma_chan {
struct dma_chan_percpu *local;
int client_count;
int table_count;
+ void *private;
};
/**
diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h
index 6e199c8dfac..cca686b3912 100644
--- a/include/linux/firmware-map.h
+++ b/include/linux/firmware-map.h
@@ -1,7 +1,7 @@
/*
* include/linux/firmware-map.h:
* Copyright (C) 2008 SUSE LINUX Products GmbH
- * by Bernhard Walle <bwalle@suse.de>
+ * by Bernhard Walle <bernhard.walle@gmx.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License v2.0 as published by
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6022f44043f..92734c0012e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -54,24 +54,30 @@ struct inodes_stat_t {
#define MAY_ACCESS 16
#define MAY_OPEN 32
+/*
+ * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond
+ * to O_WRONLY and O_RDWR via the strange trick in __dentry_open()
+ */
+
/* file is open for reading */
#define FMODE_READ ((__force fmode_t)1)
/* file is open for writing */
#define FMODE_WRITE ((__force fmode_t)2)
/* file is seekable */
#define FMODE_LSEEK ((__force fmode_t)4)
-/* file can be accessed using pread/pwrite */
+/* file can be accessed using pread */
#define FMODE_PREAD ((__force fmode_t)8)
-#define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */
+/* file can be accessed using pwrite */
+#define FMODE_PWRITE ((__force fmode_t)16)
/* File is opened for execution with sys_execve / sys_uselib */
-#define FMODE_EXEC ((__force fmode_t)16)
+#define FMODE_EXEC ((__force fmode_t)32)
/* File is opened with O_NDELAY (only set for block devices) */
-#define FMODE_NDELAY ((__force fmode_t)32)
+#define FMODE_NDELAY ((__force fmode_t)64)
/* File is opened with O_EXCL (only set for block devices) */
-#define FMODE_EXCL ((__force fmode_t)64)
+#define FMODE_EXCL ((__force fmode_t)128)
/* File is opened using open(.., 3, ..) and is writeable only for ioctls
(specialy hack for floppy.c) */
-#define FMODE_WRITE_IOCTL ((__force fmode_t)128)
+#define FMODE_WRITE_IOCTL ((__force fmode_t)256)
/*
* Don't update ctime and mtime.
@@ -87,10 +93,10 @@ struct inodes_stat_t {
#define WRITE 1
#define READA 2 /* read-ahead - don't block if no resources */
#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */
-#define READ_SYNC (READ | (1 << BIO_RW_SYNC))
+#define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
#define READ_META (READ | (1 << BIO_RW_META))
-#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC))
-#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC))
+#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
+#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER))
#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD)
#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER))
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 677432b9cb7..9d224c43e63 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -95,10 +95,41 @@ stack_trace_sysctl(struct ctl_table *table, int write,
loff_t *ppos);
#endif
+struct ftrace_func_command {
+ struct list_head list;
+ char *name;
+ int (*func)(char *func, char *cmd,
+ char *params, int enable);
+};
+
#ifdef CONFIG_DYNAMIC_FTRACE
/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */
#include <asm/ftrace.h>
+struct seq_file;
+
+struct ftrace_probe_ops {
+ void (*func)(unsigned long ip,
+ unsigned long parent_ip,
+ void **data);
+ int (*callback)(unsigned long ip, void **data);
+ void (*free)(void **data);
+ int (*print)(struct seq_file *m,
+ unsigned long ip,
+ struct ftrace_probe_ops *ops,
+ void *data);
+};
+
+extern int
+register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
+ void *data);
+extern void
+unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
+ void *data);
+extern void
+unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops);
+extern void unregister_ftrace_function_probe_all(char *glob);
+
enum {
FTRACE_FL_FREE = (1 << 0),
FTRACE_FL_FAILED = (1 << 1),
@@ -119,6 +150,9 @@ struct dyn_ftrace {
int ftrace_force_update(void);
void ftrace_set_filter(unsigned char *buf, int len, int reset);
+int register_ftrace_command(struct ftrace_func_command *cmd);
+int unregister_ftrace_command(struct ftrace_func_command *cmd);
+
/* defined in arch */
extern int ftrace_ip_converted(unsigned long ip);
extern int ftrace_dyn_arch_init(void *data);
@@ -126,6 +160,10 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func);
extern void ftrace_caller(void);
extern void ftrace_call(void);
extern void mcount_call(void);
+
+#ifndef FTRACE_ADDR
+#define FTRACE_ADDR ((unsigned long)ftrace_caller)
+#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
extern void ftrace_graph_caller(void);
extern int ftrace_enable_ftrace_graph_caller(void);
@@ -136,7 +174,7 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; }
#endif
/**
- * ftrace_make_nop - convert code into top
+ * ftrace_make_nop - convert code into nop
* @mod: module structure if called by module load initialization
* @rec: the mcount call site record
* @addr: the address that the call site should be calling
@@ -198,6 +236,14 @@ extern void ftrace_enable_daemon(void);
# define ftrace_disable_daemon() do { } while (0)
# define ftrace_enable_daemon() do { } while (0)
static inline void ftrace_release(void *start, unsigned long size) { }
+static inline int register_ftrace_command(struct ftrace_func_command *cmd)
+{
+ return -EINVAL;
+}
+static inline int unregister_ftrace_command(char *cmd_name)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_DYNAMIC_FTRACE */
/* totally disable ftrace - can not re-enable after this */
@@ -298,6 +344,9 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
extern int
__ftrace_printk(unsigned long ip, const char *fmt, ...)
__attribute__ ((format (printf, 2, 3)));
+# define ftrace_vprintk(fmt, ap) __ftrace_printk(_THIS_IP_, fmt, ap)
+extern int
+__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
extern void ftrace_dump(void);
#else
static inline void
@@ -313,6 +362,11 @@ ftrace_printk(const char *fmt, ...)
{
return 0;
}
+static inline int
+ftrace_vprintk(const char *fmt, va_list ap)
+{
+ return 0;
+}
static inline void ftrace_dump(void) { }
#endif
@@ -327,36 +381,6 @@ ftrace_init_module(struct module *mod,
unsigned long *start, unsigned long *end) { }
#endif
-enum {
- POWER_NONE = 0,
- POWER_CSTATE = 1,
- POWER_PSTATE = 2,
-};
-
-struct power_trace {
-#ifdef CONFIG_POWER_TRACER
- ktime_t stamp;
- ktime_t end;
- int type;
- int state;
-#endif
-};
-
-#ifdef CONFIG_POWER_TRACER
-extern void trace_power_start(struct power_trace *it, unsigned int type,
- unsigned int state);
-extern void trace_power_mark(struct power_trace *it, unsigned int type,
- unsigned int state);
-extern void trace_power_end(struct power_trace *it);
-#else
-static inline void trace_power_start(struct power_trace *it, unsigned int type,
- unsigned int state) { }
-static inline void trace_power_mark(struct power_trace *it, unsigned int type,
- unsigned int state) { }
-static inline void trace_power_end(struct power_trace *it) { }
-#endif
-
-
/*
* Structure that defines an entry function trace.
*/
@@ -492,4 +516,17 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk)
#endif /* CONFIG_TRACING */
+
+#ifdef CONFIG_HW_BRANCH_TRACER
+
+void trace_hw_branch(u64 from, u64 to);
+void trace_hw_branch_oops(void);
+
+#else /* CONFIG_HW_BRANCH_TRACER */
+
+static inline void trace_hw_branch(u64 from, u64 to) {}
+static inline void trace_hw_branch_oops(void) {}
+
+#endif /* CONFIG_HW_BRANCH_TRACER */
+
#endif /* _LINUX_FTRACE_H */
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
index 366a054d0b0..dca7bf8cffe 100644
--- a/include/linux/ftrace_irq.h
+++ b/include/linux/ftrace_irq.h
@@ -2,7 +2,7 @@
#define _LINUX_FTRACE_IRQ_H
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER)
+#ifdef CONFIG_FTRACE_NMI_ENTER
extern void ftrace_nmi_enter(void);
extern void ftrace_nmi_exit(void);
#else
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index f83288347dd..faa1cf848bc 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -15,55 +15,61 @@
* - bits 0-7 are the preemption count (max preemption depth: 256)
* - bits 8-15 are the softirq count (max # of softirqs: 256)
*
- * The hardirq count can be overridden per architecture, the default is:
+ * The hardirq count can in theory reach the same as NR_IRQS.
+ * In reality, the number of nested IRQS is limited to the stack
+ * size as well. For archs with over 1000 IRQS it is not practical
+ * to expect that they will all nest. We give a max of 10 bits for
+ * hardirq nesting. An arch may choose to give less than 10 bits.
+ * m68k expects it to be 8.
*
- * - bits 16-27 are the hardirq count (max # of hardirqs: 4096)
- * - ( bit 28 is the PREEMPT_ACTIVE flag. )
+ * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
+ * - bit 26 is the NMI_MASK
+ * - bit 28 is the PREEMPT_ACTIVE flag
*
* PREEMPT_MASK: 0x000000ff
* SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x0fff0000
+ * HARDIRQ_MASK: 0x03ff0000
+ * NMI_MASK: 0x04000000
*/
#define PREEMPT_BITS 8
#define SOFTIRQ_BITS 8
+#define NMI_BITS 1
-#ifndef HARDIRQ_BITS
-#define HARDIRQ_BITS 12
+#define MAX_HARDIRQ_BITS 10
-#ifndef MAX_HARDIRQS_PER_CPU
-#define MAX_HARDIRQS_PER_CPU NR_IRQS
+#ifndef HARDIRQ_BITS
+# define HARDIRQ_BITS MAX_HARDIRQ_BITS
#endif
-/*
- * The hardirq mask has to be large enough to have space for potentially
- * all IRQ sources in the system nesting on a single CPU.
- */
-#if (1 << HARDIRQ_BITS) < MAX_HARDIRQS_PER_CPU
-# error HARDIRQ_BITS is too low!
-#endif
+#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
+#error HARDIRQ_BITS too high!
#endif
#define PREEMPT_SHIFT 0
#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
#define __IRQ_MASK(x) ((1UL << (x))-1)
#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
+#define NMI_OFFSET (1UL << NMI_SHIFT)
-#if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS))
+#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
#error PREEMPT_ACTIVE is too low!
#endif
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
-#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK))
+#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
+ | NMI_MASK))
/*
* Are we doing bottom half or hardware interrupt processing?
@@ -73,6 +79,11 @@
#define in_softirq() (softirq_count())
#define in_interrupt() (irq_count())
+/*
+ * Are we in NMI context?
+ */
+#define in_nmi() (preempt_count() & NMI_MASK)
+
#if defined(CONFIG_PREEMPT)
# define PREEMPT_INATOMIC_BASE kernel_locked()
# define PREEMPT_CHECK_OFFSET 1
@@ -164,20 +175,24 @@ extern void irq_enter(void);
*/
extern void irq_exit(void);
-#define nmi_enter() \
- do { \
- ftrace_nmi_enter(); \
- lockdep_off(); \
- rcu_nmi_enter(); \
- __irq_enter(); \
+#define nmi_enter() \
+ do { \
+ ftrace_nmi_enter(); \
+ BUG_ON(in_nmi()); \
+ add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
+ lockdep_off(); \
+ rcu_nmi_enter(); \
+ trace_hardirq_enter(); \
} while (0)
-#define nmi_exit() \
- do { \
- __irq_exit(); \
- rcu_nmi_exit(); \
- lockdep_on(); \
- ftrace_nmi_exit(); \
+#define nmi_exit() \
+ do { \
+ trace_hardirq_exit(); \
+ rcu_nmi_exit(); \
+ lockdep_on(); \
+ BUG_ON(!in_nmi()); \
+ sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
+ ftrace_nmi_exit(); \
} while (0)
#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index b28b37eb11c..4d248b3f132 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1150,7 +1150,8 @@ extern int jbd2_journal_clear_err (journal_t *);
extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
extern int jbd2_journal_force_commit(journal_t *);
extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
-extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size);
+extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
+ struct jbd2_inode *inode, loff_t new_size);
extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 5715f190760..0424326f167 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -58,10 +58,10 @@ struct kvm_irqchip {
__u32 pad;
union {
char dummy[512]; /* reserving space */
-#ifdef CONFIG_X86
+#ifdef __KVM_HAVE_PIT
struct kvm_pic_state pic;
#endif
-#if defined(CONFIG_X86) || defined(CONFIG_IA64)
+#ifdef __KVM_HAVE_IOAPIC
struct kvm_ioapic_state ioapic;
#endif
} chip;
@@ -384,16 +384,16 @@ struct kvm_trace_rec {
#define KVM_CAP_MP_STATE 14
#define KVM_CAP_COALESCED_MMIO 15
#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */
-#if defined(CONFIG_X86)||defined(CONFIG_IA64)
+#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
#define KVM_CAP_DEVICE_ASSIGNMENT 17
#endif
#define KVM_CAP_IOMMU 18
-#if defined(CONFIG_X86)
+#ifdef __KVM_HAVE_MSI
#define KVM_CAP_DEVICE_MSI 20
#endif
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
-#if defined(CONFIG_X86)
+#ifdef __KVM_HAVE_USER_NMI
#define KVM_CAP_USER_NMI 22
#endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ec49d0be7f5..bf6f703642f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,7 @@ void kvm_free_physmem(struct kvm *kvm);
struct kvm *kvm_arch_create_vm(void);
void kvm_arch_destroy_vm(struct kvm *kvm);
void kvm_free_all_assigned_devices(struct kvm *kvm);
+void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 323561582c1..065cdf8c09f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1041,10 +1041,23 @@ extern void free_bootmem_with_active_regions(int nid,
typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
extern void sparse_memory_present_with_active_regions(int nid);
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
-extern int early_pfn_to_nid(unsigned long pfn);
-#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
+#if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \
+ !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
+static inline int __early_pfn_to_nid(unsigned long pfn)
+{
+ return 0;
+}
+#else
+/* please see mm/page_alloc.c */
+extern int __meminit early_pfn_to_nid(unsigned long pfn);
+#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+/* there is a per-arch backend function. */
+extern int __meminit __early_pfn_to_nid(unsigned long pfn);
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+#endif
+
extern void set_dma_reserve(unsigned long new_dma_reserve);
extern void memmap_init_zone(unsigned long, int, unsigned long,
unsigned long, enum memmap_context);
@@ -1159,6 +1172,7 @@ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
/* mm/page-writeback.c */
int write_one_page(struct page *page, int wait);
+void task_dirty_inc(struct task_struct *tsk);
/* readahead.c */
#define VM_MAX_READAHEAD 128 /* kbytes */
@@ -1304,5 +1318,6 @@ void vmemmap_populate_print_last(void);
extern void *alloc_locked_buffer(size_t size);
extern void free_locked_buffer(void *buffer, size_t size);
+extern void release_locked_buffer(void *buffer, size_t size);
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 09c14e213b6..1aca6cebbb7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1071,7 +1071,7 @@ void sparse_init(void);
#endif /* CONFIG_SPARSEMEM */
#ifdef CONFIG_NODES_SPAN_OTHER_NODES
-#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid))
+bool early_pfn_in_nid(unsigned long pfn, int nid);
#else
#define early_pfn_in_nid(pfn, nid) (1)
#endif
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 52a9fe08451..114b8192eab 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1312,6 +1312,7 @@
#define PCI_DEVICE_ID_VIA_VT3351 0x0351
#define PCI_DEVICE_ID_VIA_VT3364 0x0364
#define PCI_DEVICE_ID_VIA_8371_0 0x0391
+#define PCI_DEVICE_ID_VIA_6415 0x0415
#define PCI_DEVICE_ID_VIA_8501_0 0x0501
#define PCI_DEVICE_ID_VIA_82C561 0x0561
#define PCI_DEVICE_ID_VIA_82C586_1 0x0571
@@ -1444,6 +1445,7 @@
#define PCI_DEVICE_ID_DIGI_DF_M_E 0x0071
#define PCI_DEVICE_ID_DIGI_DF_M_IOM2_A 0x0072
#define PCI_DEVICE_ID_DIGI_DF_M_A 0x0073
+#define PCI_DEVICE_ID_DIGI_NEO_8 0x00B1
#define PCI_DEVICE_ID_NEO_2DB9 0x00C8
#define PCI_DEVICE_ID_NEO_2DB9PRI 0x00C9
#define PCI_DEVICE_ID_NEO_2RJ45 0x00CA
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index b3b35966008..f5e793d69bd 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -8,7 +8,7 @@ struct ring_buffer;
struct ring_buffer_iter;
/*
- * Don't reference this struct directly, use functions below.
+ * Don't refer to this struct directly, use functions below.
*/
struct ring_buffer_event {
u32 type:2, len:3, time_delta:27;
@@ -74,13 +74,10 @@ void ring_buffer_free(struct ring_buffer *buffer);
int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
-struct ring_buffer_event *
-ring_buffer_lock_reserve(struct ring_buffer *buffer,
- unsigned long length,
- unsigned long *flags);
+struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer,
+ unsigned long length);
int ring_buffer_unlock_commit(struct ring_buffer *buffer,
- struct ring_buffer_event *event,
- unsigned long flags);
+ struct ring_buffer_event *event);
int ring_buffer_write(struct ring_buffer *buffer,
unsigned long length, void *data);
@@ -124,9 +121,20 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
u64 ring_buffer_time_stamp(int cpu);
void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
+/*
+ * The below functions are fine to use outside the tracing facility.
+ */
+#ifdef CONFIG_RING_BUFFER
void tracing_on(void);
void tracing_off(void);
void tracing_off_permanent(void);
+int tracing_is_on(void);
+#else
+static inline void tracing_on(void) { }
+static inline void tracing_off(void) { }
+static inline void tracing_off_permanent(void) { }
+static inline int tracing_is_on(void) { return 0; }
+#endif
void *ring_buffer_alloc_read_page(struct ring_buffer *buffer);
void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8981e52c714..426666dd820 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -137,6 +137,8 @@ extern unsigned long nr_uninterruptible(void);
extern unsigned long nr_active(void);
extern unsigned long nr_iowait(void);
+extern unsigned long get_parent_ip(unsigned long addr);
+
struct seq_file;
struct cfs_rq;
struct task_group;
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 40ea5058c2e..f616f31576d 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -19,6 +19,7 @@ struct seq_file {
size_t from;
size_t count;
loff_t index;
+ loff_t read_pos;
u64 version;
struct mutex lock;
const struct seq_operations *op;
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 6ca6a7b66d7..f4523651fa4 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,6 +14,7 @@
#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
#include <linux/compiler.h>
+#include <trace/kmemtrace.h>
/* Size description struct for general caches. */
struct cache_sizes {
@@ -28,8 +29,26 @@ extern struct cache_sizes malloc_sizes[];
void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
void *__kmalloc(size_t size, gfp_t flags);
-static inline void *kmalloc(size_t size, gfp_t flags)
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags);
+extern size_t slab_buffer_size(struct kmem_cache *cachep);
+#else
+static __always_inline void *
+kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
{
+ return kmem_cache_alloc(cachep, flags);
+}
+static inline size_t slab_buffer_size(struct kmem_cache *cachep)
+{
+ return 0;
+}
+#endif
+
+static __always_inline void *kmalloc(size_t size, gfp_t flags)
+{
+ struct kmem_cache *cachep;
+ void *ret;
+
if (__builtin_constant_p(size)) {
int i = 0;
@@ -47,10 +66,17 @@ static inline void *kmalloc(size_t size, gfp_t flags)
found:
#ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA)
- return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep,
- flags);
+ cachep = malloc_sizes[i].cs_dmacachep;
+ else
#endif
- return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags);
+ cachep = malloc_sizes[i].cs_cachep;
+
+ ret = kmem_cache_alloc_notrace(cachep, flags);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
+ size, slab_buffer_size(cachep), flags);
+
+ return ret;
}
return __kmalloc(size, flags);
}
@@ -59,8 +85,25 @@ found:
extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+ gfp_t flags,
+ int nodeid);
+#else
+static __always_inline void *
+kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+ gfp_t flags,
+ int nodeid)
+{
+ return kmem_cache_alloc_node(cachep, flags, nodeid);
+}
+#endif
+
+static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
+ struct kmem_cache *cachep;
+ void *ret;
+
if (__builtin_constant_p(size)) {
int i = 0;
@@ -78,11 +121,18 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
found:
#ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA)
- return kmem_cache_alloc_node(malloc_sizes[i].cs_dmacachep,
- flags, node);
+ cachep = malloc_sizes[i].cs_dmacachep;
+ else
#endif
- return kmem_cache_alloc_node(malloc_sizes[i].cs_cachep,
- flags, node);
+ cachep = malloc_sizes[i].cs_cachep;
+
+ ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_,
+ ret, size, slab_buffer_size(cachep),
+ flags, node);
+
+ return ret;
}
return __kmalloc_node(size, flags, node);
}
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 59a3fa476ab..0ec00b39d00 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -3,14 +3,15 @@
void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
-static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags)
{
return kmem_cache_alloc_node(cachep, flags, -1);
}
void *__kmalloc_node(size_t size, gfp_t flags, int node);
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
return __kmalloc_node(size, flags, node);
}
@@ -23,12 +24,12 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
* kmalloc is the normal method of allocating memory
* in the kernel.
*/
-static inline void *kmalloc(size_t size, gfp_t flags)
+static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
return __kmalloc_node(size, flags, -1);
}
-static inline void *__kmalloc(size_t size, gfp_t flags)
+static __always_inline void *__kmalloc(size_t size, gfp_t flags)
{
return kmalloc(size, flags);
}
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 2f5c16b1aac..6b657f7dcb2 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,6 +10,7 @@
#include <linux/gfp.h>
#include <linux/workqueue.h>
#include <linux/kobject.h>
+#include <trace/kmemtrace.h>
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
@@ -204,13 +205,31 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
void *__kmalloc(size_t size, gfp_t flags);
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
+#else
+static __always_inline void *
+kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+{
+ return kmem_cache_alloc(s, gfpflags);
+}
+#endif
+
static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
{
- return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size));
+ unsigned int order = get_order(size);
+ void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
+ size, PAGE_SIZE << order, flags);
+
+ return ret;
}
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
+ void *ret;
+
if (__builtin_constant_p(size)) {
if (size > PAGE_SIZE)
return kmalloc_large(size, flags);
@@ -221,7 +240,13 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
if (!s)
return ZERO_SIZE_PTR;
- return kmem_cache_alloc(s, flags);
+ ret = kmem_cache_alloc_notrace(s, flags);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
+ _THIS_IP_, ret,
+ size, s->size, flags);
+
+ return ret;
}
}
return __kmalloc(size, flags);
@@ -231,8 +256,24 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
void *__kmalloc_node(size_t size, gfp_t flags, int node);
void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+ gfp_t gfpflags,
+ int node);
+#else
+static __always_inline void *
+kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+ gfp_t gfpflags,
+ int node)
+{
+ return kmem_cache_alloc_node(s, gfpflags, node);
+}
+#endif
+
static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
+ void *ret;
+
if (__builtin_constant_p(size) &&
size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
struct kmem_cache *s = kmalloc_slab(size);
@@ -240,7 +281,13 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
if (!s)
return ZERO_SIZE_PTR;
- return kmem_cache_alloc_node(s, flags, node);
+ ret = kmem_cache_alloc_node_notrace(s, flags, node);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+ _THIS_IP_, ret,
+ size, s->size, flags, node);
+
+ return ret;
}
return __kmalloc_node(size, flags, node);
}
diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h
index 86cb0501d3e..2d0792983f8 100644
--- a/include/linux/timerfd.h
+++ b/include/linux/timerfd.h
@@ -11,13 +11,21 @@
/* For O_CLOEXEC and O_NONBLOCK */
#include <linux/fcntl.h>
-/* Flags for timerfd_settime. */
+/*
+ * CAREFUL: Check include/asm-generic/fcntl.h when defining
+ * new flags, since they might collide with O_* ones. We want
+ * to re-use O_* flags that couldn't possibly have a meaning
+ * from eventfd, in order to leave a free define-space for
+ * shared O_* flags.
+ */
#define TFD_TIMER_ABSTIME (1 << 0)
-
-/* Flags for timerfd_create. */
#define TFD_CLOEXEC O_CLOEXEC
#define TFD_NONBLOCK O_NONBLOCK
+#define TFD_SHARED_FCNTL_FLAGS (TFD_CLOEXEC | TFD_NONBLOCK)
+/* Flags for timerfd_create. */
+#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS
+/* Flags for timerfd_settime. */
+#define TFD_SETTIME_FLAGS TFD_TIMER_ABSTIME
#endif /* _LINUX_TIMERFD_H */
-
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 506e7620a98..9c0890c7a06 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -84,6 +84,10 @@ extern struct vm_struct *get_vm_area_caller(unsigned long size,
unsigned long flags, void *caller);
extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
unsigned long start, unsigned long end);
+extern struct vm_struct *__get_vm_area_caller(unsigned long size,
+ unsigned long flags,
+ unsigned long start, unsigned long end,
+ void *caller);
extern struct vm_struct *get_vm_area_node(unsigned long size,
unsigned long flags, int node,
gfp_t gfp_mask);
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
new file mode 100644
index 00000000000..ad8b7857855
--- /dev/null
+++ b/include/trace/kmemtrace.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ *
+ * This file is released under GPL version 2.
+ */
+
+#ifndef _LINUX_KMEMTRACE_H
+#define _LINUX_KMEMTRACE_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/marker.h>
+
+enum kmemtrace_type_id {
+ KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
+ KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
+ KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
+};
+
+#ifdef CONFIG_KMEMTRACE
+
+extern void kmemtrace_init(void);
+
+extern void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags,
+ int node);
+
+extern void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr);
+
+#else /* CONFIG_KMEMTRACE */
+
+static inline void kmemtrace_init(void)
+{
+}
+
+static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags,
+ int node)
+{
+}
+
+static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr)
+{
+}
+
+#endif /* CONFIG_KMEMTRACE */
+
+static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags)
+{
+ kmemtrace_mark_alloc_node(type_id, call_site, ptr,
+ bytes_req, bytes_alloc, gfp_flags, -1);
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_KMEMTRACE_H */
+
diff --git a/include/trace/power.h b/include/trace/power.h
new file mode 100644
index 00000000000..2c733e58e89
--- /dev/null
+++ b/include/trace/power.h
@@ -0,0 +1,34 @@
+#ifndef _TRACE_POWER_H
+#define _TRACE_POWER_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+
+enum {
+ POWER_NONE = 0,
+ POWER_CSTATE = 1,
+ POWER_PSTATE = 2,
+};
+
+struct power_trace {
+#ifdef CONFIG_POWER_TRACER
+ ktime_t stamp;
+ ktime_t end;
+ int type;
+ int state;
+#endif
+};
+
+DECLARE_TRACE(power_start,
+ TPPROTO(struct power_trace *it, unsigned int type, unsigned int state),
+ TPARGS(it, type, state));
+
+DECLARE_TRACE(power_mark,
+ TPPROTO(struct power_trace *it, unsigned int type, unsigned int state),
+ TPARGS(it, type, state));
+
+DECLARE_TRACE(power_end,
+ TPPROTO(struct power_trace *it),
+ TPARGS(it));
+
+#endif /* _TRACE_POWER_H */
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
new file mode 100644
index 00000000000..867829df457
--- /dev/null
+++ b/include/trace/workqueue.h
@@ -0,0 +1,25 @@
+#ifndef __TRACE_WORKQUEUE_H
+#define __TRACE_WORKQUEUE_H
+
+#include <linux/tracepoint.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+
+DECLARE_TRACE(workqueue_insertion,
+ TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
+ TPARGS(wq_thread, work));
+
+DECLARE_TRACE(workqueue_execution,
+ TPPROTO(struct task_struct *wq_thread, struct work_struct *work),
+ TPARGS(wq_thread, work));
+
+/* Trace the creation of one workqueue thread on a cpu */
+DECLARE_TRACE(workqueue_creation,
+ TPPROTO(struct task_struct *wq_thread, int cpu),
+ TPARGS(wq_thread, cpu));
+
+DECLARE_TRACE(workqueue_destruction,
+ TPPROTO(struct task_struct *wq_thread),
+ TPARGS(wq_thread));
+
+#endif /* __TRACE_WORKQUEUE_H */
diff --git a/init/main.c b/init/main.c
index 844209453c0..db7974ff7a0 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>
+#include <trace/kmemtrace.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/smp.h>
@@ -641,6 +642,7 @@ asmlinkage void __init start_kernel(void)
enable_debug_pagealloc();
cpu_hotplug_init();
kmem_cache_init();
+ kmemtrace_init();
debug_objects_mem_init();
idr_init_cache();
setup_per_cpu_pageset();
diff --git a/kernel/Makefile b/kernel/Makefile
index 170a9213c1b..e4791b3ba55 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PM) += power/
+obj-$(CONFIG_FREEZER) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e14db9c089b..9edb5c4b79b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1122,8 +1122,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
mutex_unlock(&cgroup_mutex);
- kfree(root);
kill_litter_super(sb);
+ kfree(root);
}
static struct file_system_type cgroup_fs_type = {
diff --git a/kernel/extable.c b/kernel/extable.c
index e136ed8d82b..0df6253730b 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -41,7 +41,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
return e;
}
-__notrace_funcgraph int core_kernel_text(unsigned long addr)
+int core_kernel_text(unsigned long addr)
{
if (addr >= (unsigned long)_stext &&
addr <= (unsigned long)_etext)
@@ -54,7 +54,7 @@ __notrace_funcgraph int core_kernel_text(unsigned long addr)
return 0;
}
-__notrace_funcgraph int __kernel_text_address(unsigned long addr)
+int __kernel_text_address(unsigned long addr)
{
if (core_kernel_text(addr))
return 1;
diff --git a/kernel/futex.c b/kernel/futex.c
index f89d373a9c6..438701adce2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1165,6 +1165,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
{
struct task_struct *curr = current;
+ struct restart_block *restart;
DECLARE_WAITQUEUE(wait, curr);
struct futex_hash_bucket *hb;
struct futex_q q;
@@ -1216,11 +1217,13 @@ retry:
if (!ret)
goto retry;
- return ret;
+ goto out;
}
ret = -EWOULDBLOCK;
- if (uval != val)
- goto out_unlock_put_key;
+ if (unlikely(uval != val)) {
+ queue_unlock(&q, hb);
+ goto out_put_key;
+ }
/* Only actually queue if *uaddr contained val. */
queue_me(&q, hb);
@@ -1284,38 +1287,38 @@ retry:
*/
/* If we were woken (and unqueued), we succeeded, whatever. */
+ ret = 0;
if (!unqueue_me(&q))
- return 0;
+ goto out_put_key;
+ ret = -ETIMEDOUT;
if (rem)
- return -ETIMEDOUT;
+ goto out_put_key;
/*
* We expect signal_pending(current), but another thread may
* have handled it for us already.
*/
+ ret = -ERESTARTSYS;
if (!abs_time)
- return -ERESTARTSYS;
- else {
- struct restart_block *restart;
- restart = &current_thread_info()->restart_block;
- restart->fn = futex_wait_restart;
- restart->futex.uaddr = (u32 *)uaddr;
- restart->futex.val = val;
- restart->futex.time = abs_time->tv64;
- restart->futex.bitset = bitset;
- restart->futex.flags = 0;
-
- if (fshared)
- restart->futex.flags |= FLAGS_SHARED;
- if (clockrt)
- restart->futex.flags |= FLAGS_CLOCKRT;
- return -ERESTART_RESTARTBLOCK;
- }
+ goto out_put_key;
-out_unlock_put_key:
- queue_unlock(&q, hb);
- put_futex_key(fshared, &q.key);
+ restart = &current_thread_info()->restart_block;
+ restart->fn = futex_wait_restart;
+ restart->futex.uaddr = (u32 *)uaddr;
+ restart->futex.val = val;
+ restart->futex.time = abs_time->tv64;
+ restart->futex.bitset = bitset;
+ restart->futex.flags = 0;
+
+ if (fshared)
+ restart->futex.flags |= FLAGS_SHARED;
+ if (clockrt)
+ restart->futex.flags |= FLAGS_CLOCKRT;
+ ret = -ERESTART_RESTARTBLOCK;
+
+out_put_key:
+ put_futex_key(fshared, &q.key);
out:
return ret;
}
diff --git a/kernel/module.c b/kernel/module.c
index ba22484a987..22d7379709d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2735,7 +2735,7 @@ int is_module_address(unsigned long addr)
/* Is this a valid kernel address? */
-__notrace_funcgraph struct module *__module_text_address(unsigned long addr)
+struct module *__module_text_address(unsigned long addr)
{
struct module *mod;
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 2313a4cc14e..e976e505648 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -681,6 +681,33 @@ static void cpu_timer_fire(struct k_itimer *timer)
}
/*
+ * Sample a process (thread group) timer for the given group_leader task.
+ * Must be called with tasklist_lock held for reading.
+ */
+static int cpu_timer_sample_group(const clockid_t which_clock,
+ struct task_struct *p,
+ union cpu_time_count *cpu)
+{
+ struct task_cputime cputime;
+
+ thread_group_cputimer(p, &cputime);
+ switch (CPUCLOCK_WHICH(which_clock)) {
+ default:
+ return -EINVAL;
+ case CPUCLOCK_PROF:
+ cpu->cpu = cputime_add(cputime.utime, cputime.stime);
+ break;
+ case CPUCLOCK_VIRT:
+ cpu->cpu = cputime.utime;
+ break;
+ case CPUCLOCK_SCHED:
+ cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+ break;
+ }
+ return 0;
+}
+
+/*
* Guts of sys_timer_settime for CPU timers.
* This is called with the timer locked and interrupts disabled.
* If we return TIMER_RETRY, it's necessary to release the timer's lock
@@ -741,7 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
cpu_clock_sample(timer->it_clock, p, &val);
} else {
- cpu_clock_sample_group(timer->it_clock, p, &val);
+ cpu_timer_sample_group(timer->it_clock, p, &val);
}
if (old) {
@@ -889,7 +916,7 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
read_unlock(&tasklist_lock);
goto dead;
} else {
- cpu_clock_sample_group(timer->it_clock, p, &now);
+ cpu_timer_sample_group(timer->it_clock, p, &now);
clear_dead = (unlikely(p->exit_state) &&
thread_group_empty(p));
}
@@ -1244,7 +1271,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
clear_dead_task(timer, now);
goto out_unlock;
}
- cpu_clock_sample_group(timer->it_clock, p, &now);
+ cpu_timer_sample_group(timer->it_clock, p, &now);
bump_cpu_timer(timer, now);
/* Leave the tasklist_lock locked for the call below. */
}
@@ -1409,33 +1436,6 @@ void run_posix_cpu_timers(struct task_struct *tsk)
}
/*
- * Sample a process (thread group) timer for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
- */
-static int cpu_timer_sample_group(const clockid_t which_clock,
- struct task_struct *p,
- union cpu_time_count *cpu)
-{
- struct task_cputime cputime;
-
- thread_group_cputimer(p, &cputime);
- switch (CPUCLOCK_WHICH(which_clock)) {
- default:
- return -EINVAL;
- case CPUCLOCK_PROF:
- cpu->cpu = cputime_add(cputime.utime, cputime.stime);
- break;
- case CPUCLOCK_VIRT:
- cpu->cpu = cputime.utime;
- break;
- case CPUCLOCK_SCHED:
- cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
- break;
- }
- return 0;
-}
-
-/*
* Set one of the process-wide special case CPU timers.
* The tsk->sighand->siglock must be held by the caller.
* The *newval argument is relative and we update it to be absolute, *oldval
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index d7a10167a25..720ea4f781b 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -3,7 +3,7 @@ ifeq ($(CONFIG_PM_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
endif
-obj-y := main.o
+obj-$(CONFIG_PM) += main.o
obj-$(CONFIG_PM_SLEEP) += console.o
obj-$(CONFIG_FREEZER) += process.o
obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 6da14358537..505f319e489 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -60,6 +60,7 @@ static struct block_device *resume_bdev;
static int submit(int rw, pgoff_t page_off, struct page *page,
struct bio **bio_chain)
{
+ const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
struct bio *bio;
bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
@@ -80,7 +81,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
bio_get(bio);
if (bio_chain == NULL) {
- submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+ submit_bio(bio_rw, bio);
wait_on_page_locked(page);
if (rw == READ)
bio_set_pages_dirty(bio);
@@ -90,7 +91,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
get_page(page); /* These pages are freed later */
bio->bi_private = *bio_chain;
*bio_chain = bio;
- submit_bio(rw | (1 << BIO_RW_SYNC), bio);
+ submit_bio(bio_rw, bio);
}
return 0;
}
diff --git a/kernel/relay.c b/kernel/relay.c
index 9d79b7854fa..edc0ba6d816 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -677,9 +677,7 @@ int relay_late_setup_files(struct rchan *chan,
*/
for_each_online_cpu(i) {
if (unlikely(!chan->buf[i])) {
- printk(KERN_ERR "relay_late_setup_files: CPU %u "
- "has no buffer, it must have!\n", i);
- BUG();
+ WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
err = -EINVAL;
break;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index c1d0ed36008..2492b9b3fed 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4404,10 +4404,7 @@ void scheduler_tick(void)
#endif
}
-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
- defined(CONFIG_PREEMPT_TRACER))
-
-static inline unsigned long get_parent_ip(unsigned long addr)
+unsigned long get_parent_ip(unsigned long addr)
{
if (in_lock_functions(addr)) {
addr = CALLER_ADDR2;
@@ -4417,6 +4414,9 @@ static inline unsigned long get_parent_ip(unsigned long addr)
return addr;
}
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+ defined(CONFIG_PREEMPT_TRACER))
+
void __kprobes add_preempt_count(int val)
{
#ifdef CONFIG_DEBUG_PREEMPT
@@ -6944,20 +6944,26 @@ static void free_rootdomain(struct root_domain *rd)
static void rq_attach_root(struct rq *rq, struct root_domain *rd)
{
+ struct root_domain *old_rd = NULL;
unsigned long flags;
spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
- struct root_domain *old_rd = rq->rd;
+ old_rd = rq->rd;
if (cpumask_test_cpu(rq->cpu, old_rd->online))
set_rq_offline(rq);
cpumask_clear_cpu(rq->cpu, old_rd->span);
- if (atomic_dec_and_test(&old_rd->refcount))
- free_rootdomain(old_rd);
+ /*
+ * If we dont want to free the old_rt yet then
+ * set old_rd to NULL to skip the freeing later
+ * in this function:
+ */
+ if (!atomic_dec_and_test(&old_rd->refcount))
+ old_rd = NULL;
}
atomic_inc(&rd->refcount);
@@ -6968,6 +6974,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
set_rq_online(rq);
spin_unlock_irqrestore(&rq->lock, flags);
+
+ if (old_rd)
+ free_rootdomain(old_rd);
}
static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index bdbe9de9cd8..6edfc2c11d9 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -21,6 +21,7 @@
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
+#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/tick.h>
@@ -79,13 +80,23 @@ static void __local_bh_disable(unsigned long ip)
WARN_ON_ONCE(in_irq());
raw_local_irq_save(flags);
- add_preempt_count(SOFTIRQ_OFFSET);
+ /*
+ * The preempt tracer hooks into add_preempt_count and will break
+ * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
+ * is set and before current->softirq_enabled is cleared.
+ * We must manually increment preempt_count here and manually
+ * call the trace_preempt_off later.
+ */
+ preempt_count() += SOFTIRQ_OFFSET;
/*
* Were softirqs turned off above:
*/
if (softirq_count() == SOFTIRQ_OFFSET)
trace_softirqs_off(ip);
raw_local_irq_restore(flags);
+
+ if (preempt_count() == SOFTIRQ_OFFSET)
+ trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 34e707e5ab8..07877f4b523 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -9,6 +9,9 @@ config USER_STACKTRACE_SUPPORT
config NOP_TRACER
bool
+config HAVE_FTRACE_NMI_ENTER
+ bool
+
config HAVE_FUNCTION_TRACER
bool
@@ -37,6 +40,11 @@ config TRACER_MAX_TRACE
config RING_BUFFER
bool
+config FTRACE_NMI_ENTER
+ bool
+ depends on HAVE_FTRACE_NMI_ENTER
+ default y
+
config TRACING
bool
select DEBUG_FS
@@ -127,6 +135,7 @@ config SYSPROF_TRACER
bool "Sysprof Tracer"
depends on X86
select TRACING
+ select CONTEXT_SWITCH_TRACER
help
This tracer provides the trace needed by the 'Sysprof' userspace
tool.
@@ -165,9 +174,8 @@ config BOOT_TRACER
representation of the delays during initcalls - but the raw
/debug/tracing/trace text output is readable too.
- ( Note that tracing self tests can't be enabled if this tracer is
- selected, because the self-tests are an initcall as well and that
- would invalidate the boot trace. )
+ You must pass in ftrace=initcall to the kernel command line
+ to enable this on bootup.
config TRACE_BRANCH_PROFILING
bool "Trace likely/unlikely profiler"
@@ -266,6 +274,62 @@ config HW_BRANCH_TRACER
This tracer records all branches on the system in a circular
buffer giving access to the last N branches for each cpu.
+config KMEMTRACE
+ bool "Trace SLAB allocations"
+ select TRACING
+ help
+ kmemtrace provides tracing for slab allocator functions, such as
+ kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
+ data is then fed to the userspace application in order to analyse
+ allocation hotspots, internal fragmentation and so on, making it
+ possible to see how well an allocator performs, as well as debug
+ and profile kernel code.
+
+ This requires an userspace application to use. See
+ Documentation/vm/kmemtrace.txt for more information.
+
+ Saying Y will make the kernel somewhat larger and slower. However,
+ if you disable kmemtrace at run-time or boot-time, the performance
+ impact is minimal (depending on the arch the kernel is built for).
+
+ If unsure, say N.
+
+config WORKQUEUE_TRACER
+ bool "Trace workqueues"
+ select TRACING
+ help
+ The workqueue tracer provides some statistical informations
+ about each cpu workqueue thread such as the number of the
+ works inserted and executed since their creation. It can help
+ to evaluate the amount of work each of them have to perform.
+ For example it can help a developer to decide whether he should
+ choose a per cpu workqueue instead of a singlethreaded one.
+
+config BLK_DEV_IO_TRACE
+ bool "Support for tracing block io actions"
+ depends on SYSFS
+ depends on BLOCK
+ select RELAY
+ select DEBUG_FS
+ select TRACEPOINTS
+ select TRACING
+ select STACKTRACE
+ help
+ Say Y here if you want to be able to trace the block layer actions
+ on a given queue. Tracing allows you to see any traffic happening
+ on a block device queue. For more information (and the userspace
+ support tools needed), fetch the blktrace tools from:
+
+ git://git.kernel.dk/blktrace.git
+
+ Tracing also is possible using the ftrace interface, e.g.:
+
+ echo 1 > /sys/block/sda/sda1/trace/enable
+ echo blk > /sys/kernel/debug/tracing/current_tracer
+ cat /sys/kernel/debug/tracing/trace_pipe
+
+ If unsure, say N.
+
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
depends on FUNCTION_TRACER
@@ -296,7 +360,7 @@ config FTRACE_SELFTEST
config FTRACE_STARTUP_TEST
bool "Perform a startup test on ftrace"
- depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
+ depends on TRACING && DEBUG_KERNEL
select FTRACE_SELFTEST
help
This option performs a series of startup tests on ftrace. On bootup
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 349d5a93653..627090bc262 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,8 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_TRACING) += trace_output.o
+obj-$(CONFIG_TRACING) += trace_stat.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
@@ -33,5 +35,8 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
obj-$(CONFIG_POWER_TRACER) += trace_power.o
+obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
+obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
+obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
libftrace-y := ftrace.o
diff --git a/block/blktrace.c b/kernel/trace/blktrace.c
index 39cc3bfe56e..e82cb9e930c 100644
--- a/block/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -24,10 +24,28 @@
#include <linux/debugfs.h>
#include <linux/time.h>
#include <trace/block.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include "trace_output.h"
static unsigned int blktrace_seq __read_mostly = 1;
+static struct trace_array *blk_tr;
+static int __read_mostly blk_tracer_enabled;
+
+/* Select an alternative, minimalistic output than the original one */
+#define TRACE_BLK_OPT_CLASSIC 0x1
+
+static struct tracer_opt blk_tracer_opts[] = {
+ /* Default disable the minimalistic output */
+ { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) },
+ { }
+};
+
+static struct tracer_flags blk_tracer_flags = {
+ .val = 0,
+ .opts = blk_tracer_opts,
+};
+
/* Global reference count of probes */
static DEFINE_MUTEX(blk_probe_mutex);
static atomic_t blk_probes_ref = ATOMIC_INIT(0);
@@ -43,6 +61,9 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
{
struct blk_io_trace *t;
+ if (!bt->rchan)
+ return;
+
t = relay_reserve(bt->rchan, sizeof(*t) + len);
if (t) {
const int cpu = smp_processor_id();
@@ -90,6 +111,16 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
unsigned long flags;
char *buf;
+ if (blk_tr) {
+ va_start(args, fmt);
+ ftrace_vprintk(fmt, args);
+ va_end(args);
+ return;
+ }
+
+ if (!bt->msg_data)
+ return;
+
local_irq_save(flags);
buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
va_start(args, fmt);
@@ -117,11 +148,12 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
/*
* Data direction bit lookup
*/
-static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };
+static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ),
+ BLK_TC_ACT(BLK_TC_WRITE) };
/* The ilog2() calls fall out because they're constant */
-#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \
- (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) )
+#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
+ (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))
/*
* The worker for the various blk_add_trace*() types. Fills out a
@@ -131,18 +163,20 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
int rw, u32 what, int error, int pdu_len, void *pdu_data)
{
struct task_struct *tsk = current;
+ struct ring_buffer_event *event = NULL;
struct blk_io_trace *t;
- unsigned long flags;
+ unsigned long flags = 0;
unsigned long *sequence;
pid_t pid;
- int cpu;
+ int cpu, pc = 0;
- if (unlikely(bt->trace_state != Blktrace_running))
+ if (unlikely(bt->trace_state != Blktrace_running ||
+ !blk_tracer_enabled))
return;
what |= ddir_act[rw & WRITE];
what |= MASK_TC_BIT(rw, BARRIER);
- what |= MASK_TC_BIT(rw, SYNC);
+ what |= MASK_TC_BIT(rw, SYNCIO);
what |= MASK_TC_BIT(rw, AHEAD);
what |= MASK_TC_BIT(rw, META);
what |= MASK_TC_BIT(rw, DISCARD);
@@ -150,6 +184,20 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
pid = tsk->pid;
if (unlikely(act_log_check(bt, what, sector, pid)))
return;
+ cpu = raw_smp_processor_id();
+
+ if (blk_tr) {
+ tracing_record_cmdline(current);
+
+ pc = preempt_count();
+ event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK,
+ sizeof(*t) + pdu_len,
+ 0, pc);
+ if (!event)
+ return;
+ t = ring_buffer_event_data(event);
+ goto record_it;
+ }
/*
* A word about the locking here - we disable interrupts to reserve
@@ -163,23 +211,35 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
if (t) {
- cpu = smp_processor_id();
sequence = per_cpu_ptr(bt->sequence, cpu);
t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
t->sequence = ++(*sequence);
t->time = ktime_to_ns(ktime_get());
+record_it:
+ /*
+ * These two are not needed in ftrace as they are in the
+ * generic trace_entry, filled by tracing_generic_entry_update,
+ * but for the trace_event->bin() synthesizer benefit we do it
+ * here too.
+ */
+ t->cpu = cpu;
+ t->pid = pid;
+
t->sector = sector;
t->bytes = bytes;
t->action = what;
- t->pid = pid;
t->device = bt->dev;
- t->cpu = cpu;
t->error = error;
t->pdu_len = pdu_len;
if (pdu_len)
memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
+
+ if (blk_tr) {
+ trace_buffer_unlock_commit(blk_tr, event, 0, pc);
+ return;
+ }
}
local_irq_restore(flags);
@@ -385,7 +445,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
atomic_set(&bt->dropped, 0);
ret = -EIO;
- bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
+ bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
+ &blk_dropped_fops);
if (!bt->dropped_file)
goto err;
@@ -467,10 +528,10 @@ EXPORT_SYMBOL_GPL(blk_trace_setup);
int blk_trace_startstop(struct request_queue *q, int start)
{
- struct blk_trace *bt;
int ret;
+ struct blk_trace *bt = q->blk_trace;
- if ((bt = q->blk_trace) == NULL)
+ if (bt == NULL)
return -EINVAL;
/*
@@ -606,12 +667,14 @@ static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
}
-static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq)
+static void blk_add_trace_rq_requeue(struct request_queue *q,
+ struct request *rq)
{
blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
}
-static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq)
+static void blk_add_trace_rq_complete(struct request_queue *q,
+ struct request *rq)
{
blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
}
@@ -648,12 +711,14 @@ static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
}
-static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio)
+static void blk_add_trace_bio_backmerge(struct request_queue *q,
+ struct bio *bio)
{
blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
}
-static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio)
+static void blk_add_trace_bio_frontmerge(struct request_queue *q,
+ struct bio *bio)
{
blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
}
@@ -663,7 +728,8 @@ static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
}
-static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw)
+static void blk_add_trace_getrq(struct request_queue *q,
+ struct bio *bio, int rw)
{
if (bio)
blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
@@ -676,7 +742,8 @@ static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw
}
-static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw)
+static void blk_add_trace_sleeprq(struct request_queue *q,
+ struct bio *bio, int rw)
{
if (bio)
blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
@@ -684,7 +751,8 @@ static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int
struct blk_trace *bt = q->blk_trace;
if (bt)
- __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL);
+ __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ,
+ 0, 0, NULL);
}
}
@@ -858,3 +926,613 @@ static void blk_unregister_tracepoints(void)
tracepoint_synchronize_unregister();
}
+
+/*
+ * struct blk_io_tracer formatting routines
+ */
+
+static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
+{
+ int i = 0;
+
+ if (t->action & BLK_TC_DISCARD)
+ rwbs[i++] = 'D';
+ else if (t->action & BLK_TC_WRITE)
+ rwbs[i++] = 'W';
+ else if (t->bytes)
+ rwbs[i++] = 'R';
+ else
+ rwbs[i++] = 'N';
+
+ if (t->action & BLK_TC_AHEAD)
+ rwbs[i++] = 'A';
+ if (t->action & BLK_TC_BARRIER)
+ rwbs[i++] = 'B';
+ if (t->action & BLK_TC_SYNC)
+ rwbs[i++] = 'S';
+ if (t->action & BLK_TC_META)
+ rwbs[i++] = 'M';
+
+ rwbs[i] = '\0';
+}
+
+static inline
+const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent)
+{
+ return (const struct blk_io_trace *)ent;
+}
+
+static inline const void *pdu_start(const struct trace_entry *ent)
+{
+ return te_blk_io_trace(ent) + 1;
+}
+
+static inline u32 t_sec(const struct trace_entry *ent)
+{
+ return te_blk_io_trace(ent)->bytes >> 9;
+}
+
+static inline unsigned long long t_sector(const struct trace_entry *ent)
+{
+ return te_blk_io_trace(ent)->sector;
+}
+
+static inline __u16 t_error(const struct trace_entry *ent)
+{
+ return te_blk_io_trace(ent)->sector;
+}
+
+static __u64 get_pdu_int(const struct trace_entry *ent)
+{
+ const __u64 *val = pdu_start(ent);
+ return be64_to_cpu(*val);
+}
+
+static void get_pdu_remap(const struct trace_entry *ent,
+ struct blk_io_trace_remap *r)
+{
+ const struct blk_io_trace_remap *__r = pdu_start(ent);
+ __u64 sector = __r->sector;
+
+ r->device = be32_to_cpu(__r->device);
+ r->device_from = be32_to_cpu(__r->device_from);
+ r->sector = be64_to_cpu(sector);
+}
+
+static int blk_log_action_iter(struct trace_iterator *iter, const char *act)
+{
+ char rwbs[6];
+ unsigned long long ts = ns2usecs(iter->ts);
+ unsigned long usec_rem = do_div(ts, USEC_PER_SEC);
+ unsigned secs = (unsigned long)ts;
+ const struct trace_entry *ent = iter->ent;
+ const struct blk_io_trace *t = (const struct blk_io_trace *)ent;
+
+ fill_rwbs(rwbs, t);
+
+ return trace_seq_printf(&iter->seq,
+ "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ",
+ MAJOR(t->device), MINOR(t->device), iter->cpu,
+ secs, usec_rem, ent->pid, act, rwbs);
+}
+
+static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t,
+ const char *act)
+{
+ char rwbs[6];
+ fill_rwbs(rwbs, t);
+ return trace_seq_printf(s, "%3d,%-3d %2s %3s ",
+ MAJOR(t->device), MINOR(t->device), act, rwbs);
+}
+
+static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
+{
+ const char *cmd = trace_find_cmdline(ent->pid);
+
+ if (t_sec(ent))
+ return trace_seq_printf(s, "%llu + %u [%s]\n",
+ t_sector(ent), t_sec(ent), cmd);
+ return trace_seq_printf(s, "[%s]\n", cmd);
+}
+
+static int blk_log_with_error(struct trace_seq *s,
+ const struct trace_entry *ent)
+{
+ if (t_sec(ent))
+ return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent),
+ t_sec(ent), t_error(ent));
+ return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent));
+}
+
+static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
+{
+ struct blk_io_trace_remap r = { .device = 0, };
+
+ get_pdu_remap(ent, &r);
+ return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
+ t_sector(ent),
+ t_sec(ent), MAJOR(r.device), MINOR(r.device),
+ (unsigned long long)r.sector);
+}
+
+static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
+{
+ return trace_seq_printf(s, "[%s]\n", trace_find_cmdline(ent->pid));
+}
+
+static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent)
+{
+ return trace_seq_printf(s, "[%s] %llu\n", trace_find_cmdline(ent->pid),
+ get_pdu_int(ent));
+}
+
+static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent)
+{
+ return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent),
+ get_pdu_int(ent), trace_find_cmdline(ent->pid));
+}
+
+/*
+ * struct tracer operations
+ */
+
+static void blk_tracer_print_header(struct seq_file *m)
+{
+ if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
+ return;
+ seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n"
+ "# | | | | | |\n");
+}
+
+static void blk_tracer_start(struct trace_array *tr)
+{
+ mutex_lock(&blk_probe_mutex);
+ if (atomic_add_return(1, &blk_probes_ref) == 1)
+ if (blk_register_tracepoints())
+ atomic_dec(&blk_probes_ref);
+ mutex_unlock(&blk_probe_mutex);
+ trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
+}
+
+static int blk_tracer_init(struct trace_array *tr)
+{
+ blk_tr = tr;
+ blk_tracer_start(tr);
+ mutex_lock(&blk_probe_mutex);
+ blk_tracer_enabled++;
+ mutex_unlock(&blk_probe_mutex);
+ return 0;
+}
+
+static void blk_tracer_stop(struct trace_array *tr)
+{
+ trace_flags |= TRACE_ITER_CONTEXT_INFO;
+ mutex_lock(&blk_probe_mutex);
+ if (atomic_dec_and_test(&blk_probes_ref))
+ blk_unregister_tracepoints();
+ mutex_unlock(&blk_probe_mutex);
+}
+
+static void blk_tracer_reset(struct trace_array *tr)
+{
+ if (!atomic_read(&blk_probes_ref))
+ return;
+
+ mutex_lock(&blk_probe_mutex);
+ blk_tracer_enabled--;
+ WARN_ON(blk_tracer_enabled < 0);
+ mutex_unlock(&blk_probe_mutex);
+
+ blk_tracer_stop(tr);
+}
+
+static struct {
+ const char *act[2];
+ int (*print)(struct trace_seq *s, const struct trace_entry *ent);
+} what2act[] __read_mostly = {
+ [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic },
+ [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic },
+ [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic },
+ [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic },
+ [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic },
+ [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error },
+ [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic },
+ [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error },
+ [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug },
+ [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug },
+ [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug },
+ [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic },
+ [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split },
+ [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic },
+ [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap },
+};
+
+static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
+ int flags)
+{
+ struct trace_seq *s = &iter->seq;
+ const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
+ const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
+ int ret;
+
+ if (!trace_print_context(iter))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
+ ret = trace_seq_printf(s, "Bad pc action %x\n", what);
+ else {
+ const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
+ ret = blk_log_action_seq(s, t, what2act[what].act[long_act]);
+ if (ret)
+ ret = what2act[what].print(s, iter->ent);
+ }
+
+ return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
+}
+
+static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
+ const int offset = offsetof(struct blk_io_trace, sector);
+ struct blk_io_trace old = {
+ .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
+ .time = ns2usecs(iter->ts),
+ };
+
+ if (!trace_seq_putmem(s, &old, offset))
+ return 0;
+ return trace_seq_putmem(s, &t->sector,
+ sizeof(old) - offset + t->pdu_len);
+}
+
+static enum print_line_t
+blk_trace_event_print_binary(struct trace_iterator *iter, int flags)
+{
+ return blk_trace_synthesize_old_trace(iter) ?
+ TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
+}
+
+static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
+{
+ const struct blk_io_trace *t;
+ u16 what;
+ int ret;
+
+ if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC))
+ return TRACE_TYPE_UNHANDLED;
+
+ t = (const struct blk_io_trace *)iter->ent;
+ what = t->action & ((1 << BLK_TC_SHIFT) - 1);
+
+ if (unlikely(what == 0 || what > ARRAY_SIZE(what2act)))
+ ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what);
+ else {
+ const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE);
+ ret = blk_log_action_iter(iter, what2act[what].act[long_act]);
+ if (ret)
+ ret = what2act[what].print(&iter->seq, iter->ent);
+ }
+
+ return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
+}
+
+static struct tracer blk_tracer __read_mostly = {
+ .name = "blk",
+ .init = blk_tracer_init,
+ .reset = blk_tracer_reset,
+ .start = blk_tracer_start,
+ .stop = blk_tracer_stop,
+ .print_header = blk_tracer_print_header,
+ .print_line = blk_tracer_print_line,
+ .flags = &blk_tracer_flags,
+};
+
+static struct trace_event trace_blk_event = {
+ .type = TRACE_BLK,
+ .trace = blk_trace_event_print,
+ .latency_trace = blk_trace_event_print,
+ .binary = blk_trace_event_print_binary,
+};
+
+static int __init init_blk_tracer(void)
+{
+ if (!register_ftrace_event(&trace_blk_event)) {
+ pr_warning("Warning: could not register block events\n");
+ return 1;
+ }
+
+ if (register_tracer(&blk_tracer) != 0) {
+ pr_warning("Warning: could not register the block tracer\n");
+ unregister_ftrace_event(&trace_blk_event);
+ return 1;
+ }
+
+ return 0;
+}
+
+device_initcall(init_blk_tracer);
+
+static int blk_trace_remove_queue(struct request_queue *q)
+{
+ struct blk_trace *bt;
+
+ bt = xchg(&q->blk_trace, NULL);
+ if (bt == NULL)
+ return -EINVAL;
+
+ kfree(bt);
+ return 0;
+}
+
+/*
+ * Setup everything required to start tracing
+ */
+static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
+{
+ struct blk_trace *old_bt, *bt = NULL;
+ int ret;
+
+ ret = -ENOMEM;
+ bt = kzalloc(sizeof(*bt), GFP_KERNEL);
+ if (!bt)
+ goto err;
+
+ bt->dev = dev;
+ bt->act_mask = (u16)-1;
+ bt->end_lba = -1ULL;
+ bt->trace_state = Blktrace_running;
+
+ old_bt = xchg(&q->blk_trace, bt);
+ if (old_bt != NULL) {
+ (void)xchg(&q->blk_trace, old_bt);
+ kfree(bt);
+ ret = -EBUSY;
+ }
+ return 0;
+err:
+ return ret;
+}
+
+/*
+ * sysfs interface to enable and configure tracing
+ */
+
+static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ struct block_device *bdev;
+ ssize_t ret = -ENXIO;
+
+ lock_kernel();
+ bdev = bdget(part_devt(p));
+ if (bdev != NULL) {
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (q != NULL) {
+ mutex_lock(&bdev->bd_mutex);
+ ret = sprintf(buf, "%u\n", !!q->blk_trace);
+ mutex_unlock(&bdev->bd_mutex);
+ }
+
+ bdput(bdev);
+ }
+
+ unlock_kernel();
+ return ret;
+}
+
+static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct block_device *bdev;
+ struct request_queue *q;
+ struct hd_struct *p;
+ int value;
+ ssize_t ret = -ENXIO;
+
+ if (count == 0 || sscanf(buf, "%d", &value) != 1)
+ goto out;
+
+ lock_kernel();
+ p = dev_to_part(dev);
+ bdev = bdget(part_devt(p));
+ if (bdev == NULL)
+ goto out_unlock_kernel;
+
+ q = bdev_get_queue(bdev);
+ if (q == NULL)
+ goto out_bdput;
+
+ mutex_lock(&bdev->bd_mutex);
+ if (value)
+ ret = blk_trace_setup_queue(q, bdev->bd_dev);
+ else
+ ret = blk_trace_remove_queue(q);
+ mutex_unlock(&bdev->bd_mutex);
+
+ if (ret == 0)
+ ret = count;
+out_bdput:
+ bdput(bdev);
+out_unlock_kernel:
+ unlock_kernel();
+out:
+ return ret;
+}
+
+static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
+static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count);
+#define BLK_TRACE_DEVICE_ATTR(_name) \
+ DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \
+ sysfs_blk_trace_attr_show, \
+ sysfs_blk_trace_attr_store)
+
+static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
+ sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
+static BLK_TRACE_DEVICE_ATTR(act_mask);
+static BLK_TRACE_DEVICE_ATTR(pid);
+static BLK_TRACE_DEVICE_ATTR(start_lba);
+static BLK_TRACE_DEVICE_ATTR(end_lba);
+
+static struct attribute *blk_trace_attrs[] = {
+ &dev_attr_enable.attr,
+ &dev_attr_act_mask.attr,
+ &dev_attr_pid.attr,
+ &dev_attr_start_lba.attr,
+ &dev_attr_end_lba.attr,
+ NULL
+};
+
+struct attribute_group blk_trace_attr_group = {
+ .name = "trace",
+ .attrs = blk_trace_attrs,
+};
+
+static int blk_str2act_mask(const char *str)
+{
+ int mask = 0;
+ char *copy = kstrdup(str, GFP_KERNEL), *s;
+
+ if (copy == NULL)
+ return -ENOMEM;
+
+ s = strstrip(copy);
+
+ while (1) {
+ char *sep = strchr(s, ',');
+
+ if (sep != NULL)
+ *sep = '\0';
+
+ if (strcasecmp(s, "barrier") == 0)
+ mask |= BLK_TC_BARRIER;
+ else if (strcasecmp(s, "complete") == 0)
+ mask |= BLK_TC_COMPLETE;
+ else if (strcasecmp(s, "fs") == 0)
+ mask |= BLK_TC_FS;
+ else if (strcasecmp(s, "issue") == 0)
+ mask |= BLK_TC_ISSUE;
+ else if (strcasecmp(s, "pc") == 0)
+ mask |= BLK_TC_PC;
+ else if (strcasecmp(s, "queue") == 0)
+ mask |= BLK_TC_QUEUE;
+ else if (strcasecmp(s, "read") == 0)
+ mask |= BLK_TC_READ;
+ else if (strcasecmp(s, "requeue") == 0)
+ mask |= BLK_TC_REQUEUE;
+ else if (strcasecmp(s, "sync") == 0)
+ mask |= BLK_TC_SYNC;
+ else if (strcasecmp(s, "write") == 0)
+ mask |= BLK_TC_WRITE;
+
+ if (sep == NULL)
+ break;
+
+ s = sep + 1;
+ }
+ kfree(copy);
+
+ return mask;
+}
+
+static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ struct request_queue *q;
+ struct block_device *bdev;
+ ssize_t ret = -ENXIO;
+
+ lock_kernel();
+ bdev = bdget(part_devt(p));
+ if (bdev == NULL)
+ goto out_unlock_kernel;
+
+ q = bdev_get_queue(bdev);
+ if (q == NULL)
+ goto out_bdput;
+ mutex_lock(&bdev->bd_mutex);
+ if (q->blk_trace == NULL)
+ ret = sprintf(buf, "disabled\n");
+ else if (attr == &dev_attr_act_mask)
+ ret = sprintf(buf, "%#x\n", q->blk_trace->act_mask);
+ else if (attr == &dev_attr_pid)
+ ret = sprintf(buf, "%u\n", q->blk_trace->pid);
+ else if (attr == &dev_attr_start_lba)
+ ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
+ else if (attr == &dev_attr_end_lba)
+ ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+ mutex_unlock(&bdev->bd_mutex);
+out_bdput:
+ bdput(bdev);
+out_unlock_kernel:
+ unlock_kernel();
+ return ret;
+}
+
+static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct block_device *bdev;
+ struct request_queue *q;
+ struct hd_struct *p;
+ u64 value;
+ ssize_t ret = -ENXIO;
+
+ if (count == 0)
+ goto out;
+
+ if (attr == &dev_attr_act_mask) {
+ if (sscanf(buf, "%llx", &value) != 1) {
+ /* Assume it is a list of trace category names */
+ value = blk_str2act_mask(buf);
+ if (value < 0)
+ goto out;
+ }
+ } else if (sscanf(buf, "%llu", &value) != 1)
+ goto out;
+
+ lock_kernel();
+ p = dev_to_part(dev);
+ bdev = bdget(part_devt(p));
+ if (bdev == NULL)
+ goto out_unlock_kernel;
+
+ q = bdev_get_queue(bdev);
+ if (q == NULL)
+ goto out_bdput;
+
+ mutex_lock(&bdev->bd_mutex);
+ ret = 0;
+ if (q->blk_trace == NULL)
+ ret = blk_trace_setup_queue(q, bdev->bd_dev);
+
+ if (ret == 0) {
+ if (attr == &dev_attr_act_mask)
+ q->blk_trace->act_mask = value;
+ else if (attr == &dev_attr_pid)
+ q->blk_trace->pid = value;
+ else if (attr == &dev_attr_start_lba)
+ q->blk_trace->start_lba = value;
+ else if (attr == &dev_attr_end_lba)
+ q->blk_trace->end_lba = value;
+ ret = count;
+ }
+ mutex_unlock(&bdev->bd_mutex);
+out_bdput:
+ bdput(bdev);
+out_unlock_kernel:
+ unlock_kernel();
+out:
+ return ret;
+}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fdf913dfc7e..7dd5a2bef9c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -27,6 +27,7 @@
#include <linux/sysctl.h>
#include <linux/ctype.h>
#include <linux/list.h>
+#include <linux/hash.h>
#include <asm/ftrace.h>
@@ -44,14 +45,14 @@
ftrace_kill(); \
} while (0)
+/* hash bits for specific function selection */
+#define FTRACE_HASH_BITS 7
+#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS)
+
/* ftrace_enabled is a method to turn ftrace on or off */
int ftrace_enabled __read_mostly;
static int last_ftrace_enabled;
-/* set when tracing only a pid */
-struct pid *ftrace_pid_trace;
-static struct pid * const ftrace_swapper_pid = &init_struct_pid;
-
/* Quick disabling of function tracer. */
int function_trace_stop;
@@ -61,9 +62,7 @@ int function_trace_stop;
*/
static int ftrace_disabled __read_mostly;
-static DEFINE_SPINLOCK(ftrace_lock);
-static DEFINE_MUTEX(ftrace_sysctl_lock);
-static DEFINE_MUTEX(ftrace_start_lock);
+static DEFINE_MUTEX(ftrace_lock);
static struct ftrace_ops ftrace_list_end __read_mostly =
{
@@ -134,9 +133,6 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
static int __register_ftrace_function(struct ftrace_ops *ops)
{
- /* should not be called from interrupt context */
- spin_lock(&ftrace_lock);
-
ops->next = ftrace_list;
/*
* We are entering ops into the ftrace_list but another
@@ -172,18 +168,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
#endif
}
- spin_unlock(&ftrace_lock);
-
return 0;
}
static int __unregister_ftrace_function(struct ftrace_ops *ops)
{
struct ftrace_ops **p;
- int ret = 0;
-
- /* should not be called from interrupt context */
- spin_lock(&ftrace_lock);
/*
* If we are removing the last function, then simply point
@@ -192,17 +182,15 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
if (ftrace_list == ops && ops->next == &ftrace_list_end) {
ftrace_trace_function = ftrace_stub;
ftrace_list = &ftrace_list_end;
- goto out;
+ return 0;
}
for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
if (*p == ops)
break;
- if (*p != ops) {
- ret = -1;
- goto out;
- }
+ if (*p != ops)
+ return -1;
*p = (*p)->next;
@@ -223,18 +211,14 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
}
}
- out:
- spin_unlock(&ftrace_lock);
-
- return ret;
+ return 0;
}
static void ftrace_update_pid_func(void)
{
ftrace_func_t func;
- /* should not be called from interrupt context */
- spin_lock(&ftrace_lock);
+ mutex_lock(&ftrace_lock);
if (ftrace_trace_function == ftrace_stub)
goto out;
@@ -256,21 +240,30 @@ static void ftrace_update_pid_func(void)
#endif
out:
- spin_unlock(&ftrace_lock);
+ mutex_unlock(&ftrace_lock);
}
+/* set when tracing only a pid */
+struct pid *ftrace_pid_trace;
+static struct pid * const ftrace_swapper_pid = &init_struct_pid;
+
#ifdef CONFIG_DYNAMIC_FTRACE
+
#ifndef CONFIG_FTRACE_MCOUNT_RECORD
# error Dynamic ftrace depends on MCOUNT_RECORD
#endif
-/*
- * Since MCOUNT_ADDR may point to mcount itself, we do not want
- * to get it confused by reading a reference in the code as we
- * are parsing on objcopy output of text. Use a variable for
- * it instead.
- */
-static unsigned long mcount_addr = MCOUNT_ADDR;
+static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;
+
+struct ftrace_func_probe {
+ struct hlist_node node;
+ struct ftrace_probe_ops *ops;
+ unsigned long flags;
+ unsigned long ip;
+ void *data;
+ struct rcu_head rcu;
+};
+
enum {
FTRACE_ENABLE_CALLS = (1 << 0),
@@ -290,7 +283,7 @@ static DEFINE_MUTEX(ftrace_regex_lock);
struct ftrace_page {
struct ftrace_page *next;
- unsigned long index;
+ int index;
struct dyn_ftrace records[];
};
@@ -305,6 +298,19 @@ static struct ftrace_page *ftrace_pages;
static struct dyn_ftrace *ftrace_free_records;
+/*
+ * This is a double for. Do not use 'break' to break out of the loop,
+ * you must use a goto.
+ */
+#define do_for_each_ftrace_rec(pg, rec) \
+ for (pg = ftrace_pages_start; pg; pg = pg->next) { \
+ int _____i; \
+ for (_____i = 0; _____i < pg->index; _____i++) { \
+ rec = &pg->records[_____i];
+
+#define while_for_each_ftrace_rec() \
+ } \
+ }
#ifdef CONFIG_KPROBES
@@ -349,23 +355,16 @@ void ftrace_release(void *start, unsigned long size)
struct ftrace_page *pg;
unsigned long s = (unsigned long)start;
unsigned long e = s + size;
- int i;
if (ftrace_disabled || !start)
return;
- /* should not be called from interrupt context */
- spin_lock(&ftrace_lock);
-
- for (pg = ftrace_pages_start; pg; pg = pg->next) {
- for (i = 0; i < pg->index; i++) {
- rec = &pg->records[i];
-
- if ((rec->ip >= s) && (rec->ip < e))
- ftrace_free_rec(rec);
- }
- }
- spin_unlock(&ftrace_lock);
+ mutex_lock(&ftrace_lock);
+ do_for_each_ftrace_rec(pg, rec) {
+ if ((rec->ip >= s) && (rec->ip < e))
+ ftrace_free_rec(rec);
+ } while_for_each_ftrace_rec();
+ mutex_unlock(&ftrace_lock);
}
static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -461,10 +460,10 @@ static void ftrace_bug(int failed, unsigned long ip)
static int
__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
{
- unsigned long ip, fl;
unsigned long ftrace_addr;
+ unsigned long ip, fl;
- ftrace_addr = (unsigned long)ftrace_caller;
+ ftrace_addr = (unsigned long)FTRACE_ADDR;
ip = rec->ip;
@@ -473,7 +472,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
* it is not enabled then do nothing.
*
* If this record is not to be traced and
- * it is enabled then disabled it.
+ * it is enabled then disable it.
*
*/
if (rec->flags & FTRACE_FL_NOTRACE) {
@@ -493,7 +492,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
return 0;
- /* Record is not filtered and is not enabled do nothing */
+ /* Record is not filtered or enabled, do nothing */
if (!fl)
return 0;
@@ -515,7 +514,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
} else {
- /* if record is not enabled do nothing */
+ /* if record is not enabled, do nothing */
if (!(rec->flags & FTRACE_FL_ENABLED))
return 0;
@@ -531,41 +530,37 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
static void ftrace_replace_code(int enable)
{
- int i, failed;
struct dyn_ftrace *rec;
struct ftrace_page *pg;
+ int failed;
- for (pg = ftrace_pages_start; pg; pg = pg->next) {
- for (i = 0; i < pg->index; i++) {
- rec = &pg->records[i];
-
- /*
- * Skip over free records and records that have
- * failed.
- */
- if (rec->flags & FTRACE_FL_FREE ||
- rec->flags & FTRACE_FL_FAILED)
- continue;
+ do_for_each_ftrace_rec(pg, rec) {
+ /*
+ * Skip over free records and records that have
+ * failed.
+ */
+ if (rec->flags & FTRACE_FL_FREE ||
+ rec->flags & FTRACE_FL_FAILED)
+ continue;
- /* ignore updates to this record's mcount site */
- if (get_kprobe((void *)rec->ip)) {
- freeze_record(rec);
- continue;
- } else {
- unfreeze_record(rec);
- }
+ /* ignore updates to this record's mcount site */
+ if (get_kprobe((void *)rec->ip)) {
+ freeze_record(rec);
+ continue;
+ } else {
+ unfreeze_record(rec);
+ }
- failed = __ftrace_replace_code(rec, enable);
- if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
- rec->flags |= FTRACE_FL_FAILED;
- if ((system_state == SYSTEM_BOOTING) ||
- !core_kernel_text(rec->ip)) {
- ftrace_free_rec(rec);
- } else
- ftrace_bug(failed, rec->ip);
- }
+ failed = __ftrace_replace_code(rec, enable);
+ if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
+ rec->flags |= FTRACE_FL_FAILED;
+ if ((system_state == SYSTEM_BOOTING) ||
+ !core_kernel_text(rec->ip)) {
+ ftrace_free_rec(rec);
+ } else
+ ftrace_bug(failed, rec->ip);
}
- }
+ } while_for_each_ftrace_rec();
}
static int
@@ -576,7 +571,7 @@ ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
ip = rec->ip;
- ret = ftrace_make_nop(mod, rec, mcount_addr);
+ ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
if (ret) {
ftrace_bug(ret, ip);
rec->flags |= FTRACE_FL_FAILED;
@@ -631,13 +626,10 @@ static void ftrace_startup(int command)
if (unlikely(ftrace_disabled))
return;
- mutex_lock(&ftrace_start_lock);
ftrace_start_up++;
command |= FTRACE_ENABLE_CALLS;
ftrace_startup_enable(command);
-
- mutex_unlock(&ftrace_start_lock);
}
static void ftrace_shutdown(int command)
@@ -645,7 +637,6 @@ static void ftrace_shutdown(int command)
if (unlikely(ftrace_disabled))
return;
- mutex_lock(&ftrace_start_lock);
ftrace_start_up--;
if (!ftrace_start_up)
command |= FTRACE_DISABLE_CALLS;
@@ -656,11 +647,9 @@ static void ftrace_shutdown(int command)
}
if (!command || !ftrace_enabled)
- goto out;
+ return;
ftrace_run_update_code(command);
- out:
- mutex_unlock(&ftrace_start_lock);
}
static void ftrace_startup_sysctl(void)
@@ -670,7 +659,6 @@ static void ftrace_startup_sysctl(void)
if (unlikely(ftrace_disabled))
return;
- mutex_lock(&ftrace_start_lock);
/* Force update next time */
saved_ftrace_func = NULL;
/* ftrace_start_up is true if we want ftrace running */
@@ -678,7 +666,6 @@ static void ftrace_startup_sysctl(void)
command |= FTRACE_ENABLE_CALLS;
ftrace_run_update_code(command);
- mutex_unlock(&ftrace_start_lock);
}
static void ftrace_shutdown_sysctl(void)
@@ -688,13 +675,11 @@ static void ftrace_shutdown_sysctl(void)
if (unlikely(ftrace_disabled))
return;
- mutex_lock(&ftrace_start_lock);
/* ftrace_start_up is true if ftrace is running */
if (ftrace_start_up)
command |= FTRACE_DISABLE_CALLS;
ftrace_run_update_code(command);
- mutex_unlock(&ftrace_start_lock);
}
static cycle_t ftrace_update_time;
@@ -781,13 +766,16 @@ enum {
FTRACE_ITER_CONT = (1 << 1),
FTRACE_ITER_NOTRACE = (1 << 2),
FTRACE_ITER_FAILURES = (1 << 3),
+ FTRACE_ITER_PRINTALL = (1 << 4),
+ FTRACE_ITER_HASH = (1 << 5),
};
#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
struct ftrace_iterator {
struct ftrace_page *pg;
- unsigned idx;
+ int hidx;
+ int idx;
unsigned flags;
unsigned char buffer[FTRACE_BUFF_MAX+1];
unsigned buffer_idx;
@@ -795,15 +783,89 @@ struct ftrace_iterator {
};
static void *
+t_hash_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct ftrace_iterator *iter = m->private;
+ struct hlist_node *hnd = v;
+ struct hlist_head *hhd;
+
+ WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
+
+ (*pos)++;
+
+ retry:
+ if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
+ return NULL;
+
+ hhd = &ftrace_func_hash[iter->hidx];
+
+ if (hlist_empty(hhd)) {
+ iter->hidx++;
+ hnd = NULL;
+ goto retry;
+ }
+
+ if (!hnd)
+ hnd = hhd->first;
+ else {
+ hnd = hnd->next;
+ if (!hnd) {
+ iter->hidx++;
+ goto retry;
+ }
+ }
+
+ return hnd;
+}
+
+static void *t_hash_start(struct seq_file *m, loff_t *pos)
+{
+ struct ftrace_iterator *iter = m->private;
+ void *p = NULL;
+
+ iter->flags |= FTRACE_ITER_HASH;
+
+ return t_hash_next(m, p, pos);
+}
+
+static int t_hash_show(struct seq_file *m, void *v)
+{
+ struct ftrace_func_probe *rec;
+ struct hlist_node *hnd = v;
+ char str[KSYM_SYMBOL_LEN];
+
+ rec = hlist_entry(hnd, struct ftrace_func_probe, node);
+
+ if (rec->ops->print)
+ return rec->ops->print(m, rec->ip, rec->ops, rec->data);
+
+ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+ seq_printf(m, "%s:", str);
+
+ kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str);
+ seq_printf(m, "%s", str);
+
+ if (rec->data)
+ seq_printf(m, ":%p", rec->data);
+ seq_putc(m, '\n');
+
+ return 0;
+}
+
+static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
struct ftrace_iterator *iter = m->private;
struct dyn_ftrace *rec = NULL;
+ if (iter->flags & FTRACE_ITER_HASH)
+ return t_hash_next(m, v, pos);
+
(*pos)++;
- /* should not be called from interrupt context */
- spin_lock(&ftrace_lock);
+ if (iter->flags & FTRACE_ITER_PRINTALL)
+ return NULL;
+
retry:
if (iter->idx >= iter->pg->index) {
if (iter->pg->next) {
@@ -832,7 +894,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
goto retry;
}
}
- spin_unlock(&ftrace_lock);
return rec;
}
@@ -842,6 +903,23 @@ static void *t_start(struct seq_file *m, loff_t *pos)
struct ftrace_iterator *iter = m->private;
void *p = NULL;
+ mutex_lock(&ftrace_lock);
+ /*
+ * For set_ftrace_filter reading, if we have the filter
+ * off, we can short cut and just print out that all
+ * functions are enabled.
+ */
+ if (iter->flags & FTRACE_ITER_FILTER && !ftrace_filtered) {
+ if (*pos > 0)
+ return t_hash_start(m, pos);
+ iter->flags |= FTRACE_ITER_PRINTALL;
+ (*pos)++;
+ return iter;
+ }
+
+ if (iter->flags & FTRACE_ITER_HASH)
+ return t_hash_start(m, pos);
+
if (*pos > 0) {
if (iter->idx < 0)
return p;
@@ -851,18 +929,31 @@ static void *t_start(struct seq_file *m, loff_t *pos)
p = t_next(m, p, pos);
+ if (!p)
+ return t_hash_start(m, pos);
+
return p;
}
static void t_stop(struct seq_file *m, void *p)
{
+ mutex_unlock(&ftrace_lock);
}
static int t_show(struct seq_file *m, void *v)
{
+ struct ftrace_iterator *iter = m->private;
struct dyn_ftrace *rec = v;
char str[KSYM_SYMBOL_LEN];
+ if (iter->flags & FTRACE_ITER_HASH)
+ return t_hash_show(m, v);
+
+ if (iter->flags & FTRACE_ITER_PRINTALL) {
+ seq_printf(m, "#### all functions enabled ####\n");
+ return 0;
+ }
+
if (!rec)
return 0;
@@ -941,23 +1032,16 @@ static void ftrace_filter_reset(int enable)
struct ftrace_page *pg;
struct dyn_ftrace *rec;
unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
- unsigned i;
- /* should not be called from interrupt context */
- spin_lock(&ftrace_lock);
+ mutex_lock(&ftrace_lock);
if (enable)
ftrace_filtered = 0;
- pg = ftrace_pages_start;
- while (pg) {
- for (i = 0; i < pg->index; i++) {
- rec = &pg->records[i];
- if (rec->flags & FTRACE_FL_FAILED)
- continue;
- rec->flags &= ~type;
- }
- pg = pg->next;
- }
- spin_unlock(&ftrace_lock);
+ do_for_each_ftrace_rec(pg, rec) {
+ if (rec->flags & FTRACE_FL_FAILED)
+ continue;
+ rec->flags &= ~type;
+ } while_for_each_ftrace_rec();
+ mutex_unlock(&ftrace_lock);
}
static int
@@ -1038,86 +1122,536 @@ enum {
MATCH_END_ONLY,
};
-static void
-ftrace_match(unsigned char *buff, int len, int enable)
+/*
+ * (static function - no need for kernel doc)
+ *
+ * Pass in a buffer containing a glob and this function will
+ * set search to point to the search part of the buffer and
+ * return the type of search it is (see enum above).
+ * This does modify buff.
+ *
+ * Returns enum type.
+ * search returns the pointer to use for comparison.
+ * not returns 1 if buff started with a '!'
+ * 0 otherwise.
+ */
+static int
+ftrace_setup_glob(char *buff, int len, char **search, int *not)
{
- char str[KSYM_SYMBOL_LEN];
- char *search = NULL;
- struct ftrace_page *pg;
- struct dyn_ftrace *rec;
int type = MATCH_FULL;
- unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
- unsigned i, match = 0, search_len = 0;
- int not = 0;
+ int i;
if (buff[0] == '!') {
- not = 1;
+ *not = 1;
buff++;
len--;
- }
+ } else
+ *not = 0;
+
+ *search = buff;
for (i = 0; i < len; i++) {
if (buff[i] == '*') {
if (!i) {
- search = buff + i + 1;
+ *search = buff + 1;
type = MATCH_END_ONLY;
- search_len = len - (i + 1);
} else {
- if (type == MATCH_END_ONLY) {
+ if (type == MATCH_END_ONLY)
type = MATCH_MIDDLE_ONLY;
- } else {
- match = i;
+ else
type = MATCH_FRONT_ONLY;
- }
buff[i] = 0;
break;
}
}
}
- /* should not be called from interrupt context */
- spin_lock(&ftrace_lock);
- if (enable)
- ftrace_filtered = 1;
- pg = ftrace_pages_start;
- while (pg) {
- for (i = 0; i < pg->index; i++) {
- int matched = 0;
- char *ptr;
-
- rec = &pg->records[i];
- if (rec->flags & FTRACE_FL_FAILED)
+ return type;
+}
+
+static int ftrace_match(char *str, char *regex, int len, int type)
+{
+ int matched = 0;
+ char *ptr;
+
+ switch (type) {
+ case MATCH_FULL:
+ if (strcmp(str, regex) == 0)
+ matched = 1;
+ break;
+ case MATCH_FRONT_ONLY:
+ if (strncmp(str, regex, len) == 0)
+ matched = 1;
+ break;
+ case MATCH_MIDDLE_ONLY:
+ if (strstr(str, regex))
+ matched = 1;
+ break;
+ case MATCH_END_ONLY:
+ ptr = strstr(str, regex);
+ if (ptr && (ptr[len] == 0))
+ matched = 1;
+ break;
+ }
+
+ return matched;
+}
+
+static int
+ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type)
+{
+ char str[KSYM_SYMBOL_LEN];
+
+ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+ return ftrace_match(str, regex, len, type);
+}
+
+static void ftrace_match_records(char *buff, int len, int enable)
+{
+ unsigned int search_len;
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
+ unsigned long flag;
+ char *search;
+ int type;
+ int not;
+
+ flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
+ type = ftrace_setup_glob(buff, len, &search, &not);
+
+ search_len = strlen(search);
+
+ mutex_lock(&ftrace_lock);
+ do_for_each_ftrace_rec(pg, rec) {
+
+ if (rec->flags & FTRACE_FL_FAILED)
+ continue;
+
+ if (ftrace_match_record(rec, search, search_len, type)) {
+ if (not)
+ rec->flags &= ~flag;
+ else
+ rec->flags |= flag;
+ }
+ /*
+ * Only enable filtering if we have a function that
+ * is filtered on.
+ */
+ if (enable && (rec->flags & FTRACE_FL_FILTER))
+ ftrace_filtered = 1;
+ } while_for_each_ftrace_rec();
+ mutex_unlock(&ftrace_lock);
+}
+
+static int
+ftrace_match_module_record(struct dyn_ftrace *rec, char *mod,
+ char *regex, int len, int type)
+{
+ char str[KSYM_SYMBOL_LEN];
+ char *modname;
+
+ kallsyms_lookup(rec->ip, NULL, NULL, &modname, str);
+
+ if (!modname || strcmp(modname, mod))
+ return 0;
+
+ /* blank search means to match all funcs in the mod */
+ if (len)
+ return ftrace_match(str, regex, len, type);
+ else
+ return 1;
+}
+
+static void ftrace_match_module_records(char *buff, char *mod, int enable)
+{
+ unsigned search_len = 0;
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
+ int type = MATCH_FULL;
+ char *search = buff;
+ unsigned long flag;
+ int not = 0;
+
+ flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
+
+ /* blank or '*' mean the same */
+ if (strcmp(buff, "*") == 0)
+ buff[0] = 0;
+
+ /* handle the case of 'dont filter this module' */
+ if (strcmp(buff, "!") == 0 || strcmp(buff, "!*") == 0) {
+ buff[0] = 0;
+ not = 1;
+ }
+
+ if (strlen(buff)) {
+ type = ftrace_setup_glob(buff, strlen(buff), &search, &not);
+ search_len = strlen(search);
+ }
+
+ mutex_lock(&ftrace_lock);
+ do_for_each_ftrace_rec(pg, rec) {
+
+ if (rec->flags & FTRACE_FL_FAILED)
+ continue;
+
+ if (ftrace_match_module_record(rec, mod,
+ search, search_len, type)) {
+ if (not)
+ rec->flags &= ~flag;
+ else
+ rec->flags |= flag;
+ }
+ if (enable && (rec->flags & FTRACE_FL_FILTER))
+ ftrace_filtered = 1;
+
+ } while_for_each_ftrace_rec();
+ mutex_unlock(&ftrace_lock);
+}
+
+/*
+ * We register the module command as a template to show others how
+ * to register the a command as well.
+ */
+
+static int
+ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
+{
+ char *mod;
+
+ /*
+ * cmd == 'mod' because we only registered this func
+ * for the 'mod' ftrace_func_command.
+ * But if you register one func with multiple commands,
+ * you can tell which command was used by the cmd
+ * parameter.
+ */
+
+ /* we must have a module name */
+ if (!param)
+ return -EINVAL;
+
+ mod = strsep(&param, ":");
+ if (!strlen(mod))
+ return -EINVAL;
+
+ ftrace_match_module_records(func, mod, enable);
+ return 0;
+}
+
+static struct ftrace_func_command ftrace_mod_cmd = {
+ .name = "mod",
+ .func = ftrace_mod_callback,
+};
+
+static int __init ftrace_mod_cmd_init(void)
+{
+ return register_ftrace_command(&ftrace_mod_cmd);
+}
+device_initcall(ftrace_mod_cmd_init);
+
+static void
+function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
+{
+ struct ftrace_func_probe *entry;
+ struct hlist_head *hhd;
+ struct hlist_node *n;
+ unsigned long key;
+ int resched;
+
+ key = hash_long(ip, FTRACE_HASH_BITS);
+
+ hhd = &ftrace_func_hash[key];
+
+ if (hlist_empty(hhd))
+ return;
+
+ /*
+ * Disable preemption for these calls to prevent a RCU grace
+ * period. This syncs the hash iteration and freeing of items
+ * on the hash. rcu_read_lock is too dangerous here.
+ */
+ resched = ftrace_preempt_disable();
+ hlist_for_each_entry_rcu(entry, n, hhd, node) {
+ if (entry->ip == ip)
+ entry->ops->func(ip, parent_ip, &entry->data);
+ }
+ ftrace_preempt_enable(resched);
+}
+
+static struct ftrace_ops trace_probe_ops __read_mostly =
+{
+ .func = function_trace_probe_call,
+};
+
+static int ftrace_probe_registered;
+
+static void __enable_ftrace_function_probe(void)
+{
+ int i;
+
+ if (ftrace_probe_registered)
+ return;
+
+ for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
+ struct hlist_head *hhd = &ftrace_func_hash[i];
+ if (hhd->first)
+ break;
+ }
+ /* Nothing registered? */
+ if (i == FTRACE_FUNC_HASHSIZE)
+ return;
+
+ __register_ftrace_function(&trace_probe_ops);
+ ftrace_startup(0);
+ ftrace_probe_registered = 1;
+}
+
+static void __disable_ftrace_function_probe(void)
+{
+ int i;
+
+ if (!ftrace_probe_registered)
+ return;
+
+ for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
+ struct hlist_head *hhd = &ftrace_func_hash[i];
+ if (hhd->first)
+ return;
+ }
+
+ /* no more funcs left */
+ __unregister_ftrace_function(&trace_probe_ops);
+ ftrace_shutdown(0);
+ ftrace_probe_registered = 0;
+}
+
+
+static void ftrace_free_entry_rcu(struct rcu_head *rhp)
+{
+ struct ftrace_func_probe *entry =
+ container_of(rhp, struct ftrace_func_probe, rcu);
+
+ if (entry->ops->free)
+ entry->ops->free(&entry->data);
+ kfree(entry);
+}
+
+
+int
+register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
+ void *data)
+{
+ struct ftrace_func_probe *entry;
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
+ int type, len, not;
+ unsigned long key;
+ int count = 0;
+ char *search;
+
+ type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
+ len = strlen(search);
+
+ /* we do not support '!' for function probes */
+ if (WARN_ON(not))
+ return -EINVAL;
+
+ mutex_lock(&ftrace_lock);
+ do_for_each_ftrace_rec(pg, rec) {
+
+ if (rec->flags & FTRACE_FL_FAILED)
+ continue;
+
+ if (!ftrace_match_record(rec, search, len, type))
+ continue;
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ /* If we did not process any, then return error */
+ if (!count)
+ count = -ENOMEM;
+ goto out_unlock;
+ }
+
+ count++;
+
+ entry->data = data;
+
+ /*
+ * The caller might want to do something special
+ * for each function we find. We call the callback
+ * to give the caller an opportunity to do so.
+ */
+ if (ops->callback) {
+ if (ops->callback(rec->ip, &entry->data) < 0) {
+ /* caller does not like this func */
+ kfree(entry);
continue;
- kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
- switch (type) {
- case MATCH_FULL:
- if (strcmp(str, buff) == 0)
- matched = 1;
- break;
- case MATCH_FRONT_ONLY:
- if (memcmp(str, buff, match) == 0)
- matched = 1;
- break;
- case MATCH_MIDDLE_ONLY:
- if (strstr(str, search))
- matched = 1;
- break;
- case MATCH_END_ONLY:
- ptr = strstr(str, search);
- if (ptr && (ptr[search_len] == 0))
- matched = 1;
- break;
}
- if (matched) {
- if (not)
- rec->flags &= ~flag;
- else
- rec->flags |= flag;
+ }
+
+ entry->ops = ops;
+ entry->ip = rec->ip;
+
+ key = hash_long(entry->ip, FTRACE_HASH_BITS);
+ hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]);
+
+ } while_for_each_ftrace_rec();
+ __enable_ftrace_function_probe();
+
+ out_unlock:
+ mutex_unlock(&ftrace_lock);
+
+ return count;
+}
+
+enum {
+ PROBE_TEST_FUNC = 1,
+ PROBE_TEST_DATA = 2
+};
+
+static void
+__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
+ void *data, int flags)
+{
+ struct ftrace_func_probe *entry;
+ struct hlist_node *n, *tmp;
+ char str[KSYM_SYMBOL_LEN];
+ int type = MATCH_FULL;
+ int i, len = 0;
+ char *search;
+
+ if (glob && (strcmp(glob, "*") || !strlen(glob)))
+ glob = NULL;
+ else {
+ int not;
+
+ type = ftrace_setup_glob(glob, strlen(glob), &search, &not);
+ len = strlen(search);
+
+ /* we do not support '!' for function probes */
+ if (WARN_ON(not))
+ return;
+ }
+
+ mutex_lock(&ftrace_lock);
+ for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
+ struct hlist_head *hhd = &ftrace_func_hash[i];
+
+ hlist_for_each_entry_safe(entry, n, tmp, hhd, node) {
+
+ /* break up if statements for readability */
+ if ((flags & PROBE_TEST_FUNC) && entry->ops != ops)
+ continue;
+
+ if ((flags & PROBE_TEST_DATA) && entry->data != data)
+ continue;
+
+ /* do this last, since it is the most expensive */
+ if (glob) {
+ kallsyms_lookup(entry->ip, NULL, NULL,
+ NULL, str);
+ if (!ftrace_match(str, glob, len, type))
+ continue;
}
+
+ hlist_del(&entry->node);
+ call_rcu(&entry->rcu, ftrace_free_entry_rcu);
+ }
+ }
+ __disable_ftrace_function_probe();
+ mutex_unlock(&ftrace_lock);
+}
+
+void
+unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
+ void *data)
+{
+ __unregister_ftrace_function_probe(glob, ops, data,
+ PROBE_TEST_FUNC | PROBE_TEST_DATA);
+}
+
+void
+unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
+{
+ __unregister_ftrace_function_probe(glob, ops, NULL, PROBE_TEST_FUNC);
+}
+
+void unregister_ftrace_function_probe_all(char *glob)
+{
+ __unregister_ftrace_function_probe(glob, NULL, NULL, 0);
+}
+
+static LIST_HEAD(ftrace_commands);
+static DEFINE_MUTEX(ftrace_cmd_mutex);
+
+int register_ftrace_command(struct ftrace_func_command *cmd)
+{
+ struct ftrace_func_command *p;
+ int ret = 0;
+
+ mutex_lock(&ftrace_cmd_mutex);
+ list_for_each_entry(p, &ftrace_commands, list) {
+ if (strcmp(cmd->name, p->name) == 0) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ }
+ list_add(&cmd->list, &ftrace_commands);
+ out_unlock:
+ mutex_unlock(&ftrace_cmd_mutex);
+
+ return ret;
+}
+
+int unregister_ftrace_command(struct ftrace_func_command *cmd)
+{
+ struct ftrace_func_command *p, *n;
+ int ret = -ENODEV;
+
+ mutex_lock(&ftrace_cmd_mutex);
+ list_for_each_entry_safe(p, n, &ftrace_commands, list) {
+ if (strcmp(cmd->name, p->name) == 0) {
+ ret = 0;
+ list_del_init(&p->list);
+ goto out_unlock;
+ }
+ }
+ out_unlock:
+ mutex_unlock(&ftrace_cmd_mutex);
+
+ return ret;
+}
+
+static int ftrace_process_regex(char *buff, int len, int enable)
+{
+ char *func, *command, *next = buff;
+ struct ftrace_func_command *p;
+ int ret = -EINVAL;
+
+ func = strsep(&next, ":");
+
+ if (!next) {
+ ftrace_match_records(func, len, enable);
+ return 0;
+ }
+
+ /* command found */
+
+ command = strsep(&next, ":");
+
+ mutex_lock(&ftrace_cmd_mutex);
+ list_for_each_entry(p, &ftrace_commands, list) {
+ if (strcmp(p->name, command) == 0) {
+ ret = p->func(func, command, next, enable);
+ goto out_unlock;
}
- pg = pg->next;
}
- spin_unlock(&ftrace_lock);
+ out_unlock:
+ mutex_unlock(&ftrace_cmd_mutex);
+
+ return ret;
}
static ssize_t
@@ -1187,7 +1721,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
if (isspace(ch)) {
iter->filtered++;
iter->buffer[iter->buffer_idx] = 0;
- ftrace_match(iter->buffer, iter->buffer_idx, enable);
+ ret = ftrace_process_regex(iter->buffer,
+ iter->buffer_idx, enable);
+ if (ret)
+ goto out;
iter->buffer_idx = 0;
} else
iter->flags |= FTRACE_ITER_CONT;
@@ -1226,7 +1763,7 @@ ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
if (reset)
ftrace_filter_reset(enable);
if (buf)
- ftrace_match(buf, len, enable);
+ ftrace_match_records(buf, len, enable);
mutex_unlock(&ftrace_regex_lock);
}
@@ -1276,15 +1813,13 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
if (iter->buffer_idx) {
iter->filtered++;
iter->buffer[iter->buffer_idx] = 0;
- ftrace_match(iter->buffer, iter->buffer_idx, enable);
+ ftrace_match_records(iter->buffer, iter->buffer_idx, enable);
}
- mutex_lock(&ftrace_sysctl_lock);
- mutex_lock(&ftrace_start_lock);
+ mutex_lock(&ftrace_lock);
if (ftrace_start_up && ftrace_enabled)
ftrace_run_update_code(FTRACE_ENABLE_CALLS);
- mutex_unlock(&ftrace_start_lock);
- mutex_unlock(&ftrace_sysctl_lock);
+ mutex_unlock(&ftrace_lock);
kfree(iter);
mutex_unlock(&ftrace_regex_lock);
@@ -1437,36 +1972,33 @@ ftrace_set_func(unsigned long *array, int idx, char *buffer)
struct dyn_ftrace *rec;
struct ftrace_page *pg;
int found = 0;
- int i, j;
+ int j;
if (ftrace_disabled)
return -ENODEV;
- /* should not be called from interrupt context */
- spin_lock(&ftrace_lock);
-
- for (pg = ftrace_pages_start; pg; pg = pg->next) {
- for (i = 0; i < pg->index; i++) {
- rec = &pg->records[i];
+ mutex_lock(&ftrace_lock);
+ do_for_each_ftrace_rec(pg, rec) {
- if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
- continue;
+ if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
+ continue;
- kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
- if (strcmp(str, buffer) == 0) {
- found = 1;
- for (j = 0; j < idx; j++)
- if (array[j] == rec->ip) {
- found = 0;
- break;
- }
- if (found)
- array[idx] = rec->ip;
- break;
- }
+ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+ if (strcmp(str, buffer) == 0) {
+ /* Return 1 if we add it to the array */
+ found = 1;
+ for (j = 0; j < idx; j++)
+ if (array[j] == rec->ip) {
+ found = 0;
+ break;
+ }
+ if (found)
+ array[idx] = rec->ip;
+ goto out;
}
- }
- spin_unlock(&ftrace_lock);
+ } while_for_each_ftrace_rec();
+ out:
+ mutex_unlock(&ftrace_lock);
return found ? 0 : -EINVAL;
}
@@ -1604,7 +2136,7 @@ static int ftrace_convert_nops(struct module *mod,
unsigned long addr;
unsigned long flags;
- mutex_lock(&ftrace_start_lock);
+ mutex_lock(&ftrace_lock);
p = start;
while (p < end) {
addr = ftrace_call_adjust(*p++);
@@ -1623,7 +2155,7 @@ static int ftrace_convert_nops(struct module *mod,
local_irq_save(flags);
ftrace_update_code(mod);
local_irq_restore(flags);
- mutex_unlock(&ftrace_start_lock);
+ mutex_unlock(&ftrace_lock);
return 0;
}
@@ -1796,7 +2328,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
if (ret < 0)
return ret;
- mutex_lock(&ftrace_start_lock);
+ mutex_lock(&ftrace_lock);
if (val < 0) {
/* disable pid tracing */
if (!ftrace_pid_trace)
@@ -1835,7 +2367,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf,
ftrace_startup_enable(0);
out:
- mutex_unlock(&ftrace_start_lock);
+ mutex_unlock(&ftrace_lock);
return cnt;
}
@@ -1863,7 +2395,6 @@ static __init int ftrace_init_debugfs(void)
"'set_ftrace_pid' entry\n");
return 0;
}
-
fs_initcall(ftrace_init_debugfs);
/**
@@ -1898,17 +2429,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
if (unlikely(ftrace_disabled))
return -1;
- mutex_lock(&ftrace_sysctl_lock);
+ mutex_lock(&ftrace_lock);
ret = __register_ftrace_function(ops);
ftrace_startup(0);
- mutex_unlock(&ftrace_sysctl_lock);
+ mutex_unlock(&ftrace_lock);
return ret;
}
/**
- * unregister_ftrace_function - unresgister a function for profiling.
+ * unregister_ftrace_function - unregister a function for profiling.
* @ops - ops structure that holds the function to unregister
*
* Unregister a function that was added to be called by ftrace profiling.
@@ -1917,10 +2448,10 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
{
int ret;
- mutex_lock(&ftrace_sysctl_lock);
+ mutex_lock(&ftrace_lock);
ret = __unregister_ftrace_function(ops);
ftrace_shutdown(0);
- mutex_unlock(&ftrace_sysctl_lock);
+ mutex_unlock(&ftrace_lock);
return ret;
}
@@ -1935,7 +2466,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
if (unlikely(ftrace_disabled))
return -ENODEV;
- mutex_lock(&ftrace_sysctl_lock);
+ mutex_lock(&ftrace_lock);
ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
@@ -1964,7 +2495,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
}
out:
- mutex_unlock(&ftrace_sysctl_lock);
+ mutex_unlock(&ftrace_lock);
return ret;
}
@@ -2080,7 +2611,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
{
int ret = 0;
- mutex_lock(&ftrace_sysctl_lock);
+ mutex_lock(&ftrace_lock);
ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
register_pm_notifier(&ftrace_suspend_notifier);
@@ -2098,13 +2629,13 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
ftrace_startup(FTRACE_START_FUNC_RET);
out:
- mutex_unlock(&ftrace_sysctl_lock);
+ mutex_unlock(&ftrace_lock);
return ret;
}
void unregister_ftrace_graph(void)
{
- mutex_lock(&ftrace_sysctl_lock);
+ mutex_lock(&ftrace_lock);
atomic_dec(&ftrace_graph_active);
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
@@ -2112,7 +2643,7 @@ void unregister_ftrace_graph(void)
ftrace_shutdown(FTRACE_STOP_FUNC_RET);
unregister_pm_notifier(&ftrace_suspend_notifier);
- mutex_unlock(&ftrace_sysctl_lock);
+ mutex_unlock(&ftrace_lock);
}
/* Allocate a return stack for newly created task */
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
new file mode 100644
index 00000000000..ae201b3eda8
--- /dev/null
+++ b/kernel/trace/kmemtrace.c
@@ -0,0 +1,339 @@
+/*
+ * Memory allocator tracing
+ *
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+#include <linux/dcache.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <trace/kmemtrace.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+/* Select an alternative, minimalistic output than the original one */
+#define TRACE_KMEM_OPT_MINIMAL 0x1
+
+static struct tracer_opt kmem_opts[] = {
+ /* Default disable the minimalistic output */
+ { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
+ { }
+};
+
+static struct tracer_flags kmem_tracer_flags = {
+ .val = 0,
+ .opts = kmem_opts
+};
+
+
+static bool kmem_tracing_enabled __read_mostly;
+static struct trace_array *kmemtrace_array;
+
+static int kmem_trace_init(struct trace_array *tr)
+{
+ int cpu;
+ kmemtrace_array = tr;
+
+ for_each_cpu_mask(cpu, cpu_possible_map)
+ tracing_reset(tr, cpu);
+
+ kmem_tracing_enabled = true;
+
+ return 0;
+}
+
+static void kmem_trace_reset(struct trace_array *tr)
+{
+ kmem_tracing_enabled = false;
+}
+
+static void kmemtrace_headers(struct seq_file *s)
+{
+ /* Don't need headers for the original kmemtrace output */
+ if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
+ return;
+
+ seq_printf(s, "#\n");
+ seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
+ " POINTER NODE CALLER\n");
+ seq_printf(s, "# FREE | | | | "
+ " | | | |\n");
+ seq_printf(s, "# |\n\n");
+}
+
+/*
+ * The two following functions give the original output from kmemtrace,
+ * or something close to....perhaps they need some missing things
+ */
+static enum print_line_t
+kmemtrace_print_alloc_original(struct trace_iterator *iter,
+ struct kmemtrace_alloc_entry *entry)
+{
+ struct trace_seq *s = &iter->seq;
+ int ret;
+
+ /* Taken from the old linux/kmemtrace.h */
+ ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu "
+ "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
+ entry->type_id, entry->call_site, (unsigned long) entry->ptr,
+ (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc,
+ (unsigned long) entry->gfp_flags, entry->node);
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+kmemtrace_print_free_original(struct trace_iterator *iter,
+ struct kmemtrace_free_entry *entry)
+{
+ struct trace_seq *s = &iter->seq;
+ int ret;
+
+ /* Taken from the old linux/kmemtrace.h */
+ ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n",
+ entry->type_id, entry->call_site, (unsigned long) entry->ptr);
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+
+/* The two other following provide a more minimalistic output */
+static enum print_line_t
+kmemtrace_print_alloc_compress(struct trace_iterator *iter,
+ struct kmemtrace_alloc_entry *entry)
+{
+ struct trace_seq *s = &iter->seq;
+ int ret;
+
+ /* Alloc entry */
+ ret = trace_seq_printf(s, " + ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Type */
+ switch (entry->type_id) {
+ case KMEMTRACE_TYPE_KMALLOC:
+ ret = trace_seq_printf(s, "K ");
+ break;
+ case KMEMTRACE_TYPE_CACHE:
+ ret = trace_seq_printf(s, "C ");
+ break;
+ case KMEMTRACE_TYPE_PAGES:
+ ret = trace_seq_printf(s, "P ");
+ break;
+ default:
+ ret = trace_seq_printf(s, "? ");
+ }
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Requested */
+ ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Allocated */
+ ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Flags
+ * TODO: would be better to see the name of the GFP flag names
+ */
+ ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Pointer to allocated */
+ ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Node */
+ ret = trace_seq_printf(s, "%4d ", entry->node);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Call site */
+ ret = seq_print_ip_sym(s, entry->call_site, 0);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!trace_seq_printf(s, "\n"))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+kmemtrace_print_free_compress(struct trace_iterator *iter,
+ struct kmemtrace_free_entry *entry)
+{
+ struct trace_seq *s = &iter->seq;
+ int ret;
+
+ /* Free entry */
+ ret = trace_seq_printf(s, " - ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Type */
+ switch (entry->type_id) {
+ case KMEMTRACE_TYPE_KMALLOC:
+ ret = trace_seq_printf(s, "K ");
+ break;
+ case KMEMTRACE_TYPE_CACHE:
+ ret = trace_seq_printf(s, "C ");
+ break;
+ case KMEMTRACE_TYPE_PAGES:
+ ret = trace_seq_printf(s, "P ");
+ break;
+ default:
+ ret = trace_seq_printf(s, "? ");
+ }
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Skip requested/allocated/flags */
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Pointer to allocated */
+ ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Skip node */
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Call site */
+ ret = seq_print_ip_sym(s, entry->call_site, 0);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ if (!trace_seq_printf(s, "\n"))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+
+ switch (entry->type) {
+ case TRACE_KMEM_ALLOC: {
+ struct kmemtrace_alloc_entry *field;
+ trace_assign_type(field, entry);
+ if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
+ return kmemtrace_print_alloc_compress(iter, field);
+ else
+ return kmemtrace_print_alloc_original(iter, field);
+ }
+
+ case TRACE_KMEM_FREE: {
+ struct kmemtrace_free_entry *field;
+ trace_assign_type(field, entry);
+ if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
+ return kmemtrace_print_free_compress(iter, field);
+ else
+ return kmemtrace_print_free_original(iter, field);
+ }
+
+ default:
+ return TRACE_TYPE_UNHANDLED;
+ }
+}
+
+/* Trace allocations */
+void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags,
+ int node)
+{
+ struct ring_buffer_event *event;
+ struct kmemtrace_alloc_entry *entry;
+ struct trace_array *tr = kmemtrace_array;
+
+ if (!kmem_tracing_enabled)
+ return;
+
+ event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC,
+ sizeof(*entry), 0, 0);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+
+ entry->call_site = call_site;
+ entry->ptr = ptr;
+ entry->bytes_req = bytes_req;
+ entry->bytes_alloc = bytes_alloc;
+ entry->gfp_flags = gfp_flags;
+ entry->node = node;
+
+ trace_buffer_unlock_commit(tr, event, 0, 0);
+}
+EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
+
+void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr)
+{
+ struct ring_buffer_event *event;
+ struct kmemtrace_free_entry *entry;
+ struct trace_array *tr = kmemtrace_array;
+
+ if (!kmem_tracing_enabled)
+ return;
+
+ event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE,
+ sizeof(*entry), 0, 0);
+ if (!event)
+ return;
+ entry = ring_buffer_event_data(event);
+ entry->type_id = type_id;
+ entry->call_site = call_site;
+ entry->ptr = ptr;
+
+ trace_buffer_unlock_commit(tr, event, 0, 0);
+}
+EXPORT_SYMBOL(kmemtrace_mark_free);
+
+static struct tracer kmem_tracer __read_mostly = {
+ .name = "kmemtrace",
+ .init = kmem_trace_init,
+ .reset = kmem_trace_reset,
+ .print_line = kmemtrace_print_line,
+ .print_header = kmemtrace_headers,
+ .flags = &kmem_tracer_flags
+};
+
+void kmemtrace_init(void)
+{
+ /* earliest opportunity to start kmem tracing */
+}
+
+static int __init init_kmem_tracer(void)
+{
+ return register_tracer(&kmem_tracer);
+}
+
+device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bd38c5cfd8a..8f19f1aa42b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,9 +4,11 @@
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
*/
#include <linux/ring_buffer.h>
+#include <linux/ftrace_irq.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
+#include <linux/hardirq.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
@@ -57,7 +59,7 @@ enum {
RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
};
-static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
+static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
/**
* tracing_on - enable all tracing buffers
@@ -89,13 +91,22 @@ EXPORT_SYMBOL_GPL(tracing_off);
* tracing_off_permanent - permanently disable ring buffers
*
* This function, once called, will disable all ring buffers
- * permanenty.
+ * permanently.
*/
void tracing_off_permanent(void)
{
set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
}
+/**
+ * tracing_is_on - show state of ring buffers enabled
+ */
+int tracing_is_on(void)
+{
+ return ring_buffer_flags == RB_BUFFERS_ON;
+}
+EXPORT_SYMBOL_GPL(tracing_is_on);
+
#include "trace.h"
/* Up this if you want to test the TIME_EXTENTS and normalization */
@@ -123,8 +134,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
-#define RB_ALIGNMENT_SHIFT 2
-#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
+#define RB_ALIGNMENT 4U
#define RB_MAX_SMALL_DATA 28
enum {
@@ -133,7 +143,7 @@ enum {
};
/* inline for ring buffer fast paths */
-static inline unsigned
+static unsigned
rb_event_length(struct ring_buffer_event *event)
{
unsigned length;
@@ -151,7 +161,7 @@ rb_event_length(struct ring_buffer_event *event)
case RINGBUF_TYPE_DATA:
if (event->len)
- length = event->len << RB_ALIGNMENT_SHIFT;
+ length = event->len * RB_ALIGNMENT;
else
length = event->array[0];
return length + RB_EVNT_HDR_SIZE;
@@ -179,7 +189,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
EXPORT_SYMBOL_GPL(ring_buffer_event_length);
/* inline for ring buffer fast paths */
-static inline void *
+static void *
rb_event_data(struct ring_buffer_event *event)
{
BUG_ON(event->type != RINGBUF_TYPE_DATA);
@@ -209,7 +219,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
struct buffer_data_page {
u64 time_stamp; /* page time stamp */
- local_t commit; /* write commited index */
+ local_t commit; /* write committed index */
unsigned char data[]; /* data of buffer page */
};
@@ -229,10 +239,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
* Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
* this issue out.
*/
-static inline void free_buffer_page(struct buffer_page *bpage)
+static void free_buffer_page(struct buffer_page *bpage)
{
- if (bpage->page)
- free_page((unsigned long)bpage->page);
+ free_page((unsigned long)bpage->page);
kfree(bpage);
}
@@ -260,7 +269,7 @@ struct ring_buffer_per_cpu {
struct list_head pages;
struct buffer_page *head_page; /* read from head */
struct buffer_page *tail_page; /* write to tail */
- struct buffer_page *commit_page; /* commited pages */
+ struct buffer_page *commit_page; /* committed pages */
struct buffer_page *reader_page;
unsigned long overrun;
unsigned long entries;
@@ -273,8 +282,8 @@ struct ring_buffer {
unsigned pages;
unsigned flags;
int cpus;
- cpumask_var_t cpumask;
atomic_t record_disabled;
+ cpumask_var_t cpumask;
struct mutex mutex;
@@ -303,7 +312,7 @@ struct ring_buffer_iter {
* check_pages - integrity check of buffer pages
* @cpu_buffer: CPU buffer with pages to test
*
- * As a safty measure we check to make sure the data pages have not
+ * As a safety measure we check to make sure the data pages have not
* been corrupted.
*/
static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
@@ -811,7 +820,7 @@ rb_event_index(struct ring_buffer_event *event)
return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
}
-static inline int
+static int
rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
@@ -825,7 +834,7 @@ rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
rb_commit_index(cpu_buffer) == index;
}
-static inline void
+static void
rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
@@ -850,7 +859,7 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
local_set(&cpu_buffer->commit_page->page->commit, index);
}
-static inline void
+static void
rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
{
/*
@@ -896,7 +905,7 @@ static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->reader_page->read = 0;
}
-static inline void rb_inc_iter(struct ring_buffer_iter *iter)
+static void rb_inc_iter(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
@@ -926,7 +935,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
* and with this, we can determine what to place into the
* data field.
*/
-static inline void
+static void
rb_update_event(struct ring_buffer_event *event,
unsigned type, unsigned length)
{
@@ -938,15 +947,11 @@ rb_update_event(struct ring_buffer_event *event,
break;
case RINGBUF_TYPE_TIME_EXTEND:
- event->len =
- (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
- >> RB_ALIGNMENT_SHIFT;
+ event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
break;
case RINGBUF_TYPE_TIME_STAMP:
- event->len =
- (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
- >> RB_ALIGNMENT_SHIFT;
+ event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
break;
case RINGBUF_TYPE_DATA:
@@ -955,16 +960,14 @@ rb_update_event(struct ring_buffer_event *event,
event->len = 0;
event->array[0] = length;
} else
- event->len =
- (length + (RB_ALIGNMENT-1))
- >> RB_ALIGNMENT_SHIFT;
+ event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
break;
default:
BUG();
}
}
-static inline unsigned rb_calculate_event_length(unsigned length)
+static unsigned rb_calculate_event_length(unsigned length)
{
struct ring_buffer_event event; /* Used only for sizeof array */
@@ -990,6 +993,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer *buffer = cpu_buffer->buffer;
struct ring_buffer_event *event;
unsigned long flags;
+ bool lock_taken = false;
commit_page = cpu_buffer->commit_page;
/* we just need to protect against interrupts */
@@ -1003,7 +1007,30 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
struct buffer_page *next_page = tail_page;
local_irq_save(flags);
- __raw_spin_lock(&cpu_buffer->lock);
+ /*
+ * Since the write to the buffer is still not
+ * fully lockless, we must be careful with NMIs.
+ * The locks in the writers are taken when a write
+ * crosses to a new page. The locks protect against
+ * races with the readers (this will soon be fixed
+ * with a lockless solution).
+ *
+ * Because we can not protect against NMIs, and we
+ * want to keep traces reentrant, we need to manage
+ * what happens when we are in an NMI.
+ *
+ * NMIs can happen after we take the lock.
+ * If we are in an NMI, only take the lock
+ * if it is not already taken. Otherwise
+ * simply fail.
+ */
+ if (unlikely(in_nmi())) {
+ if (!__raw_spin_trylock(&cpu_buffer->lock))
+ goto out_reset;
+ } else
+ __raw_spin_lock(&cpu_buffer->lock);
+
+ lock_taken = true;
rb_inc_page(cpu_buffer, &next_page);
@@ -1012,7 +1039,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
/* we grabbed the lock before incrementing */
if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
- goto out_unlock;
+ goto out_reset;
/*
* If for some reason, we had an interrupt storm that made
@@ -1021,12 +1048,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
*/
if (unlikely(next_page == commit_page)) {
WARN_ON_ONCE(1);
- goto out_unlock;
+ goto out_reset;
}
if (next_page == head_page) {
if (!(buffer->flags & RB_FL_OVERWRITE))
- goto out_unlock;
+ goto out_reset;
/* tail_page has not moved yet? */
if (tail_page == cpu_buffer->tail_page) {
@@ -1100,12 +1127,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
return event;
- out_unlock:
+ out_reset:
/* reset write */
if (tail <= BUF_PAGE_SIZE)
local_set(&tail_page->write, tail);
- __raw_spin_unlock(&cpu_buffer->lock);
+ if (likely(lock_taken))
+ __raw_spin_unlock(&cpu_buffer->lock);
local_irq_restore(flags);
return NULL;
}
@@ -1265,7 +1293,6 @@ static DEFINE_PER_CPU(int, rb_need_resched);
* ring_buffer_lock_reserve - reserve a part of the buffer
* @buffer: the ring buffer to reserve from
* @length: the length of the data to reserve (excluding event header)
- * @flags: a pointer to save the interrupt flags
*
* Returns a reseverd event on the ring buffer to copy directly to.
* The user of this interface will need to get the body to write into
@@ -1278,9 +1305,7 @@ static DEFINE_PER_CPU(int, rb_need_resched);
* If NULL is returned, then nothing has been allocated or locked.
*/
struct ring_buffer_event *
-ring_buffer_lock_reserve(struct ring_buffer *buffer,
- unsigned long length,
- unsigned long *flags)
+ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
@@ -1347,15 +1372,13 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
* ring_buffer_unlock_commit - commit a reserved
* @buffer: The buffer to commit to
* @event: The event pointer to commit.
- * @flags: the interrupt flags received from ring_buffer_lock_reserve.
*
* This commits the data to the ring buffer, and releases any locks held.
*
* Must be paired with ring_buffer_lock_reserve.
*/
int ring_buffer_unlock_commit(struct ring_buffer *buffer,
- struct ring_buffer_event *event,
- unsigned long flags)
+ struct ring_buffer_event *event)
{
struct ring_buffer_per_cpu *cpu_buffer;
int cpu = raw_smp_processor_id();
@@ -1438,7 +1461,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
}
EXPORT_SYMBOL_GPL(ring_buffer_write);
-static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
{
struct buffer_page *reader = cpu_buffer->reader_page;
struct buffer_page *head = cpu_buffer->head_page;
@@ -2277,9 +2300,24 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
if (buffer_a->pages != buffer_b->pages)
return -EINVAL;
+ if (ring_buffer_flags != RB_BUFFERS_ON)
+ return -EAGAIN;
+
+ if (atomic_read(&buffer_a->record_disabled))
+ return -EAGAIN;
+
+ if (atomic_read(&buffer_b->record_disabled))
+ return -EAGAIN;
+
cpu_buffer_a = buffer_a->buffers[cpu];
cpu_buffer_b = buffer_b->buffers[cpu];
+ if (atomic_read(&cpu_buffer_a->record_disabled))
+ return -EAGAIN;
+
+ if (atomic_read(&cpu_buffer_b->record_disabled))
+ return -EAGAIN;
+
/*
* We can't do a synchronize_sched here because this
* function can be called in atomic context.
@@ -2303,13 +2341,14 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
- struct buffer_data_page *bpage)
+ struct buffer_data_page *bpage,
+ unsigned int offset)
{
struct ring_buffer_event *event;
unsigned long head;
__raw_spin_lock(&cpu_buffer->lock);
- for (head = 0; head < local_read(&bpage->commit);
+ for (head = offset; head < local_read(&bpage->commit);
head += rb_event_length(event)) {
event = __rb_data_page_index(bpage, head);
@@ -2377,12 +2416,12 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
* to swap with a page in the ring buffer.
*
* for example:
- * rpage = ring_buffer_alloc_page(buffer);
+ * rpage = ring_buffer_alloc_read_page(buffer);
* if (!rpage)
* return error;
* ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
- * if (ret)
- * process_page(rpage);
+ * if (ret >= 0)
+ * process_page(rpage, ret);
*
* When @full is set, the function will not return true unless
* the writer is off the reader page.
@@ -2393,8 +2432,8 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
* responsible for that.
*
* Returns:
- * 1 if data has been transferred
- * 0 if no data has been transferred.
+ * >=0 if data has been transferred, returns the offset of consumed data.
+ * <0 if no data has been transferred.
*/
int ring_buffer_read_page(struct ring_buffer *buffer,
void **data_page, int cpu, int full)
@@ -2403,7 +2442,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
struct ring_buffer_event *event;
struct buffer_data_page *bpage;
unsigned long flags;
- int ret = 0;
+ unsigned int read;
+ int ret = -1;
if (!data_page)
return 0;
@@ -2425,25 +2465,29 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
/* check for data */
if (!local_read(&cpu_buffer->reader_page->page->commit))
goto out;
+
+ read = cpu_buffer->reader_page->read;
/*
* If the writer is already off of the read page, then simply
* switch the read page with the given page. Otherwise
* we need to copy the data from the reader to the writer.
*/
if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
- unsigned int read = cpu_buffer->reader_page->read;
+ unsigned int commit = rb_page_commit(cpu_buffer->reader_page);
+ struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
if (full)
goto out;
/* The writer is still on the reader page, we must copy */
- bpage = cpu_buffer->reader_page->page;
- memcpy(bpage->data,
- cpu_buffer->reader_page->page->data + read,
- local_read(&bpage->commit) - read);
+ memcpy(bpage->data + read, rpage->data + read, commit - read);
/* consume what was read */
- cpu_buffer->reader_page += read;
+ cpu_buffer->reader_page->read = commit;
+ /* update bpage */
+ local_set(&bpage->commit, commit);
+ if (!read)
+ bpage->time_stamp = rpage->time_stamp;
} else {
/* swap the pages */
rb_init_page(bpage);
@@ -2452,10 +2496,10 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
cpu_buffer->reader_page->read = 0;
*data_page = bpage;
}
- ret = 1;
+ ret = read;
/* update the entry counter */
- rb_remove_entries(cpu_buffer, bpage);
+ rb_remove_entries(cpu_buffer, bpage, read);
out:
spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
@@ -2466,7 +2510,7 @@ static ssize_t
rb_simple_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- long *p = filp->private_data;
+ unsigned long *p = filp->private_data;
char buf[64];
int r;
@@ -2482,9 +2526,9 @@ static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- long *p = filp->private_data;
+ unsigned long *p = filp->private_data;
char buf[64];
- long val;
+ unsigned long val;
int ret;
if (cnt >= sizeof(buf))
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 17bb88d86ac..e1f3b99a2e5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -31,12 +31,14 @@
#include <linux/fs.h>
#include <linux/kprobes.h>
#include <linux/writeback.h>
+#include <linux/splice.h>
#include <linux/stacktrace.h>
#include <linux/ring_buffer.h>
#include <linux/irqflags.h>
#include "trace.h"
+#include "trace_output.h"
#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
@@ -52,6 +54,11 @@ unsigned long __read_mostly tracing_thresh;
*/
static bool __read_mostly tracing_selftest_running;
+/*
+ * If a tracer is running, we do not want to run SELFTEST.
+ */
+static bool __read_mostly tracing_selftest_disabled;
+
/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
{ }
@@ -73,7 +80,7 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
* of the tracer is successful. But that is the only place that sets
* this back to zero.
*/
-int tracing_disabled = 1;
+static int tracing_disabled = 1;
static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
@@ -109,14 +116,19 @@ static cpumask_var_t __read_mostly tracing_buffer_mask;
*/
int ftrace_dump_on_oops;
-static int tracing_set_tracer(char *buf);
+static int tracing_set_tracer(const char *buf);
+
+#define BOOTUP_TRACER_SIZE 100
+static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
+static char *default_bootup_tracer;
static int __init set_ftrace(char *str)
{
- tracing_set_tracer(str);
+ strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
+ default_bootup_tracer = bootup_tracer_buf;
return 1;
}
-__setup("ftrace", set_ftrace);
+__setup("ftrace=", set_ftrace);
static int __init set_ftrace_dump_on_oops(char *str)
{
@@ -186,9 +198,6 @@ int tracing_is_enabled(void)
return tracer_enabled;
}
-/* function tracing enabled */
-int ftrace_function_enabled;
-
/*
* trace_buf_size is the size in bytes that is allocated
* for a buffer. Note, the number of bytes is always rounded
@@ -229,7 +238,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
/* trace_flags holds trace_options default values */
unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
- TRACE_ITER_ANNOTATE;
+ TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO;
/**
* trace_wake_up - wake up tasks waiting for trace input
@@ -287,6 +296,7 @@ static const char *trace_options[] = {
"userstacktrace",
"sym-userobj",
"printk-msg-only",
+ "context-info",
NULL
};
@@ -326,133 +336,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
data->rt_priority = tsk->rt_priority;
/* record this tasks comm */
- tracing_record_cmdline(current);
-}
-
-/**
- * trace_seq_printf - sequence printing of trace information
- * @s: trace sequence descriptor
- * @fmt: printf format string
- *
- * The tracer may use either sequence operations or its own
- * copy to user routines. To simplify formating of a trace
- * trace_seq_printf is used to store strings into a special
- * buffer (@s). Then the output may be either used by
- * the sequencer or pulled into another buffer.
- */
-int
-trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-{
- int len = (PAGE_SIZE - 1) - s->len;
- va_list ap;
- int ret;
-
- if (!len)
- return 0;
-
- va_start(ap, fmt);
- ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
- va_end(ap);
-
- /* If we can't write it all, don't bother writing anything */
- if (ret >= len)
- return 0;
-
- s->len += ret;
-
- return len;
-}
-
-/**
- * trace_seq_puts - trace sequence printing of simple string
- * @s: trace sequence descriptor
- * @str: simple string to record
- *
- * The tracer may use either the sequence operations or its own
- * copy to user routines. This function records a simple string
- * into a special buffer (@s) for later retrieval by a sequencer
- * or other mechanism.
- */
-static int
-trace_seq_puts(struct trace_seq *s, const char *str)
-{
- int len = strlen(str);
-
- if (len > ((PAGE_SIZE - 1) - s->len))
- return 0;
-
- memcpy(s->buffer + s->len, str, len);
- s->len += len;
-
- return len;
-}
-
-static int
-trace_seq_putc(struct trace_seq *s, unsigned char c)
-{
- if (s->len >= (PAGE_SIZE - 1))
- return 0;
-
- s->buffer[s->len++] = c;
-
- return 1;
-}
-
-static int
-trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
-{
- if (len > ((PAGE_SIZE - 1) - s->len))
- return 0;
-
- memcpy(s->buffer + s->len, mem, len);
- s->len += len;
-
- return len;
-}
-
-#define MAX_MEMHEX_BYTES 8
-#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
-
-static int
-trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
-{
- unsigned char hex[HEX_CHARS];
- unsigned char *data = mem;
- int i, j;
-
-#ifdef __BIG_ENDIAN
- for (i = 0, j = 0; i < len; i++) {
-#else
- for (i = len-1, j = 0; i >= 0; i--) {
-#endif
- hex[j++] = hex_asc_hi(data[i]);
- hex[j++] = hex_asc_lo(data[i]);
- }
- hex[j++] = ' ';
-
- return trace_seq_putmem(s, hex, j);
-}
-
-static int
-trace_seq_path(struct trace_seq *s, struct path *path)
-{
- unsigned char *p;
-
- if (s->len >= (PAGE_SIZE - 1))
- return 0;
- p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
- if (!IS_ERR(p)) {
- p = mangle_path(s->buffer + s->len, p, "\n");
- if (p) {
- s->len = p - s->buffer;
- return 1;
- }
- } else {
- s->buffer[s->len++] = '?';
- return 1;
- }
-
- return 0;
+ tracing_record_cmdline(tsk);
}
static void
@@ -481,6 +365,25 @@ ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
return cnt;
}
+ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
+{
+ int len;
+ void *ret;
+
+ if (s->len <= s->readpos)
+ return -EBUSY;
+
+ len = s->len - s->readpos;
+ if (cnt > len)
+ cnt = len;
+ ret = memcpy(buf, s->buffer + s->readpos, cnt);
+ if (!ret)
+ return -EFAULT;
+
+ s->readpos += len;
+ return cnt;
+}
+
static void
trace_print_seq(struct seq_file *m, struct trace_seq *s)
{
@@ -543,7 +446,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
ftrace_enable_cpu();
- WARN_ON_ONCE(ret);
+ WARN_ON_ONCE(ret && ret != -EAGAIN);
__update_max_tr(tr, tsk, cpu);
__raw_spin_unlock(&ftrace_max_lock);
@@ -556,6 +459,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
* Register a new plugin tracer.
*/
int register_tracer(struct tracer *type)
+__releases(kernel_lock)
+__acquires(kernel_lock)
{
struct tracer *t;
int len;
@@ -594,9 +499,12 @@ int register_tracer(struct tracer *type)
else
if (!type->flags->opts)
type->flags->opts = dummy_tracer_opt;
+ if (!type->wait_pipe)
+ type->wait_pipe = default_wait_pipe;
+
#ifdef CONFIG_FTRACE_STARTUP_TEST
- if (type->selftest) {
+ if (type->selftest && !tracing_selftest_disabled) {
struct tracer *saved_tracer = current_trace;
struct trace_array *tr = &global_trace;
int i;
@@ -638,8 +546,26 @@ int register_tracer(struct tracer *type)
out:
tracing_selftest_running = false;
mutex_unlock(&trace_types_lock);
- lock_kernel();
+ if (ret || !default_bootup_tracer)
+ goto out_unlock;
+
+ if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
+ goto out_unlock;
+
+ printk(KERN_INFO "Starting tracer '%s'\n", type->name);
+ /* Do we want this tracer to start on bootup? */
+ tracing_set_tracer(type->name);
+ default_bootup_tracer = NULL;
+ /* disable other selftests, since this will break it. */
+ tracing_selftest_disabled = 1;
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+ printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
+ type->name);
+#endif
+
+ out_unlock:
+ lock_kernel();
return ret;
}
@@ -658,6 +584,15 @@ void unregister_tracer(struct tracer *type)
found:
*t = (*t)->next;
+
+ if (type == current_trace && tracer_enabled) {
+ tracer_enabled = 0;
+ tracing_stop();
+ if (current_trace->stop)
+ current_trace->stop(&global_trace);
+ current_trace = &nop_trace;
+ }
+
if (strlen(type->name) != max_tracer_type_len)
goto out;
@@ -696,7 +631,7 @@ static int cmdline_idx;
static DEFINE_SPINLOCK(trace_cmdline_lock);
/* temporary disable recording */
-atomic_t trace_record_cmdline_disabled __read_mostly;
+static atomic_t trace_record_cmdline_disabled __read_mostly;
static void trace_init_cmdlines(void)
{
@@ -738,13 +673,12 @@ void tracing_start(void)
return;
spin_lock_irqsave(&tracing_start_lock, flags);
- if (--trace_stop_count)
- goto out;
-
- if (trace_stop_count < 0) {
- /* Someone screwed up their debugging */
- WARN_ON_ONCE(1);
- trace_stop_count = 0;
+ if (--trace_stop_count) {
+ if (trace_stop_count < 0) {
+ /* Someone screwed up their debugging */
+ WARN_ON_ONCE(1);
+ trace_stop_count = 0;
+ }
goto out;
}
@@ -876,78 +810,100 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
}
+struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
+ unsigned char type,
+ unsigned long len,
+ unsigned long flags, int pc)
+{
+ struct ring_buffer_event *event;
+
+ event = ring_buffer_lock_reserve(tr->buffer, len);
+ if (event != NULL) {
+ struct trace_entry *ent = ring_buffer_event_data(event);
+
+ tracing_generic_entry_update(ent, flags, pc);
+ ent->type = type;
+ }
+
+ return event;
+}
+static void ftrace_trace_stack(struct trace_array *tr,
+ unsigned long flags, int skip, int pc);
+static void ftrace_trace_userstack(struct trace_array *tr,
+ unsigned long flags, int pc);
+
+void trace_buffer_unlock_commit(struct trace_array *tr,
+ struct ring_buffer_event *event,
+ unsigned long flags, int pc)
+{
+ ring_buffer_unlock_commit(tr->buffer, event);
+
+ ftrace_trace_stack(tr, flags, 6, pc);
+ ftrace_trace_userstack(tr, flags, pc);
+ trace_wake_up();
+}
+
void
-trace_function(struct trace_array *tr, struct trace_array_cpu *data,
+trace_function(struct trace_array *tr,
unsigned long ip, unsigned long parent_ip, unsigned long flags,
int pc)
{
struct ring_buffer_event *event;
struct ftrace_entry *entry;
- unsigned long irq_flags;
/* If we are reading the ring buffer, don't trace */
if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
return;
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
+ event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry),
+ flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, flags, pc);
- entry->ent.type = TRACE_FN;
entry->ip = ip;
entry->parent_ip = parent_ip;
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ ring_buffer_unlock_commit(tr->buffer, event);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static void __trace_graph_entry(struct trace_array *tr,
- struct trace_array_cpu *data,
struct ftrace_graph_ent *trace,
unsigned long flags,
int pc)
{
struct ring_buffer_event *event;
struct ftrace_graph_ent_entry *entry;
- unsigned long irq_flags;
if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
return;
- event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
- &irq_flags);
+ event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
+ sizeof(*entry), flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, flags, pc);
- entry->ent.type = TRACE_GRAPH_ENT;
entry->graph_ent = *trace;
- ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+ ring_buffer_unlock_commit(global_trace.buffer, event);
}
static void __trace_graph_return(struct trace_array *tr,
- struct trace_array_cpu *data,
struct ftrace_graph_ret *trace,
unsigned long flags,
int pc)
{
struct ring_buffer_event *event;
struct ftrace_graph_ret_entry *entry;
- unsigned long irq_flags;
if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
return;
- event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
- &irq_flags);
+ event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
+ sizeof(*entry), flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, flags, pc);
- entry->ent.type = TRACE_GRAPH_RET;
entry->ret = *trace;
- ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
+ ring_buffer_unlock_commit(global_trace.buffer, event);
}
#endif
@@ -957,31 +913,23 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
int pc)
{
if (likely(!atomic_read(&data->disabled)))
- trace_function(tr, data, ip, parent_ip, flags, pc);
+ trace_function(tr, ip, parent_ip, flags, pc);
}
-static void ftrace_trace_stack(struct trace_array *tr,
- struct trace_array_cpu *data,
- unsigned long flags,
- int skip, int pc)
+static void __ftrace_trace_stack(struct trace_array *tr,
+ unsigned long flags,
+ int skip, int pc)
{
#ifdef CONFIG_STACKTRACE
struct ring_buffer_event *event;
struct stack_entry *entry;
struct stack_trace trace;
- unsigned long irq_flags;
- if (!(trace_flags & TRACE_ITER_STACKTRACE))
- return;
-
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
+ event = trace_buffer_lock_reserve(tr, TRACE_STACK,
+ sizeof(*entry), flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, flags, pc);
- entry->ent.type = TRACE_STACK;
-
memset(&entry->caller, 0, sizeof(entry->caller));
trace.nr_entries = 0;
@@ -990,38 +938,43 @@ static void ftrace_trace_stack(struct trace_array *tr,
trace.entries = entry->caller;
save_stack_trace(&trace);
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ ring_buffer_unlock_commit(tr->buffer, event);
#endif
}
+static void ftrace_trace_stack(struct trace_array *tr,
+ unsigned long flags,
+ int skip, int pc)
+{
+ if (!(trace_flags & TRACE_ITER_STACKTRACE))
+ return;
+
+ __ftrace_trace_stack(tr, flags, skip, pc);
+}
+
void __trace_stack(struct trace_array *tr,
- struct trace_array_cpu *data,
unsigned long flags,
- int skip)
+ int skip, int pc)
{
- ftrace_trace_stack(tr, data, flags, skip, preempt_count());
+ __ftrace_trace_stack(tr, flags, skip, pc);
}
static void ftrace_trace_userstack(struct trace_array *tr,
- struct trace_array_cpu *data,
- unsigned long flags, int pc)
+ unsigned long flags, int pc)
{
#ifdef CONFIG_STACKTRACE
struct ring_buffer_event *event;
struct userstack_entry *entry;
struct stack_trace trace;
- unsigned long irq_flags;
if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
return;
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
+ event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK,
+ sizeof(*entry), flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, flags, pc);
- entry->ent.type = TRACE_USER_STACK;
memset(&entry->caller, 0, sizeof(entry->caller));
@@ -1031,70 +984,58 @@ static void ftrace_trace_userstack(struct trace_array *tr,
trace.entries = entry->caller;
save_stack_trace_user(&trace);
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ ring_buffer_unlock_commit(tr->buffer, event);
#endif
}
-void __trace_userstack(struct trace_array *tr,
- struct trace_array_cpu *data,
- unsigned long flags)
+#ifdef UNUSED
+static void __trace_userstack(struct trace_array *tr, unsigned long flags)
{
- ftrace_trace_userstack(tr, data, flags, preempt_count());
+ ftrace_trace_userstack(tr, flags, preempt_count());
}
+#endif /* UNUSED */
static void
-ftrace_trace_special(void *__tr, void *__data,
+ftrace_trace_special(void *__tr,
unsigned long arg1, unsigned long arg2, unsigned long arg3,
int pc)
{
struct ring_buffer_event *event;
- struct trace_array_cpu *data = __data;
struct trace_array *tr = __tr;
struct special_entry *entry;
- unsigned long irq_flags;
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
+ event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL,
+ sizeof(*entry), 0, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, 0, pc);
- entry->ent.type = TRACE_SPECIAL;
entry->arg1 = arg1;
entry->arg2 = arg2;
entry->arg3 = arg3;
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
- ftrace_trace_stack(tr, data, irq_flags, 4, pc);
- ftrace_trace_userstack(tr, data, irq_flags, pc);
-
- trace_wake_up();
+ trace_buffer_unlock_commit(tr, event, 0, pc);
}
void
__trace_special(void *__tr, void *__data,
unsigned long arg1, unsigned long arg2, unsigned long arg3)
{
- ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
+ ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
}
void
tracing_sched_switch_trace(struct trace_array *tr,
- struct trace_array_cpu *data,
struct task_struct *prev,
struct task_struct *next,
unsigned long flags, int pc)
{
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
- unsigned long irq_flags;
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
+ event = trace_buffer_lock_reserve(tr, TRACE_CTX,
+ sizeof(*entry), flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, flags, pc);
- entry->ent.type = TRACE_CTX;
entry->prev_pid = prev->pid;
entry->prev_prio = prev->prio;
entry->prev_state = prev->state;
@@ -1102,29 +1043,23 @@ tracing_sched_switch_trace(struct trace_array *tr,
entry->next_prio = next->prio;
entry->next_state = next->state;
entry->next_cpu = task_cpu(next);
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
- ftrace_trace_stack(tr, data, flags, 5, pc);
- ftrace_trace_userstack(tr, data, flags, pc);
+ trace_buffer_unlock_commit(tr, event, flags, pc);
}
void
tracing_sched_wakeup_trace(struct trace_array *tr,
- struct trace_array_cpu *data,
struct task_struct *wakee,
struct task_struct *curr,
unsigned long flags, int pc)
{
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
- unsigned long irq_flags;
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
+ event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
+ sizeof(*entry), flags, pc);
if (!event)
return;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, flags, pc);
- entry->ent.type = TRACE_WAKE;
entry->prev_pid = curr->pid;
entry->prev_prio = curr->prio;
entry->prev_state = curr->state;
@@ -1132,11 +1067,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
entry->next_prio = wakee->prio;
entry->next_state = wakee->state;
entry->next_cpu = task_cpu(wakee);
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
- ftrace_trace_stack(tr, data, flags, 6, pc);
- ftrace_trace_userstack(tr, data, flags, pc);
- trace_wake_up();
+ ring_buffer_unlock_commit(tr->buffer, event);
+ ftrace_trace_stack(tr, flags, 6, pc);
+ ftrace_trace_userstack(tr, flags, pc);
}
void
@@ -1157,66 +1091,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
data = tr->data[cpu];
if (likely(atomic_inc_return(&data->disabled) == 1))
- ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
-
- atomic_dec(&data->disabled);
- local_irq_restore(flags);
-}
-
-#ifdef CONFIG_FUNCTION_TRACER
-static void
-function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
-{
- struct trace_array *tr = &global_trace;
- struct trace_array_cpu *data;
- unsigned long flags;
- long disabled;
- int cpu, resched;
- int pc;
-
- if (unlikely(!ftrace_function_enabled))
- return;
-
- pc = preempt_count();
- resched = ftrace_preempt_disable();
- local_save_flags(flags);
- cpu = raw_smp_processor_id();
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
-
- if (likely(disabled == 1))
- trace_function(tr, data, ip, parent_ip, flags, pc);
-
- atomic_dec(&data->disabled);
- ftrace_preempt_enable(resched);
-}
-
-static void
-function_trace_call(unsigned long ip, unsigned long parent_ip)
-{
- struct trace_array *tr = &global_trace;
- struct trace_array_cpu *data;
- unsigned long flags;
- long disabled;
- int cpu;
- int pc;
-
- if (unlikely(!ftrace_function_enabled))
- return;
-
- /*
- * Need to use raw, since this must be called before the
- * recursive protection is performed.
- */
- local_irq_save(flags);
- cpu = raw_smp_processor_id();
- data = tr->data[cpu];
- disabled = atomic_inc_return(&data->disabled);
-
- if (likely(disabled == 1)) {
- pc = preempt_count();
- trace_function(tr, data, ip, parent_ip, flags, pc);
- }
+ ftrace_trace_special(tr, arg1, arg2, arg3, pc);
atomic_dec(&data->disabled);
local_irq_restore(flags);
@@ -1244,7 +1119,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1)) {
pc = preempt_count();
- __trace_graph_entry(tr, data, trace, flags, pc);
+ __trace_graph_entry(tr, trace, flags, pc);
}
/* Only do the atomic if it is not already set */
if (!test_tsk_trace_graph(current))
@@ -1270,7 +1145,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1)) {
pc = preempt_count();
- __trace_graph_return(tr, data, trace, flags, pc);
+ __trace_graph_return(tr, trace, flags, pc);
}
if (!trace->depth)
clear_tsk_trace_graph(current);
@@ -1279,31 +1154,6 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-static struct ftrace_ops trace_ops __read_mostly =
-{
- .func = function_trace_call,
-};
-
-void tracing_start_function_trace(void)
-{
- ftrace_function_enabled = 0;
-
- if (trace_flags & TRACE_ITER_PREEMPTONLY)
- trace_ops.func = function_trace_call_preempt_only;
- else
- trace_ops.func = function_trace_call;
-
- register_ftrace_function(&trace_ops);
- ftrace_function_enabled = 1;
-}
-
-void tracing_stop_function_trace(void)
-{
- ftrace_function_enabled = 0;
- unregister_ftrace_function(&trace_ops);
-}
-#endif
-
enum trace_file_type {
TRACE_FILE_LAT_FMT = 1,
TRACE_FILE_ANNOTATE = 2,
@@ -1376,8 +1226,8 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
}
/* Find the next real entry, without updating the iterator itself */
-static struct trace_entry *
-find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
+struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
+ int *ent_cpu, u64 *ent_ts)
{
return __find_next_entry(iter, ent_cpu, ent_ts);
}
@@ -1472,154 +1322,6 @@ static void s_stop(struct seq_file *m, void *p)
mutex_unlock(&trace_types_lock);
}
-#ifdef CONFIG_KRETPROBES
-static inline const char *kretprobed(const char *name)
-{
- static const char tramp_name[] = "kretprobe_trampoline";
- int size = sizeof(tramp_name);
-
- if (strncmp(tramp_name, name, size) == 0)
- return "[unknown/kretprobe'd]";
- return name;
-}
-#else
-static inline const char *kretprobed(const char *name)
-{
- return name;
-}
-#endif /* CONFIG_KRETPROBES */
-
-static int
-seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
-{
-#ifdef CONFIG_KALLSYMS
- char str[KSYM_SYMBOL_LEN];
- const char *name;
-
- kallsyms_lookup(address, NULL, NULL, NULL, str);
-
- name = kretprobed(str);
-
- return trace_seq_printf(s, fmt, name);
-#endif
- return 1;
-}
-
-static int
-seq_print_sym_offset(struct trace_seq *s, const char *fmt,
- unsigned long address)
-{
-#ifdef CONFIG_KALLSYMS
- char str[KSYM_SYMBOL_LEN];
- const char *name;
-
- sprint_symbol(str, address);
- name = kretprobed(str);
-
- return trace_seq_printf(s, fmt, name);
-#endif
- return 1;
-}
-
-#ifndef CONFIG_64BIT
-# define IP_FMT "%08lx"
-#else
-# define IP_FMT "%016lx"
-#endif
-
-int
-seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
-{
- int ret;
-
- if (!ip)
- return trace_seq_printf(s, "0");
-
- if (sym_flags & TRACE_ITER_SYM_OFFSET)
- ret = seq_print_sym_offset(s, "%s", ip);
- else
- ret = seq_print_sym_short(s, "%s", ip);
-
- if (!ret)
- return 0;
-
- if (sym_flags & TRACE_ITER_SYM_ADDR)
- ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
- return ret;
-}
-
-static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
- unsigned long ip, unsigned long sym_flags)
-{
- struct file *file = NULL;
- unsigned long vmstart = 0;
- int ret = 1;
-
- if (mm) {
- const struct vm_area_struct *vma;
-
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, ip);
- if (vma) {
- file = vma->vm_file;
- vmstart = vma->vm_start;
- }
- if (file) {
- ret = trace_seq_path(s, &file->f_path);
- if (ret)
- ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
- }
- up_read(&mm->mmap_sem);
- }
- if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
- ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
- return ret;
-}
-
-static int
-seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
- unsigned long sym_flags)
-{
- struct mm_struct *mm = NULL;
- int ret = 1;
- unsigned int i;
-
- if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
- struct task_struct *task;
- /*
- * we do the lookup on the thread group leader,
- * since individual threads might have already quit!
- */
- rcu_read_lock();
- task = find_task_by_vpid(entry->ent.tgid);
- if (task)
- mm = get_task_mm(task);
- rcu_read_unlock();
- }
-
- for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
- unsigned long ip = entry->caller[i];
-
- if (ip == ULONG_MAX || !ret)
- break;
- if (i && ret)
- ret = trace_seq_puts(s, " <- ");
- if (!ip) {
- if (ret)
- ret = trace_seq_puts(s, "??");
- continue;
- }
- if (!ret)
- break;
- if (ret)
- ret = seq_print_user_ip(s, mm, ip, sym_flags);
- }
-
- if (mm)
- mmput(mm);
- return ret;
-}
-
static void print_lat_help_header(struct seq_file *m)
{
seq_puts(m, "# _------=> CPU# \n");
@@ -1704,103 +1406,6 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
seq_puts(m, "\n");
}
-static void
-lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
-{
- int hardirq, softirq;
- char *comm;
-
- comm = trace_find_cmdline(entry->pid);
-
- trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
- trace_seq_printf(s, "%3d", cpu);
- trace_seq_printf(s, "%c%c",
- (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
- (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
- ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
-
- hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
- softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
- if (hardirq && softirq) {
- trace_seq_putc(s, 'H');
- } else {
- if (hardirq) {
- trace_seq_putc(s, 'h');
- } else {
- if (softirq)
- trace_seq_putc(s, 's');
- else
- trace_seq_putc(s, '.');
- }
- }
-
- if (entry->preempt_count)
- trace_seq_printf(s, "%x", entry->preempt_count);
- else
- trace_seq_puts(s, ".");
-}
-
-unsigned long preempt_mark_thresh = 100;
-
-static void
-lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
- unsigned long rel_usecs)
-{
- trace_seq_printf(s, " %4lldus", abs_usecs);
- if (rel_usecs > preempt_mark_thresh)
- trace_seq_puts(s, "!: ");
- else if (rel_usecs > 1)
- trace_seq_puts(s, "+: ");
- else
- trace_seq_puts(s, " : ");
-}
-
-static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
-
-static int task_state_char(unsigned long state)
-{
- int bit = state ? __ffs(state) + 1 : 0;
-
- return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
-}
-
-/*
- * The message is supposed to contain an ending newline.
- * If the printing stops prematurely, try to add a newline of our own.
- */
-void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
-{
- struct trace_entry *ent;
- struct trace_field_cont *cont;
- bool ok = true;
-
- ent = peek_next_entry(iter, iter->cpu, NULL);
- if (!ent || ent->type != TRACE_CONT) {
- trace_seq_putc(s, '\n');
- return;
- }
-
- do {
- cont = (struct trace_field_cont *)ent;
- if (ok)
- ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
-
- ftrace_disable_cpu();
-
- if (iter->buffer_iter[iter->cpu])
- ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
- else
- ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
-
- ftrace_enable_cpu();
-
- ent = peek_next_entry(iter, iter->cpu, NULL);
- } while (ent && ent->type == TRACE_CONT);
-
- if (!ok)
- trace_seq_putc(s, '\n');
-}
-
static void test_cpu_buff_start(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
@@ -1818,138 +1423,31 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
}
-static enum print_line_t
-print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
+static enum print_line_t print_lat_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
- struct trace_entry *next_entry;
- unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
+ struct trace_event *event;
struct trace_entry *entry = iter->ent;
- unsigned long abs_usecs;
- unsigned long rel_usecs;
- u64 next_ts;
- char *comm;
- int S, T;
- int i;
-
- if (entry->type == TRACE_CONT)
- return TRACE_TYPE_HANDLED;
test_cpu_buff_start(iter);
- next_entry = find_next_entry(iter, NULL, &next_ts);
- if (!next_entry)
- next_ts = iter->ts;
- rel_usecs = ns2usecs(next_ts - iter->ts);
- abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
-
- if (verbose) {
- comm = trace_find_cmdline(entry->pid);
- trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
- " %ld.%03ldms (+%ld.%03ldms): ",
- comm,
- entry->pid, cpu, entry->flags,
- entry->preempt_count, trace_idx,
- ns2usecs(iter->ts),
- abs_usecs/1000,
- abs_usecs % 1000, rel_usecs/1000,
- rel_usecs % 1000);
- } else {
- lat_print_generic(s, entry, cpu);
- lat_print_timestamp(s, abs_usecs, rel_usecs);
- }
- switch (entry->type) {
- case TRACE_FN: {
- struct ftrace_entry *field;
-
- trace_assign_type(field, entry);
-
- seq_print_ip_sym(s, field->ip, sym_flags);
- trace_seq_puts(s, " (");
- seq_print_ip_sym(s, field->parent_ip, sym_flags);
- trace_seq_puts(s, ")\n");
- break;
- }
- case TRACE_CTX:
- case TRACE_WAKE: {
- struct ctx_switch_entry *field;
-
- trace_assign_type(field, entry);
-
- T = task_state_char(field->next_state);
- S = task_state_char(field->prev_state);
- comm = trace_find_cmdline(field->next_pid);
- trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
- field->prev_pid,
- field->prev_prio,
- S, entry->type == TRACE_CTX ? "==>" : " +",
- field->next_cpu,
- field->next_pid,
- field->next_prio,
- T, comm);
- break;
- }
- case TRACE_SPECIAL: {
- struct special_entry *field;
-
- trace_assign_type(field, entry);
-
- trace_seq_printf(s, "# %ld %ld %ld\n",
- field->arg1,
- field->arg2,
- field->arg3);
- break;
- }
- case TRACE_STACK: {
- struct stack_entry *field;
-
- trace_assign_type(field, entry);
-
- for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
- if (i)
- trace_seq_puts(s, " <= ");
- seq_print_ip_sym(s, field->caller[i], sym_flags);
- }
- trace_seq_puts(s, "\n");
- break;
- }
- case TRACE_PRINT: {
- struct print_entry *field;
-
- trace_assign_type(field, entry);
+ event = ftrace_find_event(entry->type);
- seq_print_ip_sym(s, field->ip, sym_flags);
- trace_seq_printf(s, ": %s", field->buf);
- if (entry->flags & TRACE_FLAG_CONT)
- trace_seq_print_cont(s, iter);
- break;
+ if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
+ if (!trace_print_lat_context(iter))
+ goto partial;
}
- case TRACE_BRANCH: {
- struct trace_branch *field;
- trace_assign_type(field, entry);
+ if (event)
+ return event->latency_trace(iter, sym_flags);
- trace_seq_printf(s, "[%s] %s:%s:%d\n",
- field->correct ? " ok " : " MISS ",
- field->func,
- field->file,
- field->line);
- break;
- }
- case TRACE_USER_STACK: {
- struct userstack_entry *field;
+ if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
+ goto partial;
- trace_assign_type(field, entry);
-
- seq_print_userip_objs(field, s, sym_flags);
- trace_seq_putc(s, '\n');
- break;
- }
- default:
- trace_seq_printf(s, "Unknown type %d\n", entry->type);
- }
return TRACE_TYPE_HANDLED;
+partial:
+ return TRACE_TYPE_PARTIAL_LINE;
}
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
@@ -1957,313 +1455,78 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
struct trace_seq *s = &iter->seq;
unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
struct trace_entry *entry;
- unsigned long usec_rem;
- unsigned long long t;
- unsigned long secs;
- char *comm;
- int ret;
- int S, T;
- int i;
+ struct trace_event *event;
entry = iter->ent;
- if (entry->type == TRACE_CONT)
- return TRACE_TYPE_HANDLED;
-
test_cpu_buff_start(iter);
- comm = trace_find_cmdline(iter->ent->pid);
-
- t = ns2usecs(iter->ts);
- usec_rem = do_div(t, 1000000ULL);
- secs = (unsigned long)t;
-
- ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- switch (entry->type) {
- case TRACE_FN: {
- struct ftrace_entry *field;
-
- trace_assign_type(field, entry);
-
- ret = seq_print_ip_sym(s, field->ip, sym_flags);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
- field->parent_ip) {
- ret = trace_seq_printf(s, " <-");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- ret = seq_print_ip_sym(s,
- field->parent_ip,
- sym_flags);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
- ret = trace_seq_printf(s, "\n");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- break;
- }
- case TRACE_CTX:
- case TRACE_WAKE: {
- struct ctx_switch_entry *field;
-
- trace_assign_type(field, entry);
-
- T = task_state_char(field->next_state);
- S = task_state_char(field->prev_state);
- ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
- field->prev_pid,
- field->prev_prio,
- S,
- entry->type == TRACE_CTX ? "==>" : " +",
- field->next_cpu,
- field->next_pid,
- field->next_prio,
- T);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- break;
- }
- case TRACE_SPECIAL: {
- struct special_entry *field;
-
- trace_assign_type(field, entry);
-
- ret = trace_seq_printf(s, "# %ld %ld %ld\n",
- field->arg1,
- field->arg2,
- field->arg3);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- break;
- }
- case TRACE_STACK: {
- struct stack_entry *field;
-
- trace_assign_type(field, entry);
-
- for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
- if (i) {
- ret = trace_seq_puts(s, " <= ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
- ret = seq_print_ip_sym(s, field->caller[i],
- sym_flags);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
- ret = trace_seq_puts(s, "\n");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- break;
- }
- case TRACE_PRINT: {
- struct print_entry *field;
-
- trace_assign_type(field, entry);
+ event = ftrace_find_event(entry->type);
- seq_print_ip_sym(s, field->ip, sym_flags);
- trace_seq_printf(s, ": %s", field->buf);
- if (entry->flags & TRACE_FLAG_CONT)
- trace_seq_print_cont(s, iter);
- break;
- }
- case TRACE_GRAPH_RET: {
- return print_graph_function(iter);
- }
- case TRACE_GRAPH_ENT: {
- return print_graph_function(iter);
+ if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
+ if (!trace_print_context(iter))
+ goto partial;
}
- case TRACE_BRANCH: {
- struct trace_branch *field;
- trace_assign_type(field, entry);
+ if (event)
+ return event->trace(iter, sym_flags);
- trace_seq_printf(s, "[%s] %s:%s:%d\n",
- field->correct ? " ok " : " MISS ",
- field->func,
- field->file,
- field->line);
- break;
- }
- case TRACE_USER_STACK: {
- struct userstack_entry *field;
+ if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
+ goto partial;
- trace_assign_type(field, entry);
-
- ret = seq_print_userip_objs(field, s, sym_flags);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- ret = trace_seq_putc(s, '\n');
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- break;
- }
- }
return TRACE_TYPE_HANDLED;
+partial:
+ return TRACE_TYPE_PARTIAL_LINE;
}
static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
struct trace_entry *entry;
- int ret;
- int S, T;
+ struct trace_event *event;
entry = iter->ent;
- if (entry->type == TRACE_CONT)
- return TRACE_TYPE_HANDLED;
-
- ret = trace_seq_printf(s, "%d %d %llu ",
- entry->pid, iter->cpu, iter->ts);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- switch (entry->type) {
- case TRACE_FN: {
- struct ftrace_entry *field;
-
- trace_assign_type(field, entry);
-
- ret = trace_seq_printf(s, "%x %x\n",
- field->ip,
- field->parent_ip);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- break;
- }
- case TRACE_CTX:
- case TRACE_WAKE: {
- struct ctx_switch_entry *field;
-
- trace_assign_type(field, entry);
-
- T = task_state_char(field->next_state);
- S = entry->type == TRACE_WAKE ? '+' :
- task_state_char(field->prev_state);
- ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
- field->prev_pid,
- field->prev_prio,
- S,
- field->next_cpu,
- field->next_pid,
- field->next_prio,
- T);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- break;
+ if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
+ if (!trace_seq_printf(s, "%d %d %llu ",
+ entry->pid, iter->cpu, iter->ts))
+ goto partial;
}
- case TRACE_SPECIAL:
- case TRACE_USER_STACK:
- case TRACE_STACK: {
- struct special_entry *field;
- trace_assign_type(field, entry);
+ event = ftrace_find_event(entry->type);
+ if (event)
+ return event->raw(iter, 0);
- ret = trace_seq_printf(s, "# %ld %ld %ld\n",
- field->arg1,
- field->arg2,
- field->arg3);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- break;
- }
- case TRACE_PRINT: {
- struct print_entry *field;
+ if (!trace_seq_printf(s, "%d ?\n", entry->type))
+ goto partial;
- trace_assign_type(field, entry);
-
- trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
- if (entry->flags & TRACE_FLAG_CONT)
- trace_seq_print_cont(s, iter);
- break;
- }
- }
return TRACE_TYPE_HANDLED;
+partial:
+ return TRACE_TYPE_PARTIAL_LINE;
}
-#define SEQ_PUT_FIELD_RET(s, x) \
-do { \
- if (!trace_seq_putmem(s, &(x), sizeof(x))) \
- return 0; \
-} while (0)
-
-#define SEQ_PUT_HEX_FIELD_RET(s, x) \
-do { \
- BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
- if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
- return 0; \
-} while (0)
-
static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
unsigned char newline = '\n';
struct trace_entry *entry;
- int S, T;
+ struct trace_event *event;
entry = iter->ent;
- if (entry->type == TRACE_CONT)
- return TRACE_TYPE_HANDLED;
-
- SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
- SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
- SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
-
- switch (entry->type) {
- case TRACE_FN: {
- struct ftrace_entry *field;
-
- trace_assign_type(field, entry);
-
- SEQ_PUT_HEX_FIELD_RET(s, field->ip);
- SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
- break;
- }
- case TRACE_CTX:
- case TRACE_WAKE: {
- struct ctx_switch_entry *field;
-
- trace_assign_type(field, entry);
-
- T = task_state_char(field->next_state);
- S = entry->type == TRACE_WAKE ? '+' :
- task_state_char(field->prev_state);
- SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
- SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
- SEQ_PUT_HEX_FIELD_RET(s, S);
- SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
- SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
- SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
- SEQ_PUT_HEX_FIELD_RET(s, T);
- break;
+ if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
+ SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
+ SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
+ SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
}
- case TRACE_SPECIAL:
- case TRACE_USER_STACK:
- case TRACE_STACK: {
- struct special_entry *field;
- trace_assign_type(field, entry);
-
- SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
- SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
- SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
- break;
- }
+ event = ftrace_find_event(entry->type);
+ if (event) {
+ enum print_line_t ret = event->hex(iter, 0);
+ if (ret != TRACE_TYPE_HANDLED)
+ return ret;
}
+
SEQ_PUT_FIELD_RET(s, newline);
return TRACE_TYPE_HANDLED;
@@ -2278,13 +1541,10 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
trace_assign_type(field, entry);
- ret = trace_seq_printf(s, field->buf);
+ ret = trace_seq_printf(s, "%s", field->buf);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- if (entry->flags & TRACE_FLAG_CONT)
- trace_seq_print_cont(s, iter);
-
return TRACE_TYPE_HANDLED;
}
@@ -2292,53 +1552,18 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
{
struct trace_seq *s = &iter->seq;
struct trace_entry *entry;
+ struct trace_event *event;
entry = iter->ent;
- if (entry->type == TRACE_CONT)
- return TRACE_TYPE_HANDLED;
-
- SEQ_PUT_FIELD_RET(s, entry->pid);
- SEQ_PUT_FIELD_RET(s, entry->cpu);
- SEQ_PUT_FIELD_RET(s, iter->ts);
-
- switch (entry->type) {
- case TRACE_FN: {
- struct ftrace_entry *field;
-
- trace_assign_type(field, entry);
-
- SEQ_PUT_FIELD_RET(s, field->ip);
- SEQ_PUT_FIELD_RET(s, field->parent_ip);
- break;
+ if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
+ SEQ_PUT_FIELD_RET(s, entry->pid);
+ SEQ_PUT_FIELD_RET(s, iter->cpu);
+ SEQ_PUT_FIELD_RET(s, iter->ts);
}
- case TRACE_CTX: {
- struct ctx_switch_entry *field;
-
- trace_assign_type(field, entry);
- SEQ_PUT_FIELD_RET(s, field->prev_pid);
- SEQ_PUT_FIELD_RET(s, field->prev_prio);
- SEQ_PUT_FIELD_RET(s, field->prev_state);
- SEQ_PUT_FIELD_RET(s, field->next_pid);
- SEQ_PUT_FIELD_RET(s, field->next_prio);
- SEQ_PUT_FIELD_RET(s, field->next_state);
- break;
- }
- case TRACE_SPECIAL:
- case TRACE_USER_STACK:
- case TRACE_STACK: {
- struct special_entry *field;
-
- trace_assign_type(field, entry);
-
- SEQ_PUT_FIELD_RET(s, field->arg1);
- SEQ_PUT_FIELD_RET(s, field->arg2);
- SEQ_PUT_FIELD_RET(s, field->arg3);
- break;
- }
- }
- return 1;
+ event = ftrace_find_event(entry->type);
+ return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED;
}
static int trace_empty(struct trace_iterator *iter)
@@ -2383,7 +1608,7 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
return print_raw_fmt(iter);
if (iter->iter_flags & TRACE_FILE_LAT_FMT)
- return print_lat_fmt(iter, iter->idx, iter->cpu);
+ return print_lat_fmt(iter);
return print_trace_fmt(iter);
}
@@ -2505,7 +1730,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
return 0;
}
-int tracing_release(struct inode *inode, struct file *file)
+static int tracing_release(struct inode *inode, struct file *file)
{
struct seq_file *m = (struct seq_file *)file->private_data;
struct trace_iterator *iter = m->private;
@@ -2748,7 +1973,7 @@ tracing_trace_options_read(struct file *filp, char __user *ubuf,
struct tracer_opt *trace_opts = current_trace->flags->opts;
- /* calulate max size */
+ /* calculate max size */
for (i = 0; trace_options[i]; i++) {
len += strlen(trace_options[i]);
len += 3; /* "no" and space */
@@ -2930,7 +2155,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
{
struct trace_array *tr = filp->private_data;
char buf[64];
- long val;
+ unsigned long val;
int ret;
if (cnt >= sizeof(buf))
@@ -2985,7 +2210,13 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
-static int tracing_set_tracer(char *buf)
+int tracer_init(struct tracer *t, struct trace_array *tr)
+{
+ tracing_reset_online_cpus(tr);
+ return t->init(tr);
+}
+
+static int tracing_set_tracer(const char *buf)
{
struct trace_array *tr = &global_trace;
struct tracer *t;
@@ -3009,7 +2240,7 @@ static int tracing_set_tracer(char *buf)
current_trace = t;
if (t->init) {
- ret = t->init(tr);
+ ret = tracer_init(t, tr);
if (ret)
goto out;
}
@@ -3072,9 +2303,9 @@ static ssize_t
tracing_max_lat_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- long *ptr = filp->private_data;
+ unsigned long *ptr = filp->private_data;
char buf[64];
- long val;
+ unsigned long val;
int ret;
if (cnt >= sizeof(buf))
@@ -3167,67 +2398,60 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
}
}
-/*
- * Consumer reader.
- */
-static ssize_t
-tracing_read_pipe(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
+
+void default_wait_pipe(struct trace_iterator *iter)
{
- struct trace_iterator *iter = filp->private_data;
- ssize_t sret;
+ DEFINE_WAIT(wait);
- /* return any leftover data */
- sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
- if (sret != -EBUSY)
- return sret;
+ prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
- trace_seq_reset(&iter->seq);
+ if (trace_empty(iter))
+ schedule();
- mutex_lock(&trace_types_lock);
- if (iter->trace->read) {
- sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
- if (sret)
- goto out;
- }
+ finish_wait(&trace_wait, &wait);
+}
+
+/*
+ * This is a make-shift waitqueue.
+ * A tracer might use this callback on some rare cases:
+ *
+ * 1) the current tracer might hold the runqueue lock when it wakes up
+ * a reader, hence a deadlock (sched, function, and function graph tracers)
+ * 2) the function tracers, trace all functions, we don't want
+ * the overhead of calling wake_up and friends
+ * (and tracing them too)
+ *
+ * Anyway, this is really very primitive wakeup.
+ */
+void poll_wait_pipe(struct trace_iterator *iter)
+{
+ set_current_state(TASK_INTERRUPTIBLE);
+ /* sleep for 100 msecs, and try again. */
+ schedule_timeout(HZ / 10);
+}
+
+/* Must be called with trace_types_lock mutex held. */
+static int tracing_wait_pipe(struct file *filp)
+{
+ struct trace_iterator *iter = filp->private_data;
-waitagain:
- sret = 0;
while (trace_empty(iter)) {
if ((filp->f_flags & O_NONBLOCK)) {
- sret = -EAGAIN;
- goto out;
+ return -EAGAIN;
}
- /*
- * This is a make-shift waitqueue. The reason we don't use
- * an actual wait queue is because:
- * 1) we only ever have one waiter
- * 2) the tracing, traces all functions, we don't want
- * the overhead of calling wake_up and friends
- * (and tracing them too)
- * Anyway, this is really very primitive wakeup.
- */
- set_current_state(TASK_INTERRUPTIBLE);
- iter->tr->waiter = current;
-
mutex_unlock(&trace_types_lock);
- /* sleep for 100 msecs, and try again. */
- schedule_timeout(HZ/10);
+ iter->trace->wait_pipe(iter);
mutex_lock(&trace_types_lock);
- iter->tr->waiter = NULL;
-
- if (signal_pending(current)) {
- sret = -EINTR;
- goto out;
- }
+ if (signal_pending(current))
+ return -EINTR;
if (iter->trace != current_trace)
- goto out;
+ return 0;
/*
* We block until we read something and tracing is disabled.
@@ -3240,13 +2464,45 @@ waitagain:
*/
if (!tracer_enabled && iter->pos)
break;
+ }
+
+ return 1;
+}
+
+/*
+ * Consumer reader.
+ */
+static ssize_t
+tracing_read_pipe(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_iterator *iter = filp->private_data;
+ ssize_t sret;
- continue;
+ /* return any leftover data */
+ sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+ if (sret != -EBUSY)
+ return sret;
+
+ trace_seq_reset(&iter->seq);
+
+ mutex_lock(&trace_types_lock);
+ if (iter->trace->read) {
+ sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
+ if (sret)
+ goto out;
}
+waitagain:
+ sret = tracing_wait_pipe(filp);
+ if (sret <= 0)
+ goto out;
+
/* stop when tracing is finished */
- if (trace_empty(iter))
+ if (trace_empty(iter)) {
+ sret = 0;
goto out;
+ }
if (cnt >= PAGE_SIZE)
cnt = PAGE_SIZE - 1;
@@ -3267,8 +2523,8 @@ waitagain:
iter->seq.len = len;
break;
}
-
- trace_consume(iter);
+ if (ret != TRACE_TYPE_NO_CONSUME)
+ trace_consume(iter);
if (iter->seq.len >= cnt)
break;
@@ -3292,6 +2548,134 @@ out:
return sret;
}
+static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ __free_page(buf->page);
+}
+
+static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
+ unsigned int idx)
+{
+ __free_page(spd->pages[idx]);
+}
+
+static struct pipe_buf_operations tracing_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = tracing_pipe_buf_release,
+ .steal = generic_pipe_buf_steal,
+ .get = generic_pipe_buf_get,
+};
+
+static size_t
+tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
+{
+ size_t count;
+ int ret;
+
+ /* Seq buffer is page-sized, exactly what we need. */
+ for (;;) {
+ count = iter->seq.len;
+ ret = print_trace_line(iter);
+ count = iter->seq.len - count;
+ if (rem < count) {
+ rem = 0;
+ iter->seq.len -= count;
+ break;
+ }
+ if (ret == TRACE_TYPE_PARTIAL_LINE) {
+ iter->seq.len -= count;
+ break;
+ }
+
+ trace_consume(iter);
+ rem -= count;
+ if (!find_next_entry_inc(iter)) {
+ rem = 0;
+ iter->ent = NULL;
+ break;
+ }
+ }
+
+ return rem;
+}
+
+static ssize_t tracing_splice_read_pipe(struct file *filp,
+ loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len,
+ unsigned int flags)
+{
+ struct page *pages[PIPE_BUFFERS];
+ struct partial_page partial[PIPE_BUFFERS];
+ struct trace_iterator *iter = filp->private_data;
+ struct splice_pipe_desc spd = {
+ .pages = pages,
+ .partial = partial,
+ .nr_pages = 0, /* This gets updated below. */
+ .flags = flags,
+ .ops = &tracing_pipe_buf_ops,
+ .spd_release = tracing_spd_release_pipe,
+ };
+ ssize_t ret;
+ size_t rem;
+ unsigned int i;
+
+ mutex_lock(&trace_types_lock);
+
+ if (iter->trace->splice_read) {
+ ret = iter->trace->splice_read(iter, filp,
+ ppos, pipe, len, flags);
+ if (ret)
+ goto out_err;
+ }
+
+ ret = tracing_wait_pipe(filp);
+ if (ret <= 0)
+ goto out_err;
+
+ if (!iter->ent && !find_next_entry_inc(iter)) {
+ ret = -EFAULT;
+ goto out_err;
+ }
+
+ /* Fill as many pages as possible. */
+ for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
+ pages[i] = alloc_page(GFP_KERNEL);
+ if (!pages[i])
+ break;
+
+ rem = tracing_fill_pipe_page(rem, iter);
+
+ /* Copy the data into the page, so we can start over. */
+ ret = trace_seq_to_buffer(&iter->seq,
+ page_address(pages[i]),
+ iter->seq.len);
+ if (ret < 0) {
+ __free_page(pages[i]);
+ break;
+ }
+ partial[i].offset = 0;
+ partial[i].len = iter->seq.len;
+
+ trace_seq_reset(&iter->seq);
+ }
+
+ mutex_unlock(&trace_types_lock);
+
+ spd.nr_pages = i;
+
+ return splice_to_pipe(pipe, &spd);
+
+out_err:
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
+}
+
static ssize_t
tracing_entries_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
@@ -3455,6 +2839,7 @@ static struct file_operations tracing_pipe_fops = {
.open = tracing_open_pipe,
.poll = tracing_poll_pipe,
.read = tracing_read_pipe,
+ .splice_read = tracing_splice_read_pipe,
.release = tracing_release_pipe,
};
@@ -3653,18 +3038,16 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
trace_buf[len] = 0;
size = sizeof(*entry) + len + 1;
- event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
+ event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc);
if (!event)
goto out_unlock;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, irq_flags, pc);
- entry->ent.type = TRACE_PRINT;
entry->ip = ip;
entry->depth = depth;
memcpy(&entry->buf, trace_buf, len);
entry->buf[len] = 0;
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ ring_buffer_unlock_commit(tr->buffer, event);
out_unlock:
spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
@@ -3691,6 +3074,15 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
}
EXPORT_SYMBOL_GPL(__ftrace_printk);
+int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
+{
+ if (!(trace_flags & TRACE_ITER_PRINTK))
+ return 0;
+
+ return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
+}
+EXPORT_SYMBOL_GPL(__ftrace_vprintk);
+
static int trace_panic_handler(struct notifier_block *this,
unsigned long event, void *unused)
{
@@ -3871,14 +3263,10 @@ __init static int tracer_alloc_buffers(void)
trace_init_cmdlines();
register_tracer(&nop_trace);
+ current_trace = &nop_trace;
#ifdef CONFIG_BOOT_TRACER
register_tracer(&boot_tracer);
- current_trace = &boot_tracer;
- current_trace->init(&global_trace);
-#else
- current_trace = &nop_trace;
#endif
-
/* All seems OK, enable tracing */
tracing_disabled = 0;
@@ -3895,5 +3283,26 @@ out_free_buffer_mask:
out:
return ret;
}
+
+__init static int clear_boot_tracer(void)
+{
+ /*
+ * The default tracer at boot buffer is an init section.
+ * This function is called in lateinit. If we did not
+ * find the boot tracer, then clear it out, to prevent
+ * later registration from accessing the buffer that is
+ * about to be freed.
+ */
+ if (!default_bootup_tracer)
+ return 0;
+
+ printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
+ default_bootup_tracer);
+ default_bootup_tracer = NULL;
+
+ return 0;
+}
+
early_initcall(tracer_alloc_buffers);
fs_initcall(tracer_init_debugfs);
+late_initcall(clear_boot_tracer);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4d3d381bfd9..eed732c151f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,6 +9,8 @@
#include <linux/mmiotrace.h>
#include <linux/ftrace.h>
#include <trace/boot.h>
+#include <trace/kmemtrace.h>
+#include <trace/power.h>
enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -16,7 +18,6 @@ enum trace_type {
TRACE_FN,
TRACE_CTX,
TRACE_WAKE,
- TRACE_CONT,
TRACE_STACK,
TRACE_PRINT,
TRACE_SPECIAL,
@@ -29,9 +30,12 @@ enum trace_type {
TRACE_GRAPH_ENT,
TRACE_USER_STACK,
TRACE_HW_BRANCHES,
+ TRACE_KMEM_ALLOC,
+ TRACE_KMEM_FREE,
TRACE_POWER,
+ TRACE_BLK,
- __TRACE_LAST_TYPE
+ __TRACE_LAST_TYPE,
};
/*
@@ -42,7 +46,6 @@ enum trace_type {
*/
struct trace_entry {
unsigned char type;
- unsigned char cpu;
unsigned char flags;
unsigned char preempt_count;
int pid;
@@ -60,13 +63,13 @@ struct ftrace_entry {
/* Function call entry */
struct ftrace_graph_ent_entry {
- struct trace_entry ent;
+ struct trace_entry ent;
struct ftrace_graph_ent graph_ent;
};
/* Function return entry */
struct ftrace_graph_ret_entry {
- struct trace_entry ent;
+ struct trace_entry ent;
struct ftrace_graph_ret ret;
};
extern struct tracer boot_tracer;
@@ -170,6 +173,24 @@ struct trace_power {
struct power_trace state_data;
};
+struct kmemtrace_alloc_entry {
+ struct trace_entry ent;
+ enum kmemtrace_type_id type_id;
+ unsigned long call_site;
+ const void *ptr;
+ size_t bytes_req;
+ size_t bytes_alloc;
+ gfp_t gfp_flags;
+ int node;
+};
+
+struct kmemtrace_free_entry {
+ struct trace_entry ent;
+ enum kmemtrace_type_id type_id;
+ unsigned long call_site;
+ const void *ptr;
+};
+
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
@@ -178,7 +199,6 @@ struct trace_power {
* NEED_RESCED - reschedule is requested
* HARDIRQ - inside an interrupt handler
* SOFTIRQ - inside a softirq handler
- * CONT - multiple entries hold the trace item
*/
enum trace_flag_type {
TRACE_FLAG_IRQS_OFF = 0x01,
@@ -186,7 +206,6 @@ enum trace_flag_type {
TRACE_FLAG_NEED_RESCHED = 0x04,
TRACE_FLAG_HARDIRQ = 0x08,
TRACE_FLAG_SOFTIRQ = 0x10,
- TRACE_FLAG_CONT = 0x20,
};
#define TRACE_BUF_SIZE 1024
@@ -262,7 +281,6 @@ extern void __ftrace_bad_type(void);
do { \
IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
- IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
@@ -280,6 +298,10 @@ extern void __ftrace_bad_type(void);
TRACE_GRAPH_RET); \
IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+ IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
+ TRACE_KMEM_ALLOC); \
+ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
+ TRACE_KMEM_FREE); \
__ftrace_bad_type(); \
} while (0)
@@ -287,7 +309,8 @@ extern void __ftrace_bad_type(void);
enum print_line_t {
TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
TRACE_TYPE_HANDLED = 1,
- TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
+ TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */
+ TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
};
@@ -313,22 +336,45 @@ struct tracer_flags {
/* Makes more easy to define a tracer opt */
#define TRACER_OPT(s, b) .name = #s, .bit = b
-/*
- * A specific tracer, represented by methods that operate on a trace array:
+
+/**
+ * struct tracer - a specific tracer and its callbacks to interact with debugfs
+ * @name: the name chosen to select it on the available_tracers file
+ * @init: called when one switches to this tracer (echo name > current_tracer)
+ * @reset: called when one switches to another tracer
+ * @start: called when tracing is unpaused (echo 1 > tracing_enabled)
+ * @stop: called when tracing is paused (echo 0 > tracing_enabled)
+ * @open: called when the trace file is opened
+ * @pipe_open: called when the trace_pipe file is opened
+ * @wait_pipe: override how the user waits for traces on trace_pipe
+ * @close: called when the trace file is released
+ * @read: override the default read callback on trace_pipe
+ * @splice_read: override the default splice_read callback on trace_pipe
+ * @selftest: selftest to run on boot (see trace_selftest.c)
+ * @print_headers: override the first lines that describe your columns
+ * @print_line: callback that prints a trace
+ * @set_flag: signals one of your private flags changed (trace_options file)
+ * @flags: your private flags
*/
struct tracer {
const char *name;
- /* Your tracer should raise a warning if init fails */
int (*init)(struct trace_array *tr);
void (*reset)(struct trace_array *tr);
void (*start)(struct trace_array *tr);
void (*stop)(struct trace_array *tr);
void (*open)(struct trace_iterator *iter);
void (*pipe_open)(struct trace_iterator *iter);
+ void (*wait_pipe)(struct trace_iterator *iter);
void (*close)(struct trace_iterator *iter);
ssize_t (*read)(struct trace_iterator *iter,
struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos);
+ ssize_t (*splice_read)(struct trace_iterator *iter,
+ struct file *filp,
+ loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len,
+ unsigned int flags);
#ifdef CONFIG_FTRACE_STARTUP_TEST
int (*selftest)(struct tracer *trace,
struct trace_array *tr);
@@ -340,6 +386,7 @@ struct tracer {
struct tracer *next;
int print_max;
struct tracer_flags *flags;
+ struct tracer_stat *stats;
};
struct trace_seq {
@@ -371,6 +418,7 @@ struct trace_iterator {
cpumask_var_t started;
};
+int tracer_init(struct tracer *t, struct trace_array *tr);
int tracing_is_enabled(void);
void trace_wake_up(void);
void tracing_reset(struct trace_array *tr, int cpu);
@@ -379,26 +427,42 @@ int tracing_open_generic(struct inode *inode, struct file *filp);
struct dentry *tracing_init_dentry(void);
void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
+struct ring_buffer_event;
+
+struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
+ unsigned char type,
+ unsigned long len,
+ unsigned long flags,
+ int pc);
+void trace_buffer_unlock_commit(struct trace_array *tr,
+ struct ring_buffer_event *event,
+ unsigned long flags, int pc);
+
struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
struct trace_array_cpu *data);
+
+struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
+ int *ent_cpu, u64 *ent_ts);
+
void tracing_generic_entry_update(struct trace_entry *entry,
unsigned long flags,
int pc);
+void default_wait_pipe(struct trace_iterator *iter);
+void poll_wait_pipe(struct trace_iterator *iter);
+
void ftrace(struct trace_array *tr,
struct trace_array_cpu *data,
unsigned long ip,
unsigned long parent_ip,
unsigned long flags, int pc);
void tracing_sched_switch_trace(struct trace_array *tr,
- struct trace_array_cpu *data,
struct task_struct *prev,
struct task_struct *next,
unsigned long flags, int pc);
void tracing_record_cmdline(struct task_struct *tsk);
void tracing_sched_wakeup_trace(struct trace_array *tr,
- struct trace_array_cpu *data,
struct task_struct *wakee,
struct task_struct *cur,
unsigned long flags, int pc);
@@ -408,14 +472,12 @@ void trace_special(struct trace_array *tr,
unsigned long arg2,
unsigned long arg3, int pc);
void trace_function(struct trace_array *tr,
- struct trace_array_cpu *data,
unsigned long ip,
unsigned long parent_ip,
unsigned long flags, int pc);
void trace_graph_return(struct ftrace_graph_ret *trace);
int trace_graph_entry(struct ftrace_graph_ent *trace);
-void trace_hw_branch(struct trace_array *tr, u64 from, u64 to);
void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
@@ -434,15 +496,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
void update_max_tr_single(struct trace_array *tr,
struct task_struct *tsk, int cpu);
-extern cycle_t ftrace_now(int cpu);
+void __trace_stack(struct trace_array *tr,
+ unsigned long flags,
+ int skip, int pc);
-#ifdef CONFIG_FUNCTION_TRACER
-void tracing_start_function_trace(void);
-void tracing_stop_function_trace(void);
-#else
-# define tracing_start_function_trace() do { } while (0)
-# define tracing_stop_function_trace() do { } while (0)
-#endif
+extern cycle_t ftrace_now(int cpu);
#ifdef CONFIG_CONTEXT_SWITCH_TRACER
typedef void
@@ -456,10 +514,10 @@ struct tracer_switch_ops {
void *private;
struct tracer_switch_ops *next;
};
-
-char *trace_find_cmdline(int pid);
#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
+extern char *trace_find_cmdline(int pid);
+
#ifdef CONFIG_DYNAMIC_FTRACE
extern unsigned long ftrace_update_tot_cnt;
#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
@@ -469,6 +527,8 @@ extern int DYN_FTRACE_TEST_NAME(void);
#ifdef CONFIG_FTRACE_STARTUP_TEST
extern int trace_selftest_startup_function(struct tracer *trace,
struct trace_array *tr);
+extern int trace_selftest_startup_function_graph(struct tracer *trace,
+ struct trace_array *tr);
extern int trace_selftest_startup_irqsoff(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_preemptoff(struct tracer *trace,
@@ -488,15 +548,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
-extern void trace_seq_print_cont(struct trace_seq *s,
- struct trace_iterator *iter);
-
-extern int
-seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
- unsigned long sym_flags);
-extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
- size_t cnt);
extern long ns2usecs(cycle_t nsec);
extern int
trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
@@ -580,7 +631,8 @@ enum trace_iterator_flags {
TRACE_ITER_ANNOTATE = 0x2000,
TRACE_ITER_USERSTACKTRACE = 0x4000,
TRACE_ITER_SYM_USEROBJ = 0x8000,
- TRACE_ITER_PRINTK_MSGONLY = 0x10000
+ TRACE_ITER_PRINTK_MSGONLY = 0x10000,
+ TRACE_ITER_CONTEXT_INFO = 0x20000 /* Print pid/cpu/time */
};
/*
@@ -601,12 +653,12 @@ extern struct tracer nop_trace;
* preempt_enable (after a disable), a schedule might take place
* causing an infinite recursion.
*
- * To prevent this, we read the need_recshed flag before
+ * To prevent this, we read the need_resched flag before
* disabling preemption. When we want to enable preemption we
* check the flag, if it is set, then we call preempt_enable_no_resched.
* Otherwise, we call preempt_enable.
*
- * The rational for doing the above is that if need resched is set
+ * The rational for doing the above is that if need_resched is set
* and we have yet to reschedule, we are either in an atomic location
* (where we do not need to check for scheduling) or we are inside
* the scheduler and do not want to resched.
@@ -627,7 +679,7 @@ static inline int ftrace_preempt_disable(void)
*
* This is a scheduler safe way to enable preemption and not miss
* any preemption checks. The disabled saved the state of preemption.
- * If resched is set, then we were either inside an atomic or
+ * If resched is set, then we are either inside an atomic or
* are inside the scheduler (we would have already scheduled
* otherwise). In this case, we do not want to call normal
* preempt_enable, but preempt_enable_no_resched instead.
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 366c8c333e1..7a30fc4c364 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -11,6 +11,7 @@
#include <linux/kallsyms.h>
#include "trace.h"
+#include "trace_output.h"
static struct trace_array *boot_trace;
static bool pre_initcalls_finished;
@@ -27,13 +28,13 @@ void start_boot_trace(void)
void enable_boot_trace(void)
{
- if (pre_initcalls_finished)
+ if (boot_trace && pre_initcalls_finished)
tracing_start_sched_switch_record();
}
void disable_boot_trace(void)
{
- if (pre_initcalls_finished)
+ if (boot_trace && pre_initcalls_finished)
tracing_stop_sched_switch_record();
}
@@ -42,6 +43,9 @@ static int boot_trace_init(struct trace_array *tr)
int cpu;
boot_trace = tr;
+ if (!tr)
+ return 0;
+
for_each_cpu(cpu, cpu_possible_mask)
tracing_reset(tr, cpu);
@@ -128,10 +132,9 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
{
struct ring_buffer_event *event;
struct trace_boot_call *entry;
- unsigned long irq_flags;
struct trace_array *tr = boot_trace;
- if (!pre_initcalls_finished)
+ if (!tr || !pre_initcalls_finished)
return;
/* Get its name now since this function could
@@ -140,18 +143,13 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
sprint_symbol(bt->func, (unsigned long)fn);
preempt_disable();
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
+ event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL,
+ sizeof(*entry), 0, 0);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, 0, 0);
- entry->ent.type = TRACE_BOOT_CALL;
entry->boot_call = *bt;
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-
- trace_wake_up();
-
+ trace_buffer_unlock_commit(tr, event, 0, 0);
out:
preempt_enable();
}
@@ -160,27 +158,21 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
{
struct ring_buffer_event *event;
struct trace_boot_ret *entry;
- unsigned long irq_flags;
struct trace_array *tr = boot_trace;
- if (!pre_initcalls_finished)
+ if (!tr || !pre_initcalls_finished)
return;
sprint_symbol(bt->func, (unsigned long)fn);
preempt_disable();
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
+ event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET,
+ sizeof(*entry), 0, 0);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, 0, 0);
- entry->ent.type = TRACE_BOOT_RET;
entry->boot_ret = *bt;
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-
- trace_wake_up();
-
+ trace_buffer_unlock_commit(tr, event, 0, 0);
out:
preempt_enable();
}
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 6c00feb3bac..c2e68d440c4 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -14,12 +14,17 @@
#include <linux/hash.h>
#include <linux/fs.h>
#include <asm/local.h>
+
#include "trace.h"
+#include "trace_stat.h"
+#include "trace_output.h"
#ifdef CONFIG_BRANCH_TRACER
+static struct tracer branch_trace;
static int branch_tracing_enabled __read_mostly;
static DEFINE_MUTEX(branch_tracing_mutex);
+
static struct trace_array *branch_tracer;
static void
@@ -28,7 +33,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
struct trace_array *tr = branch_tracer;
struct ring_buffer_event *event;
struct trace_branch *entry;
- unsigned long flags, irq_flags;
+ unsigned long flags;
int cpu, pc;
const char *p;
@@ -47,15 +52,13 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
goto out;
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
+ pc = preempt_count();
+ event = trace_buffer_lock_reserve(tr, TRACE_BRANCH,
+ sizeof(*entry), flags, pc);
if (!event)
goto out;
- pc = preempt_count();
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, flags, pc);
- entry->ent.type = TRACE_BRANCH;
/* Strip off the path, only save the file */
p = f->file + strlen(f->file);
@@ -70,7 +73,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
entry->line = f->line;
entry->correct = val == expect;
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+ ring_buffer_unlock_commit(tr->buffer, event);
out:
atomic_dec(&tr->data[cpu]->disabled);
@@ -88,8 +91,6 @@ void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
int enable_branch_tracing(struct trace_array *tr)
{
- int ret = 0;
-
mutex_lock(&branch_tracing_mutex);
branch_tracer = tr;
/*
@@ -100,7 +101,7 @@ int enable_branch_tracing(struct trace_array *tr)
branch_tracing_enabled++;
mutex_unlock(&branch_tracing_mutex);
- return ret;
+ return 0;
}
void disable_branch_tracing(void)
@@ -128,11 +129,6 @@ static void stop_branch_trace(struct trace_array *tr)
static int branch_trace_init(struct trace_array *tr)
{
- int cpu;
-
- for_each_online_cpu(cpu)
- tracing_reset(tr, cpu);
-
start_branch_trace(tr);
return 0;
}
@@ -142,22 +138,54 @@ static void branch_trace_reset(struct trace_array *tr)
stop_branch_trace(tr);
}
-struct tracer branch_trace __read_mostly =
+static enum print_line_t trace_branch_print(struct trace_iterator *iter,
+ int flags)
+{
+ struct trace_branch *field;
+
+ trace_assign_type(field, iter->ent);
+
+ if (trace_seq_printf(&iter->seq, "[%s] %s:%s:%d\n",
+ field->correct ? " ok " : " MISS ",
+ field->func,
+ field->file,
+ field->line))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+
+static struct trace_event trace_branch_event = {
+ .type = TRACE_BRANCH,
+ .trace = trace_branch_print,
+ .latency_trace = trace_branch_print,
+};
+
+static struct tracer branch_trace __read_mostly =
{
.name = "branch",
.init = branch_trace_init,
.reset = branch_trace_reset,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_branch,
-#endif
+#endif /* CONFIG_FTRACE_SELFTEST */
};
-__init static int init_branch_trace(void)
+__init static int init_branch_tracer(void)
{
+ int ret;
+
+ ret = register_ftrace_event(&trace_branch_event);
+ if (!ret) {
+ printk(KERN_WARNING "Warning: could not register "
+ "branch events\n");
+ return 1;
+ }
return register_tracer(&branch_trace);
}
+device_initcall(init_branch_tracer);
-device_initcall(init_branch_trace);
#else
static inline
void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
@@ -183,66 +211,39 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
}
EXPORT_SYMBOL(ftrace_likely_update);
-struct ftrace_pointer {
- void *start;
- void *stop;
- int hit;
-};
+extern unsigned long __start_annotated_branch_profile[];
+extern unsigned long __stop_annotated_branch_profile[];
-static void *
-t_next(struct seq_file *m, void *v, loff_t *pos)
+static int annotated_branch_stat_headers(struct seq_file *m)
{
- const struct ftrace_pointer *f = m->private;
- struct ftrace_branch_data *p = v;
-
- (*pos)++;
-
- if (v == (void *)1)
- return f->start;
-
- ++p;
-
- if ((void *)p >= (void *)f->stop)
- return NULL;
-
- return p;
+ seq_printf(m, " correct incorrect %% ");
+ seq_printf(m, " Function "
+ " File Line\n"
+ " ------- --------- - "
+ " -------- "
+ " ---- ----\n");
+ return 0;
}
-static void *t_start(struct seq_file *m, loff_t *pos)
+static inline long get_incorrect_percent(struct ftrace_branch_data *p)
{
- void *t = (void *)1;
- loff_t l = 0;
-
- for (; t && l < *pos; t = t_next(m, t, &l))
- ;
+ long percent;
- return t;
-}
+ if (p->correct) {
+ percent = p->incorrect * 100;
+ percent /= p->correct + p->incorrect;
+ } else
+ percent = p->incorrect ? 100 : -1;
-static void t_stop(struct seq_file *m, void *p)
-{
+ return percent;
}
-static int t_show(struct seq_file *m, void *v)
+static int branch_stat_show(struct seq_file *m, void *v)
{
- const struct ftrace_pointer *fp = m->private;
struct ftrace_branch_data *p = v;
const char *f;
long percent;
- if (v == (void *)1) {
- if (fp->hit)
- seq_printf(m, " miss hit %% ");
- else
- seq_printf(m, " correct incorrect %% ");
- seq_printf(m, " Function "
- " File Line\n"
- " ------- --------- - "
- " -------- "
- " ---- ----\n");
- return 0;
- }
-
/* Only print the file, not the path */
f = p->file + strlen(p->file);
while (f >= p->file && *f != '/')
@@ -252,11 +253,7 @@ static int t_show(struct seq_file *m, void *v)
/*
* The miss is overlayed on correct, and hit on incorrect.
*/
- if (p->correct) {
- percent = p->incorrect * 100;
- percent /= p->correct + p->incorrect;
- } else
- percent = p->incorrect ? 100 : -1;
+ percent = get_incorrect_percent(p);
seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
if (percent < 0)
@@ -267,76 +264,118 @@ static int t_show(struct seq_file *m, void *v)
return 0;
}
-static struct seq_operations tracing_likely_seq_ops = {
- .start = t_start,
- .next = t_next,
- .stop = t_stop,
- .show = t_show,
+static void *annotated_branch_stat_start(void)
+{
+ return __start_annotated_branch_profile;
+}
+
+static void *
+annotated_branch_stat_next(void *v, int idx)
+{
+ struct ftrace_branch_data *p = v;
+
+ ++p;
+
+ if ((void *)p >= (void *)__stop_annotated_branch_profile)
+ return NULL;
+
+ return p;
+}
+
+static int annotated_branch_stat_cmp(void *p1, void *p2)
+{
+ struct ftrace_branch_data *a = p1;
+ struct ftrace_branch_data *b = p2;
+
+ long percent_a, percent_b;
+
+ percent_a = get_incorrect_percent(a);
+ percent_b = get_incorrect_percent(b);
+
+ if (percent_a < percent_b)
+ return -1;
+ if (percent_a > percent_b)
+ return 1;
+ else
+ return 0;
+}
+
+static struct tracer_stat annotated_branch_stats = {
+ .name = "branch_annotated",
+ .stat_start = annotated_branch_stat_start,
+ .stat_next = annotated_branch_stat_next,
+ .stat_cmp = annotated_branch_stat_cmp,
+ .stat_headers = annotated_branch_stat_headers,
+ .stat_show = branch_stat_show
};
-static int tracing_branch_open(struct inode *inode, struct file *file)
+__init static int init_annotated_branch_stats(void)
{
int ret;
- ret = seq_open(file, &tracing_likely_seq_ops);
+ ret = register_stat_tracer(&annotated_branch_stats);
if (!ret) {
- struct seq_file *m = file->private_data;
- m->private = (void *)inode->i_private;
+ printk(KERN_WARNING "Warning: could not register "
+ "annotated branches stats\n");
+ return 1;
}
-
- return ret;
+ return 0;
}
-
-static const struct file_operations tracing_branch_fops = {
- .open = tracing_branch_open,
- .read = seq_read,
- .llseek = seq_lseek,
-};
+fs_initcall(init_annotated_branch_stats);
#ifdef CONFIG_PROFILE_ALL_BRANCHES
+
extern unsigned long __start_branch_profile[];
extern unsigned long __stop_branch_profile[];
-static const struct ftrace_pointer ftrace_branch_pos = {
- .start = __start_branch_profile,
- .stop = __stop_branch_profile,
- .hit = 1,
-};
+static int all_branch_stat_headers(struct seq_file *m)
+{
+ seq_printf(m, " miss hit %% ");
+ seq_printf(m, " Function "
+ " File Line\n"
+ " ------- --------- - "
+ " -------- "
+ " ---- ----\n");
+ return 0;
+}
-#endif /* CONFIG_PROFILE_ALL_BRANCHES */
+static void *all_branch_stat_start(void)
+{
+ return __start_branch_profile;
+}
-extern unsigned long __start_annotated_branch_profile[];
-extern unsigned long __stop_annotated_branch_profile[];
+static void *
+all_branch_stat_next(void *v, int idx)
+{
+ struct ftrace_branch_data *p = v;
-static const struct ftrace_pointer ftrace_annotated_branch_pos = {
- .start = __start_annotated_branch_profile,
- .stop = __stop_annotated_branch_profile,
-};
+ ++p;
-static __init int ftrace_branch_init(void)
-{
- struct dentry *d_tracer;
- struct dentry *entry;
+ if ((void *)p >= (void *)__stop_branch_profile)
+ return NULL;
- d_tracer = tracing_init_dentry();
+ return p;
+}
- entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer,
- (void *)&ftrace_annotated_branch_pos,
- &tracing_branch_fops);
- if (!entry)
- pr_warning("Could not create debugfs "
- "'profile_annotatet_branch' entry\n");
+static struct tracer_stat all_branch_stats = {
+ .name = "branch_all",
+ .stat_start = all_branch_stat_start,
+ .stat_next = all_branch_stat_next,
+ .stat_headers = all_branch_stat_headers,
+ .stat_show = branch_stat_show
+};
-#ifdef CONFIG_PROFILE_ALL_BRANCHES
- entry = debugfs_create_file("profile_branch", 0444, d_tracer,
- (void *)&ftrace_branch_pos,
- &tracing_branch_fops);
- if (!entry)
- pr_warning("Could not create debugfs"
- " 'profile_branch' entry\n");
-#endif
+__init static int all_annotated_branch_stats(void)
+{
+ int ret;
+ ret = register_stat_tracer(&all_branch_stats);
+ if (!ret) {
+ printk(KERN_WARNING "Warning: could not register "
+ "all branches stats\n");
+ return 1;
+ }
return 0;
}
-
-device_initcall(ftrace_branch_init);
+fs_initcall(all_annotated_branch_stats);
+#endif /* CONFIG_PROFILE_ALL_BRANCHES */
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 9236d7e25a1..c9a0b7df44f 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -9,6 +9,7 @@
* Copyright (C) 2004-2006 Ingo Molnar
* Copyright (C) 2004 William Lee Irwin III
*/
+#include <linux/ring_buffer.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
@@ -16,52 +17,388 @@
#include "trace.h"
-static void start_function_trace(struct trace_array *tr)
+/* function tracing enabled */
+static int ftrace_function_enabled;
+
+static struct trace_array *func_trace;
+
+static void tracing_start_function_trace(void);
+static void tracing_stop_function_trace(void);
+
+static int function_trace_init(struct trace_array *tr)
{
+ func_trace = tr;
tr->cpu = get_cpu();
- tracing_reset_online_cpus(tr);
put_cpu();
tracing_start_cmdline_record();
tracing_start_function_trace();
+ return 0;
}
-static void stop_function_trace(struct trace_array *tr)
+static void function_trace_reset(struct trace_array *tr)
{
tracing_stop_function_trace();
tracing_stop_cmdline_record();
}
-static int function_trace_init(struct trace_array *tr)
+static void function_trace_start(struct trace_array *tr)
{
- start_function_trace(tr);
- return 0;
+ tracing_reset_online_cpus(tr);
}
-static void function_trace_reset(struct trace_array *tr)
+static void
+function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
+{
+ struct trace_array *tr = func_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu, resched;
+ int pc;
+
+ if (unlikely(!ftrace_function_enabled))
+ return;
+
+ pc = preempt_count();
+ resched = ftrace_preempt_disable();
+ local_save_flags(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1))
+ trace_function(tr, ip, parent_ip, flags, pc);
+
+ atomic_dec(&data->disabled);
+ ftrace_preempt_enable(resched);
+}
+
+static void
+function_trace_call(unsigned long ip, unsigned long parent_ip)
{
- stop_function_trace(tr);
+ struct trace_array *tr = func_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ if (unlikely(!ftrace_function_enabled))
+ return;
+
+ /*
+ * Need to use raw, since this must be called before the
+ * recursive protection is performed.
+ */
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ trace_function(tr, ip, parent_ip, flags, pc);
+ }
+
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
}
-static void function_trace_start(struct trace_array *tr)
+static void
+function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
{
- tracing_reset_online_cpus(tr);
+ struct trace_array *tr = func_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+ int pc;
+
+ if (unlikely(!ftrace_function_enabled))
+ return;
+
+ /*
+ * Need to use raw, since this must be called before the
+ * recursive protection is performed.
+ */
+ local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1)) {
+ pc = preempt_count();
+ trace_function(tr, ip, parent_ip, flags, pc);
+ /*
+ * skip over 5 funcs:
+ * __ftrace_trace_stack,
+ * __trace_stack,
+ * function_stack_trace_call
+ * ftrace_list_func
+ * ftrace_call
+ */
+ __trace_stack(tr, flags, 5, pc);
+ }
+
+ atomic_dec(&data->disabled);
+ local_irq_restore(flags);
+}
+
+
+static struct ftrace_ops trace_ops __read_mostly =
+{
+ .func = function_trace_call,
+};
+
+static struct ftrace_ops trace_stack_ops __read_mostly =
+{
+ .func = function_stack_trace_call,
+};
+
+/* Our two options */
+enum {
+ TRACE_FUNC_OPT_STACK = 0x1,
+};
+
+static struct tracer_opt func_opts[] = {
+#ifdef CONFIG_STACKTRACE
+ { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
+#endif
+ { } /* Always set a last empty entry */
+};
+
+static struct tracer_flags func_flags = {
+ .val = 0, /* By default: all flags disabled */
+ .opts = func_opts
+};
+
+static void tracing_start_function_trace(void)
+{
+ ftrace_function_enabled = 0;
+
+ if (trace_flags & TRACE_ITER_PREEMPTONLY)
+ trace_ops.func = function_trace_call_preempt_only;
+ else
+ trace_ops.func = function_trace_call;
+
+ if (func_flags.val & TRACE_FUNC_OPT_STACK)
+ register_ftrace_function(&trace_stack_ops);
+ else
+ register_ftrace_function(&trace_ops);
+
+ ftrace_function_enabled = 1;
+}
+
+static void tracing_stop_function_trace(void)
+{
+ ftrace_function_enabled = 0;
+ /* OK if they are not registered */
+ unregister_ftrace_function(&trace_stack_ops);
+ unregister_ftrace_function(&trace_ops);
+}
+
+static int func_set_flag(u32 old_flags, u32 bit, int set)
+{
+ if (bit == TRACE_FUNC_OPT_STACK) {
+ /* do nothing if already set */
+ if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK))
+ return 0;
+
+ if (set) {
+ unregister_ftrace_function(&trace_ops);
+ register_ftrace_function(&trace_stack_ops);
+ } else {
+ unregister_ftrace_function(&trace_stack_ops);
+ register_ftrace_function(&trace_ops);
+ }
+
+ return 0;
+ }
+
+ return -EINVAL;
}
static struct tracer function_trace __read_mostly =
{
- .name = "function",
- .init = function_trace_init,
- .reset = function_trace_reset,
- .start = function_trace_start,
+ .name = "function",
+ .init = function_trace_init,
+ .reset = function_trace_reset,
+ .start = function_trace_start,
+ .wait_pipe = poll_wait_pipe,
+ .flags = &func_flags,
+ .set_flag = func_set_flag,
#ifdef CONFIG_FTRACE_SELFTEST
- .selftest = trace_selftest_startup_function,
+ .selftest = trace_selftest_startup_function,
#endif
};
+#ifdef CONFIG_DYNAMIC_FTRACE
+static void
+ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data)
+{
+ long *count = (long *)data;
+
+ if (tracing_is_on())
+ return;
+
+ if (!*count)
+ return;
+
+ if (*count != -1)
+ (*count)--;
+
+ tracing_on();
+}
+
+static void
+ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data)
+{
+ long *count = (long *)data;
+
+ if (!tracing_is_on())
+ return;
+
+ if (!*count)
+ return;
+
+ if (*count != -1)
+ (*count)--;
+
+ tracing_off();
+}
+
+static int
+ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
+ struct ftrace_probe_ops *ops, void *data);
+
+static struct ftrace_probe_ops traceon_probe_ops = {
+ .func = ftrace_traceon,
+ .print = ftrace_trace_onoff_print,
+};
+
+static struct ftrace_probe_ops traceoff_probe_ops = {
+ .func = ftrace_traceoff,
+ .print = ftrace_trace_onoff_print,
+};
+
+static int
+ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
+ struct ftrace_probe_ops *ops, void *data)
+{
+ char str[KSYM_SYMBOL_LEN];
+ long count = (long)data;
+
+ kallsyms_lookup(ip, NULL, NULL, NULL, str);
+ seq_printf(m, "%s:", str);
+
+ if (ops == &traceon_probe_ops)
+ seq_printf(m, "traceon");
+ else
+ seq_printf(m, "traceoff");
+
+ if (count == -1)
+ seq_printf(m, ":unlimited\n");
+ else
+ seq_printf(m, ":count=%ld", count);
+ seq_putc(m, '\n');
+
+ return 0;
+}
+
+static int
+ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)
+{
+ struct ftrace_probe_ops *ops;
+
+ /* we register both traceon and traceoff to this callback */
+ if (strcmp(cmd, "traceon") == 0)
+ ops = &traceon_probe_ops;
+ else
+ ops = &traceoff_probe_ops;
+
+ unregister_ftrace_function_probe_func(glob, ops);
+
+ return 0;
+}
+
+static int
+ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
+{
+ struct ftrace_probe_ops *ops;
+ void *count = (void *)-1;
+ char *number;
+ int ret;
+
+ /* hash funcs only work with set_ftrace_filter */
+ if (!enable)
+ return -EINVAL;
+
+ if (glob[0] == '!')
+ return ftrace_trace_onoff_unreg(glob+1, cmd, param);
+
+ /* we register both traceon and traceoff to this callback */
+ if (strcmp(cmd, "traceon") == 0)
+ ops = &traceon_probe_ops;
+ else
+ ops = &traceoff_probe_ops;
+
+ if (!param)
+ goto out_reg;
+
+ number = strsep(&param, ":");
+
+ if (!strlen(number))
+ goto out_reg;
+
+ /*
+ * We use the callback data field (which is a pointer)
+ * as our counter.
+ */
+ ret = strict_strtoul(number, 0, (unsigned long *)&count);
+ if (ret)
+ return ret;
+
+ out_reg:
+ ret = register_ftrace_function_probe(glob, ops, count);
+
+ return ret;
+}
+
+static struct ftrace_func_command ftrace_traceon_cmd = {
+ .name = "traceon",
+ .func = ftrace_trace_onoff_callback,
+};
+
+static struct ftrace_func_command ftrace_traceoff_cmd = {
+ .name = "traceoff",
+ .func = ftrace_trace_onoff_callback,
+};
+
+static int __init init_func_cmd_traceon(void)
+{
+ int ret;
+
+ ret = register_ftrace_command(&ftrace_traceoff_cmd);
+ if (ret)
+ return ret;
+
+ ret = register_ftrace_command(&ftrace_traceon_cmd);
+ if (ret)
+ unregister_ftrace_command(&ftrace_traceoff_cmd);
+ return ret;
+}
+#else
+static inline int init_func_cmd_traceon(void)
+{
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
static __init int init_function_trace(void)
{
+ init_func_cmd_traceon();
return register_tracer(&function_trace);
}
-
device_initcall(init_function_trace);
+
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 930c08e5b38..0ff5cb66190 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1,7 +1,7 @@
/*
*
* Function graph tracer.
- * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
* Mostly borrowed from function tracer which
* is Copyright (c) Steven Rostedt <srostedt@redhat.com>
*
@@ -12,6 +12,7 @@
#include <linux/fs.h>
#include "trace.h"
+#include "trace_output.h"
#define TRACE_GRAPH_INDENT 2
@@ -20,9 +21,11 @@
#define TRACE_GRAPH_PRINT_CPU 0x2
#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
#define TRACE_GRAPH_PRINT_PROC 0x8
+#define TRACE_GRAPH_PRINT_DURATION 0x10
+#define TRACE_GRAPH_PRINT_ABS_TIME 0X20
static struct tracer_opt trace_opts[] = {
- /* Display overruns ? */
+ /* Display overruns? (for self-debug purpose) */
{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
/* Display CPU ? */
{ TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
@@ -30,26 +33,26 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
/* Display proc name/pid */
{ TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
+ /* Display duration of execution */
+ { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
+ /* Display absolute time of an entry */
+ { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
{ } /* Empty entry */
};
static struct tracer_flags tracer_flags = {
/* Don't display overruns and proc by default */
- .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD,
+ .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
+ TRACE_GRAPH_PRINT_DURATION,
.opts = trace_opts
};
/* pid on the last trace processed */
-static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 };
+
static int graph_trace_init(struct trace_array *tr)
{
- int cpu, ret;
-
- for_each_online_cpu(cpu)
- tracing_reset(tr, cpu);
-
- ret = register_ftrace_graph(&trace_graph_return,
+ int ret = register_ftrace_graph(&trace_graph_return,
&trace_graph_entry);
if (ret)
return ret;
@@ -153,17 +156,25 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
/* If the pid changed since the last trace, output this event */
static enum print_line_t
-verif_pid(struct trace_seq *s, pid_t pid, int cpu)
+verif_pid(struct trace_seq *s, pid_t pid, int cpu, pid_t *last_pids_cpu)
{
pid_t prev_pid;
+ pid_t *last_pid;
int ret;
- if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
+ if (!last_pids_cpu)
return TRACE_TYPE_HANDLED;
- prev_pid = last_pid[cpu];
- last_pid[cpu] = pid;
+ last_pid = per_cpu_ptr(last_pids_cpu, cpu);
+
+ if (*last_pid == pid)
+ return TRACE_TYPE_HANDLED;
+ prev_pid = *last_pid;
+ *last_pid = pid;
+
+ if (prev_pid == -1)
+ return TRACE_TYPE_HANDLED;
/*
* Context-switch trace line:
@@ -175,34 +186,34 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu)
ret = trace_seq_printf(s,
" ------------------------------------------\n");
if (!ret)
- TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_PARTIAL_LINE;
ret = print_graph_cpu(s, cpu);
if (ret == TRACE_TYPE_PARTIAL_LINE)
- TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_PARTIAL_LINE;
ret = print_graph_proc(s, prev_pid);
if (ret == TRACE_TYPE_PARTIAL_LINE)
- TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_PARTIAL_LINE;
ret = trace_seq_printf(s, " => ");
if (!ret)
- TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_PARTIAL_LINE;
ret = print_graph_proc(s, pid);
if (ret == TRACE_TYPE_PARTIAL_LINE)
- TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_PARTIAL_LINE;
ret = trace_seq_printf(s,
"\n ------------------------------------------\n\n");
if (!ret)
- TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_PARTIAL_LINE;
- return ret;
+ return TRACE_TYPE_HANDLED;
}
-static bool
-trace_branch_is_leaf(struct trace_iterator *iter,
+static struct ftrace_graph_ret_entry *
+get_return_for_leaf(struct trace_iterator *iter,
struct ftrace_graph_ent_entry *curr)
{
struct ring_buffer_iter *ring_iter;
@@ -211,29 +222,63 @@ trace_branch_is_leaf(struct trace_iterator *iter,
ring_iter = iter->buffer_iter[iter->cpu];
- if (!ring_iter)
- return false;
-
- event = ring_buffer_iter_peek(ring_iter, NULL);
+ /* First peek to compare current entry and the next one */
+ if (ring_iter)
+ event = ring_buffer_iter_peek(ring_iter, NULL);
+ else {
+ /* We need to consume the current entry to see the next one */
+ ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+ event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
+ NULL);
+ }
if (!event)
- return false;
+ return NULL;
next = ring_buffer_event_data(event);
if (next->ent.type != TRACE_GRAPH_RET)
- return false;
+ return NULL;
if (curr->ent.pid != next->ent.pid ||
curr->graph_ent.func != next->ret.func)
- return false;
+ return NULL;
+
+ /* this is a leaf, now advance the iterator */
+ if (ring_iter)
+ ring_buffer_read(ring_iter, NULL);
+
+ return next;
+}
+
+/* Signal a overhead of time execution to the output */
+static int
+print_graph_overhead(unsigned long long duration, struct trace_seq *s)
+{
+ /* If duration disappear, we don't need anything */
+ if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION))
+ return 1;
+
+ /* Non nested entry or return */
+ if (duration == -1)
+ return trace_seq_printf(s, " ");
+
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ /* Duration exceeded 100 msecs */
+ if (duration > 100000ULL)
+ return trace_seq_printf(s, "! ");
- return true;
+ /* Duration exceeded 10 msecs */
+ if (duration > 10000ULL)
+ return trace_seq_printf(s, "+ ");
+ }
+
+ return trace_seq_printf(s, " ");
}
static enum print_line_t
print_graph_irq(struct trace_seq *s, unsigned long addr,
- enum trace_type type, int cpu, pid_t pid)
+ enum trace_type type, int cpu, pid_t pid)
{
int ret;
@@ -241,35 +286,40 @@ print_graph_irq(struct trace_seq *s, unsigned long addr,
addr >= (unsigned long)__irqentry_text_end)
return TRACE_TYPE_UNHANDLED;
- if (type == TRACE_GRAPH_ENT) {
- ret = trace_seq_printf(s, "==========> | ");
- } else {
- /* Cpu */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
- ret = print_graph_cpu(s, cpu);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
- }
- /* Proc */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
- ret = print_graph_proc(s, pid);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
+ /* Cpu */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+ ret = print_graph_cpu(s, cpu);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+ /* Proc */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
+ ret = print_graph_proc(s, pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = trace_seq_printf(s, " | ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
- ret = trace_seq_printf(s, " | ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ /* No overhead */
+ ret = print_graph_overhead(-1, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
- /* No overhead */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
- ret = trace_seq_printf(s, " ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ if (type == TRACE_GRAPH_ENT)
+ ret = trace_seq_printf(s, "==========>");
+ else
+ ret = trace_seq_printf(s, "<==========");
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Don't close the duration column if haven't one */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
+ trace_seq_printf(s, " |");
+ ret = trace_seq_printf(s, "\n");
- ret = trace_seq_printf(s, "<========== |\n");
- }
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
@@ -288,7 +338,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
sprintf(msecs_str, "%lu", (unsigned long) duration);
/* Print msecs */
- ret = trace_seq_printf(s, msecs_str);
+ ret = trace_seq_printf(s, "%s", msecs_str);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
@@ -321,51 +371,44 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
}
-/* Signal a overhead of time execution to the output */
-static int
-print_graph_overhead(unsigned long long duration, struct trace_seq *s)
+static int print_graph_abs_time(u64 t, struct trace_seq *s)
{
- /* Duration exceeded 100 msecs */
- if (duration > 100000ULL)
- return trace_seq_printf(s, "! ");
+ unsigned long usecs_rem;
- /* Duration exceeded 10 msecs */
- if (duration > 10000ULL)
- return trace_seq_printf(s, "+ ");
+ usecs_rem = do_div(t, 1000000000);
+ usecs_rem /= 1000;
- return trace_seq_printf(s, " ");
+ return trace_seq_printf(s, "%5lu.%06lu | ",
+ (unsigned long)t, usecs_rem);
}
/* Case of a leaf function on its call entry */
static enum print_line_t
print_graph_entry_leaf(struct trace_iterator *iter,
- struct ftrace_graph_ent_entry *entry, struct trace_seq *s)
+ struct ftrace_graph_ent_entry *entry,
+ struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s)
{
- struct ftrace_graph_ret_entry *ret_entry;
struct ftrace_graph_ret *graph_ret;
- struct ring_buffer_event *event;
struct ftrace_graph_ent *call;
unsigned long long duration;
int ret;
int i;
- event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
- ret_entry = ring_buffer_event_data(event);
graph_ret = &ret_entry->ret;
call = &entry->graph_ent;
duration = graph_ret->rettime - graph_ret->calltime;
/* Overhead */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
- ret = print_graph_overhead(duration, s);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ ret = print_graph_overhead(duration, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
/* Duration */
- ret = print_graph_duration(duration, s);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
+ ret = print_graph_duration(duration, s);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
/* Function */
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -394,25 +437,17 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
struct ftrace_graph_ent *call = &entry->graph_ent;
/* No overhead */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
- ret = trace_seq_printf(s, " ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ ret = print_graph_overhead(-1, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
- /* Interrupt */
- ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid);
- if (ret == TRACE_TYPE_UNHANDLED) {
- /* No time */
+ /* No time */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
ret = trace_seq_printf(s, " | ");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- } else {
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
}
-
/* Function */
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
ret = trace_seq_printf(s, " ");
@@ -428,20 +463,40 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- return TRACE_TYPE_HANDLED;
+ /*
+ * we already consumed the current entry to check the next one
+ * and see if this is a leaf.
+ */
+ return TRACE_TYPE_NO_CONSUME;
}
static enum print_line_t
print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
- struct trace_iterator *iter, int cpu)
+ struct trace_iterator *iter)
{
int ret;
+ int cpu = iter->cpu;
+ pid_t *last_entry = iter->private;
struct trace_entry *ent = iter->ent;
+ struct ftrace_graph_ent *call = &field->graph_ent;
+ struct ftrace_graph_ret_entry *leaf_ret;
/* Pid */
- if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
+ if (verif_pid(s, ent->pid, cpu, last_entry) == TRACE_TYPE_PARTIAL_LINE)
return TRACE_TYPE_PARTIAL_LINE;
+ /* Interrupt */
+ ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, ent->pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Absolute time */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
+ ret = print_graph_abs_time(iter->ts, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
/* Cpu */
if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
ret = print_graph_cpu(s, cpu);
@@ -460,8 +515,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
return TRACE_TYPE_PARTIAL_LINE;
}
- if (trace_branch_is_leaf(iter, field))
- return print_graph_entry_leaf(iter, field, s);
+ leaf_ret = get_return_for_leaf(iter, field);
+ if (leaf_ret)
+ return print_graph_entry_leaf(iter, field, leaf_ret, s);
else
return print_graph_entry_nested(field, s, iter->ent->pid, cpu);
@@ -469,16 +525,25 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
static enum print_line_t
print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
- struct trace_entry *ent, int cpu)
+ struct trace_entry *ent, struct trace_iterator *iter)
{
int i;
int ret;
+ int cpu = iter->cpu;
+ pid_t *last_pid = iter->private;
unsigned long long duration = trace->rettime - trace->calltime;
/* Pid */
- if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
+ if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
return TRACE_TYPE_PARTIAL_LINE;
+ /* Absolute time */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
+ ret = print_graph_abs_time(iter->ts, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
/* Cpu */
if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
ret = print_graph_cpu(s, cpu);
@@ -498,16 +563,16 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
}
/* Overhead */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
- ret = print_graph_overhead(duration, s);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ ret = print_graph_overhead(duration, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
/* Duration */
- ret = print_graph_duration(duration, s);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
+ ret = print_graph_duration(duration, s);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
/* Closing brace */
for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
@@ -541,14 +606,23 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
{
int i;
int ret;
+ int cpu = iter->cpu;
+ pid_t *last_pid = iter->private;
+
+ /* Absolute time */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
+ ret = print_graph_abs_time(iter->ts, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
/* Pid */
- if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE)
+ if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
return TRACE_TYPE_PARTIAL_LINE;
/* Cpu */
if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
- ret = print_graph_cpu(s, iter->cpu);
+ ret = print_graph_cpu(s, cpu);
if (ret == TRACE_TYPE_PARTIAL_LINE)
return TRACE_TYPE_PARTIAL_LINE;
}
@@ -565,17 +639,17 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
}
/* No overhead */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
- ret = trace_seq_printf(s, " ");
+ ret = print_graph_overhead(-1, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* No time */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
+ ret = trace_seq_printf(s, " | ");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
}
- /* No time */
- ret = trace_seq_printf(s, " | ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
/* Indentation */
if (trace->depth > 0)
for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
@@ -589,8 +663,11 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- if (ent->flags & TRACE_FLAG_CONT)
- trace_seq_print_cont(s, iter);
+ /* Strip ending newline */
+ if (s->buffer[s->len - 1] == '\n') {
+ s->buffer[s->len - 1] = '\0';
+ s->len--;
+ }
ret = trace_seq_printf(s, " */\n");
if (!ret)
@@ -610,13 +687,12 @@ print_graph_function(struct trace_iterator *iter)
case TRACE_GRAPH_ENT: {
struct ftrace_graph_ent_entry *field;
trace_assign_type(field, entry);
- return print_graph_entry(field, s, iter,
- iter->cpu);
+ return print_graph_entry(field, s, iter);
}
case TRACE_GRAPH_RET: {
struct ftrace_graph_ret_entry *field;
trace_assign_type(field, entry);
- return print_graph_return(&field->ret, s, entry, iter->cpu);
+ return print_graph_return(&field->ret, s, entry, iter);
}
case TRACE_PRINT: {
struct print_entry *field;
@@ -632,33 +708,64 @@ static void print_graph_headers(struct seq_file *s)
{
/* 1st line */
seq_printf(s, "# ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
+ seq_printf(s, " TIME ");
if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
- seq_printf(s, "CPU ");
+ seq_printf(s, "CPU");
if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
- seq_printf(s, "TASK/PID ");
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD)
- seq_printf(s, "OVERHEAD/");
- seq_printf(s, "DURATION FUNCTION CALLS\n");
+ seq_printf(s, " TASK/PID ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
+ seq_printf(s, " DURATION ");
+ seq_printf(s, " FUNCTION CALLS\n");
/* 2nd line */
seq_printf(s, "# ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
+ seq_printf(s, " | ");
if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
- seq_printf(s, "| ");
+ seq_printf(s, "| ");
if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
- seq_printf(s, "| | ");
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
- seq_printf(s, "| ");
- seq_printf(s, "| | | | |\n");
- } else
- seq_printf(s, " | | | | |\n");
+ seq_printf(s, " | | ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
+ seq_printf(s, " | | ");
+ seq_printf(s, " | | | |\n");
+}
+
+static void graph_trace_open(struct trace_iterator *iter)
+{
+ /* pid on the last trace processed */
+ pid_t *last_pid = alloc_percpu(pid_t);
+ int cpu;
+
+ if (!last_pid)
+ pr_warning("function graph tracer: not enough memory\n");
+ else
+ for_each_possible_cpu(cpu) {
+ pid_t *pid = per_cpu_ptr(last_pid, cpu);
+ *pid = -1;
+ }
+
+ iter->private = last_pid;
}
+
+static void graph_trace_close(struct trace_iterator *iter)
+{
+ percpu_free(iter->private);
+}
+
static struct tracer graph_trace __read_mostly = {
.name = "function_graph",
+ .open = graph_trace_open,
+ .close = graph_trace_close,
+ .wait_pipe = poll_wait_pipe,
.init = graph_trace_init,
.reset = graph_trace_reset,
.print_line = print_graph_function,
.print_header = print_graph_headers,
.flags = &tracer_flags,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_function_graph,
+#endif
};
static __init int init_graph_trace(void)
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 649df22d435..3561aace075 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -1,7 +1,8 @@
/*
* h/w branch tracer for x86 based on bts
*
- * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com>
+ * Copyright (C) 2008-2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
*
*/
@@ -10,21 +11,44 @@
#include <linux/debugfs.h>
#include <linux/ftrace.h>
#include <linux/kallsyms.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
#include <asm/ds.h>
#include "trace.h"
+#include "trace_output.h"
#define SIZEOF_BTS (1 << 13)
+/* The tracer mutex protects the below per-cpu tracer array.
+ It needs to be held to:
+ - start tracing on all cpus
+ - stop tracing on all cpus
+ - start tracing on a single hotplug cpu
+ - stop tracing on a single hotplug cpu
+ - read the trace from all cpus
+ - read the trace from a single cpu
+*/
+static DEFINE_MUTEX(bts_tracer_mutex);
static DEFINE_PER_CPU(struct bts_tracer *, tracer);
static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
#define this_tracer per_cpu(tracer, smp_processor_id())
#define this_buffer per_cpu(buffer, smp_processor_id())
+static int __read_mostly trace_hw_branches_enabled;
+static struct trace_array *hw_branch_trace __read_mostly;
+
+/*
+ * Start tracing on the current cpu.
+ * The argument is ignored.
+ *
+ * pre: bts_tracer_mutex must be locked.
+ */
static void bts_trace_start_cpu(void *arg)
{
if (this_tracer)
@@ -42,14 +66,20 @@ static void bts_trace_start_cpu(void *arg)
static void bts_trace_start(struct trace_array *tr)
{
- int cpu;
+ mutex_lock(&bts_tracer_mutex);
- tracing_reset_online_cpus(tr);
+ on_each_cpu(bts_trace_start_cpu, NULL, 1);
+ trace_hw_branches_enabled = 1;
- for_each_cpu(cpu, cpu_possible_mask)
- smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
+ mutex_unlock(&bts_tracer_mutex);
}
+/*
+ * Stop tracing on the current cpu.
+ * The argument is ignored.
+ *
+ * pre: bts_tracer_mutex must be locked.
+ */
static void bts_trace_stop_cpu(void *arg)
{
if (this_tracer) {
@@ -60,26 +90,62 @@ static void bts_trace_stop_cpu(void *arg)
static void bts_trace_stop(struct trace_array *tr)
{
- int cpu;
+ mutex_lock(&bts_tracer_mutex);
+
+ trace_hw_branches_enabled = 0;
+ on_each_cpu(bts_trace_stop_cpu, NULL, 1);
+
+ mutex_unlock(&bts_tracer_mutex);
+}
+
+static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ mutex_lock(&bts_tracer_mutex);
+
+ if (!trace_hw_branches_enabled)
+ goto out;
- for_each_cpu(cpu, cpu_possible_mask)
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_DOWN_FAILED:
+ smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
+ break;
+ case CPU_DOWN_PREPARE:
smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
+ break;
+ }
+
+ out:
+ mutex_unlock(&bts_tracer_mutex);
+ return NOTIFY_DONE;
}
-static int bts_trace_init(struct trace_array *tr)
+static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
+ .notifier_call = bts_hotcpu_handler
+};
+
+static int __cpuinit bts_trace_init(struct trace_array *tr)
{
- tracing_reset_online_cpus(tr);
+ hw_branch_trace = tr;
+
+ register_hotcpu_notifier(&bts_hotcpu_notifier);
bts_trace_start(tr);
return 0;
}
+static void __cpuinit bts_trace_reset(struct trace_array *tr)
+{
+ bts_trace_stop(tr);
+ unregister_hotcpu_notifier(&bts_hotcpu_notifier);
+}
+
static void bts_trace_print_header(struct seq_file *m)
{
- seq_puts(m,
- "# CPU# FROM TO FUNCTION\n");
- seq_puts(m,
- "# | | | |\n");
+ seq_puts(m, "# CPU# TO <- FROM\n");
}
static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
@@ -87,15 +153,15 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
struct trace_entry *entry = iter->ent;
struct trace_seq *seq = &iter->seq;
struct hw_branch_entry *it;
+ unsigned long symflags = TRACE_ITER_SYM_OFFSET;
trace_assign_type(it, entry);
if (entry->type == TRACE_HW_BRANCHES) {
- if (trace_seq_printf(seq, "%4d ", entry->cpu) &&
- trace_seq_printf(seq, "0x%016llx -> 0x%016llx ",
- it->from, it->to) &&
- (!it->from ||
- seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) &&
+ if (trace_seq_printf(seq, "%4d ", iter->cpu) &&
+ seq_print_ip_sym(seq, it->to, symflags) &&
+ trace_seq_printf(seq, "\t <- ") &&
+ seq_print_ip_sym(seq, it->from, symflags) &&
trace_seq_printf(seq, "\n"))
return TRACE_TYPE_HANDLED;
return TRACE_TYPE_PARTIAL_LINE;;
@@ -103,26 +169,42 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
return TRACE_TYPE_UNHANDLED;
}
-void trace_hw_branch(struct trace_array *tr, u64 from, u64 to)
+void trace_hw_branch(u64 from, u64 to)
{
+ struct trace_array *tr = hw_branch_trace;
struct ring_buffer_event *event;
struct hw_branch_entry *entry;
- unsigned long irq;
+ unsigned long irq1;
+ int cpu;
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq);
- if (!event)
+ if (unlikely(!tr))
return;
+
+ if (unlikely(!trace_hw_branches_enabled))
+ return;
+
+ local_irq_save(irq1);
+ cpu = raw_smp_processor_id();
+ if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
+ goto out;
+
+ event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES,
+ sizeof(*entry), 0, 0);
+ if (!event)
+ goto out;
entry = ring_buffer_event_data(event);
tracing_generic_entry_update(&entry->ent, 0, from);
entry->ent.type = TRACE_HW_BRANCHES;
- entry->ent.cpu = smp_processor_id();
entry->from = from;
entry->to = to;
- ring_buffer_unlock_commit(tr->buffer, event, irq);
+ trace_buffer_unlock_commit(tr, event, 0, 0);
+
+ out:
+ atomic_dec(&tr->data[cpu]->disabled);
+ local_irq_restore(irq1);
}
-static void trace_bts_at(struct trace_array *tr,
- const struct bts_trace *trace, void *at)
+static void trace_bts_at(const struct bts_trace *trace, void *at)
{
struct bts_struct bts;
int err = 0;
@@ -137,18 +219,29 @@ static void trace_bts_at(struct trace_array *tr,
switch (bts.qualifier) {
case BTS_BRANCH:
- trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to);
+ trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to);
break;
}
}
+/*
+ * Collect the trace on the current cpu and write it into the ftrace buffer.
+ *
+ * pre: bts_tracer_mutex must be locked
+ */
static void trace_bts_cpu(void *arg)
{
struct trace_array *tr = (struct trace_array *) arg;
const struct bts_trace *trace;
unsigned char *at;
- if (!this_tracer)
+ if (unlikely(!tr))
+ return;
+
+ if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled)))
+ return;
+
+ if (unlikely(!this_tracer))
return;
ds_suspend_bts(this_tracer);
@@ -158,11 +251,11 @@ static void trace_bts_cpu(void *arg)
for (at = trace->ds.top; (void *)at < trace->ds.end;
at += trace->ds.size)
- trace_bts_at(tr, trace, at);
+ trace_bts_at(trace, at);
for (at = trace->ds.begin; (void *)at < trace->ds.top;
at += trace->ds.size)
- trace_bts_at(tr, trace, at);
+ trace_bts_at(trace, at);
out:
ds_resume_bts(this_tracer);
@@ -170,22 +263,38 @@ out:
static void trace_bts_prepare(struct trace_iterator *iter)
{
- int cpu;
+ mutex_lock(&bts_tracer_mutex);
+
+ on_each_cpu(trace_bts_cpu, iter->tr, 1);
+
+ mutex_unlock(&bts_tracer_mutex);
+}
+
+static void trace_bts_close(struct trace_iterator *iter)
+{
+ tracing_reset_online_cpus(iter->tr);
+}
+
+void trace_hw_branch_oops(void)
+{
+ mutex_lock(&bts_tracer_mutex);
+
+ trace_bts_cpu(hw_branch_trace);
- for_each_cpu(cpu, cpu_possible_mask)
- smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
+ mutex_unlock(&bts_tracer_mutex);
}
struct tracer bts_tracer __read_mostly =
{
.name = "hw-branch-tracer",
.init = bts_trace_init,
- .reset = bts_trace_stop,
+ .reset = bts_trace_reset,
.print_header = bts_trace_print_header,
.print_line = bts_trace_print_line,
.start = bts_trace_start,
.stop = bts_trace_stop,
- .open = trace_bts_prepare
+ .open = trace_bts_prepare,
+ .close = trace_bts_close
};
__init static int init_bts_trace(void)
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 62a78d94353..9e5ebd84415 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -1,5 +1,5 @@
/*
- * trace irqs off criticall timings
+ * trace irqs off critical timings
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
- trace_function(tr, data, ip, parent_ip, flags, preempt_count());
+ trace_function(tr, ip, parent_ip, flags, preempt_count());
atomic_dec(&data->disabled);
}
@@ -153,7 +153,7 @@ check_critical_timing(struct trace_array *tr,
if (!report_latency(delta))
goto out_unlock;
- trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
+ trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
latency = nsecs_to_usecs(delta);
@@ -177,7 +177,7 @@ out:
data->critical_sequence = max_sequence;
data->preempt_timestamp = ftrace_now(cpu);
tracing_reset(tr, cpu);
- trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
+ trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
}
static inline void
@@ -210,7 +210,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
local_save_flags(flags);
- trace_function(tr, data, ip, parent_ip, flags, preempt_count());
+ trace_function(tr, ip, parent_ip, flags, preempt_count());
per_cpu(tracing_cpu, cpu) = 1;
@@ -244,7 +244,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
atomic_inc(&data->disabled);
local_save_flags(flags);
- trace_function(tr, data, ip, parent_ip, flags, preempt_count());
+ trace_function(tr, ip, parent_ip, flags, preempt_count());
check_critical_timing(tr, data, parent_ip ? : ip, cpu);
data->critical_start = 0;
atomic_dec(&data->disabled);
@@ -353,28 +353,18 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
}
#endif /* CONFIG_PREEMPT_TRACER */
-/*
- * save_tracer_enabled is used to save the state of the tracer_enabled
- * variable when we disable it when we open a trace output file.
- */
-static int save_tracer_enabled;
-
static void start_irqsoff_tracer(struct trace_array *tr)
{
register_ftrace_function(&trace_ops);
- if (tracing_is_enabled()) {
+ if (tracing_is_enabled())
tracer_enabled = 1;
- save_tracer_enabled = 1;
- } else {
+ else
tracer_enabled = 0;
- save_tracer_enabled = 0;
- }
}
static void stop_irqsoff_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
- save_tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
}
@@ -395,25 +385,11 @@ static void irqsoff_tracer_reset(struct trace_array *tr)
static void irqsoff_tracer_start(struct trace_array *tr)
{
tracer_enabled = 1;
- save_tracer_enabled = 1;
}
static void irqsoff_tracer_stop(struct trace_array *tr)
{
tracer_enabled = 0;
- save_tracer_enabled = 0;
-}
-
-static void irqsoff_tracer_open(struct trace_iterator *iter)
-{
- /* stop the trace while dumping */
- tracer_enabled = 0;
-}
-
-static void irqsoff_tracer_close(struct trace_iterator *iter)
-{
- /* restart tracing */
- tracer_enabled = save_tracer_enabled;
}
#ifdef CONFIG_IRQSOFF_TRACER
@@ -431,8 +407,6 @@ static struct tracer irqsoff_tracer __read_mostly =
.reset = irqsoff_tracer_reset,
.start = irqsoff_tracer_start,
.stop = irqsoff_tracer_stop,
- .open = irqsoff_tracer_open,
- .close = irqsoff_tracer_close,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_irqsoff,
@@ -459,8 +433,6 @@ static struct tracer preemptoff_tracer __read_mostly =
.reset = irqsoff_tracer_reset,
.start = irqsoff_tracer_start,
.stop = irqsoff_tracer_stop,
- .open = irqsoff_tracer_open,
- .close = irqsoff_tracer_close,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptoff,
@@ -489,8 +461,6 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
.reset = irqsoff_tracer_reset,
.start = irqsoff_tracer_start,
.stop = irqsoff_tracer_stop,
- .open = irqsoff_tracer_open,
- .close = irqsoff_tracer_close,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_preemptirqsoff,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 80e503ef613..c401b908e80 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -12,6 +12,7 @@
#include <asm/atomic.h>
#include "trace.h"
+#include "trace_output.h"
struct header_iter {
struct pci_dev *dev;
@@ -183,21 +184,22 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
switch (rw->opcode) {
case MMIO_READ:
ret = trace_seq_printf(s,
- "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
+ "R %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
rw->width, secs, usec_rem, rw->map_id,
(unsigned long long)rw->phys,
rw->value, rw->pc, 0);
break;
case MMIO_WRITE:
ret = trace_seq_printf(s,
- "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
+ "W %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
rw->width, secs, usec_rem, rw->map_id,
(unsigned long long)rw->phys,
rw->value, rw->pc, 0);
break;
case MMIO_UNKNOWN_OP:
ret = trace_seq_printf(s,
- "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n",
+ "UNKNOWN %u.%06lu %d 0x%llx %02lx,%02lx,"
+ "%02lx 0x%lx %d\n",
secs, usec_rem, rw->map_id,
(unsigned long long)rw->phys,
(rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff,
@@ -229,14 +231,14 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
switch (m->opcode) {
case MMIO_PROBE:
ret = trace_seq_printf(s,
- "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
+ "MAP %u.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
secs, usec_rem, m->map_id,
(unsigned long long)m->phys, m->virt, m->len,
0UL, 0);
break;
case MMIO_UNPROBE:
ret = trace_seq_printf(s,
- "UNMAP %lu.%06lu %d 0x%lx %d\n",
+ "UNMAP %u.%06lu %d 0x%lx %d\n",
secs, usec_rem, m->map_id, 0UL, 0);
break;
default:
@@ -260,13 +262,10 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
int ret;
/* The trailing newline must be in the message. */
- ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
+ ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- if (entry->flags & TRACE_FLAG_CONT)
- trace_seq_print_cont(s, iter);
-
return TRACE_TYPE_HANDLED;
}
@@ -308,21 +307,17 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
{
struct ring_buffer_event *event;
struct trace_mmiotrace_rw *entry;
- unsigned long irq_flags;
+ int pc = preempt_count();
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
+ event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW,
+ sizeof(*entry), 0, pc);
if (!event) {
atomic_inc(&dropped_count);
return;
}
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, 0, preempt_count());
- entry->ent.type = TRACE_MMIO_RW;
entry->rw = *rw;
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-
- trace_wake_up();
+ trace_buffer_unlock_commit(tr, event, 0, pc);
}
void mmio_trace_rw(struct mmiotrace_rw *rw)
@@ -338,21 +333,17 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
{
struct ring_buffer_event *event;
struct trace_mmiotrace_map *entry;
- unsigned long irq_flags;
+ int pc = preempt_count();
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
+ event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP,
+ sizeof(*entry), 0, pc);
if (!event) {
atomic_inc(&dropped_count);
return;
}
entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, 0, preempt_count());
- entry->ent.type = TRACE_MMIO_MAP;
entry->map = *map;
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-
- trace_wake_up();
+ trace_buffer_unlock_commit(tr, event, 0, pc);
}
void mmio_trace_mapping(struct mmiotrace_map *map)
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index b9767acd30a..9aa84bde23c 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -47,12 +47,7 @@ static void stop_nop_trace(struct trace_array *tr)
static int nop_trace_init(struct trace_array *tr)
{
- int cpu;
ctx_trace = tr;
-
- for_each_online_cpu(cpu)
- tracing_reset(tr, cpu);
-
start_nop_trace(tr);
return 0;
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
new file mode 100644
index 00000000000..9fc815031b0
--- /dev/null
+++ b/kernel/trace/trace_output.c
@@ -0,0 +1,919 @@
+/*
+ * trace_output.c
+ *
+ * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ftrace.h>
+
+#include "trace_output.h"
+
+/* must be a power of 2 */
+#define EVENT_HASHSIZE 128
+
+static DEFINE_MUTEX(trace_event_mutex);
+static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
+
+static int next_event_type = __TRACE_LAST_TYPE + 1;
+
+/**
+ * trace_seq_printf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+{
+ int len = (PAGE_SIZE - 1) - s->len;
+ va_list ap;
+ int ret;
+
+ if (!len)
+ return 0;
+
+ va_start(ap, fmt);
+ ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
+ va_end(ap);
+
+ /* If we can't write it all, don't bother writing anything */
+ if (ret >= len)
+ return 0;
+
+ s->len += ret;
+
+ return len;
+}
+
+/**
+ * trace_seq_puts - trace sequence printing of simple string
+ * @s: trace sequence descriptor
+ * @str: simple string to record
+ *
+ * The tracer may use either the sequence operations or its own
+ * copy to user routines. This function records a simple string
+ * into a special buffer (@s) for later retrieval by a sequencer
+ * or other mechanism.
+ */
+int trace_seq_puts(struct trace_seq *s, const char *str)
+{
+ int len = strlen(str);
+
+ if (len > ((PAGE_SIZE - 1) - s->len))
+ return 0;
+
+ memcpy(s->buffer + s->len, str, len);
+ s->len += len;
+
+ return len;
+}
+
+int trace_seq_putc(struct trace_seq *s, unsigned char c)
+{
+ if (s->len >= (PAGE_SIZE - 1))
+ return 0;
+
+ s->buffer[s->len++] = c;
+
+ return 1;
+}
+
+int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
+{
+ if (len > ((PAGE_SIZE - 1) - s->len))
+ return 0;
+
+ memcpy(s->buffer + s->len, mem, len);
+ s->len += len;
+
+ return len;
+}
+
+int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
+{
+ unsigned char hex[HEX_CHARS];
+ unsigned char *data = mem;
+ int i, j;
+
+#ifdef __BIG_ENDIAN
+ for (i = 0, j = 0; i < len; i++) {
+#else
+ for (i = len-1, j = 0; i >= 0; i--) {
+#endif
+ hex[j++] = hex_asc_hi(data[i]);
+ hex[j++] = hex_asc_lo(data[i]);
+ }
+ hex[j++] = ' ';
+
+ return trace_seq_putmem(s, hex, j);
+}
+
+int trace_seq_path(struct trace_seq *s, struct path *path)
+{
+ unsigned char *p;
+
+ if (s->len >= (PAGE_SIZE - 1))
+ return 0;
+ p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
+ if (!IS_ERR(p)) {
+ p = mangle_path(s->buffer + s->len, p, "\n");
+ if (p) {
+ s->len = p - s->buffer;
+ return 1;
+ }
+ } else {
+ s->buffer[s->len++] = '?';
+ return 1;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_KRETPROBES
+static inline const char *kretprobed(const char *name)
+{
+ static const char tramp_name[] = "kretprobe_trampoline";
+ int size = sizeof(tramp_name);
+
+ if (strncmp(tramp_name, name, size) == 0)
+ return "[unknown/kretprobe'd]";
+ return name;
+}
+#else
+static inline const char *kretprobed(const char *name)
+{
+ return name;
+}
+#endif /* CONFIG_KRETPROBES */
+
+static int
+seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
+{
+#ifdef CONFIG_KALLSYMS
+ char str[KSYM_SYMBOL_LEN];
+ const char *name;
+
+ kallsyms_lookup(address, NULL, NULL, NULL, str);
+
+ name = kretprobed(str);
+
+ return trace_seq_printf(s, fmt, name);
+#endif
+ return 1;
+}
+
+static int
+seq_print_sym_offset(struct trace_seq *s, const char *fmt,
+ unsigned long address)
+{
+#ifdef CONFIG_KALLSYMS
+ char str[KSYM_SYMBOL_LEN];
+ const char *name;
+
+ sprint_symbol(str, address);
+ name = kretprobed(str);
+
+ return trace_seq_printf(s, fmt, name);
+#endif
+ return 1;
+}
+
+#ifndef CONFIG_64BIT
+# define IP_FMT "%08lx"
+#else
+# define IP_FMT "%016lx"
+#endif
+
+int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
+ unsigned long ip, unsigned long sym_flags)
+{
+ struct file *file = NULL;
+ unsigned long vmstart = 0;
+ int ret = 1;
+
+ if (mm) {
+ const struct vm_area_struct *vma;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, ip);
+ if (vma) {
+ file = vma->vm_file;
+ vmstart = vma->vm_start;
+ }
+ if (file) {
+ ret = trace_seq_path(s, &file->f_path);
+ if (ret)
+ ret = trace_seq_printf(s, "[+0x%lx]",
+ ip - vmstart);
+ }
+ up_read(&mm->mmap_sem);
+ }
+ if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
+ ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+ return ret;
+}
+
+int
+seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
+ unsigned long sym_flags)
+{
+ struct mm_struct *mm = NULL;
+ int ret = 1;
+ unsigned int i;
+
+ if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
+ struct task_struct *task;
+ /*
+ * we do the lookup on the thread group leader,
+ * since individual threads might have already quit!
+ */
+ rcu_read_lock();
+ task = find_task_by_vpid(entry->ent.tgid);
+ if (task)
+ mm = get_task_mm(task);
+ rcu_read_unlock();
+ }
+
+ for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+ unsigned long ip = entry->caller[i];
+
+ if (ip == ULONG_MAX || !ret)
+ break;
+ if (i && ret)
+ ret = trace_seq_puts(s, " <- ");
+ if (!ip) {
+ if (ret)
+ ret = trace_seq_puts(s, "??");
+ continue;
+ }
+ if (!ret)
+ break;
+ if (ret)
+ ret = seq_print_user_ip(s, mm, ip, sym_flags);
+ }
+
+ if (mm)
+ mmput(mm);
+ return ret;
+}
+
+int
+seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
+{
+ int ret;
+
+ if (!ip)
+ return trace_seq_printf(s, "0");
+
+ if (sym_flags & TRACE_ITER_SYM_OFFSET)
+ ret = seq_print_sym_offset(s, "%s", ip);
+ else
+ ret = seq_print_sym_short(s, "%s", ip);
+
+ if (!ret)
+ return 0;
+
+ if (sym_flags & TRACE_ITER_SYM_ADDR)
+ ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
+ return ret;
+}
+
+static int
+lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
+{
+ int hardirq, softirq;
+ char *comm;
+
+ comm = trace_find_cmdline(entry->pid);
+ hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
+ softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
+
+ if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c",
+ comm, entry->pid, cpu,
+ (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+ (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
+ 'X' : '.',
+ (entry->flags & TRACE_FLAG_NEED_RESCHED) ?
+ 'N' : '.',
+ (hardirq && softirq) ? 'H' :
+ hardirq ? 'h' : softirq ? 's' : '.'))
+ return 0;
+
+ if (entry->preempt_count)
+ return trace_seq_printf(s, "%x", entry->preempt_count);
+ return trace_seq_puts(s, ".");
+}
+
+static unsigned long preempt_mark_thresh = 100;
+
+static int
+lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
+ unsigned long rel_usecs)
+{
+ return trace_seq_printf(s, " %4lldus%c: ", abs_usecs,
+ rel_usecs > preempt_mark_thresh ? '!' :
+ rel_usecs > 1 ? '+' : ' ');
+}
+
+int trace_print_context(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry = iter->ent;
+ char *comm = trace_find_cmdline(entry->pid);
+ unsigned long long t = ns2usecs(iter->ts);
+ unsigned long usec_rem = do_div(t, USEC_PER_SEC);
+ unsigned long secs = (unsigned long)t;
+
+ return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
+ comm, entry->pid, iter->cpu, secs, usec_rem);
+}
+
+int trace_print_lat_context(struct trace_iterator *iter)
+{
+ u64 next_ts;
+ int ret;
+ struct trace_seq *s = &iter->seq;
+ struct trace_entry *entry = iter->ent,
+ *next_entry = trace_find_next_entry(iter, NULL,
+ &next_ts);
+ unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
+ unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
+ unsigned long rel_usecs;
+
+ if (!next_entry)
+ next_ts = iter->ts;
+ rel_usecs = ns2usecs(next_ts - iter->ts);
+
+ if (verbose) {
+ char *comm = trace_find_cmdline(entry->pid);
+ ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
+ " %ld.%03ldms (+%ld.%03ldms): ", comm,
+ entry->pid, iter->cpu, entry->flags,
+ entry->preempt_count, iter->idx,
+ ns2usecs(iter->ts),
+ abs_usecs / USEC_PER_MSEC,
+ abs_usecs % USEC_PER_MSEC,
+ rel_usecs / USEC_PER_MSEC,
+ rel_usecs % USEC_PER_MSEC);
+ } else {
+ ret = lat_print_generic(s, entry, iter->cpu);
+ if (ret)
+ ret = lat_print_timestamp(s, abs_usecs, rel_usecs);
+ }
+
+ return ret;
+}
+
+static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
+
+static int task_state_char(unsigned long state)
+{
+ int bit = state ? __ffs(state) + 1 : 0;
+
+ return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
+}
+
+/**
+ * ftrace_find_event - find a registered event
+ * @type: the type of event to look for
+ *
+ * Returns an event of type @type otherwise NULL
+ */
+struct trace_event *ftrace_find_event(int type)
+{
+ struct trace_event *event;
+ struct hlist_node *n;
+ unsigned key;
+
+ key = type & (EVENT_HASHSIZE - 1);
+
+ hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
+ if (event->type == type)
+ return event;
+ }
+
+ return NULL;
+}
+
+/**
+ * register_ftrace_event - register output for an event type
+ * @event: the event type to register
+ *
+ * Event types are stored in a hash and this hash is used to
+ * find a way to print an event. If the @event->type is set
+ * then it will use that type, otherwise it will assign a
+ * type to use.
+ *
+ * If you assign your own type, please make sure it is added
+ * to the trace_type enum in trace.h, to avoid collisions
+ * with the dynamic types.
+ *
+ * Returns the event type number or zero on error.
+ */
+int register_ftrace_event(struct trace_event *event)
+{
+ unsigned key;
+ int ret = 0;
+
+ mutex_lock(&trace_event_mutex);
+
+ if (!event->type)
+ event->type = next_event_type++;
+ else if (event->type > __TRACE_LAST_TYPE) {
+ printk(KERN_WARNING "Need to add type to trace.h\n");
+ WARN_ON(1);
+ }
+
+ if (ftrace_find_event(event->type))
+ goto out;
+
+ if (event->trace == NULL)
+ event->trace = trace_nop_print;
+ if (event->latency_trace == NULL)
+ event->latency_trace = trace_nop_print;
+ if (event->raw == NULL)
+ event->raw = trace_nop_print;
+ if (event->hex == NULL)
+ event->hex = trace_nop_print;
+ if (event->binary == NULL)
+ event->binary = trace_nop_print;
+
+ key = event->type & (EVENT_HASHSIZE - 1);
+
+ hlist_add_head_rcu(&event->node, &event_hash[key]);
+
+ ret = event->type;
+ out:
+ mutex_unlock(&trace_event_mutex);
+
+ return ret;
+}
+
+/**
+ * unregister_ftrace_event - remove a no longer used event
+ * @event: the event to remove
+ */
+int unregister_ftrace_event(struct trace_event *event)
+{
+ mutex_lock(&trace_event_mutex);
+ hlist_del(&event->node);
+ mutex_unlock(&trace_event_mutex);
+
+ return 0;
+}
+
+/*
+ * Standard events
+ */
+
+enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags)
+{
+ return TRACE_TYPE_HANDLED;
+}
+
+/* TRACE_FN */
+static enum print_line_t trace_fn_latency(struct trace_iterator *iter,
+ int flags)
+{
+ struct ftrace_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!seq_print_ip_sym(s, field->ip, flags))
+ goto partial;
+ if (!trace_seq_puts(s, " ("))
+ goto partial;
+ if (!seq_print_ip_sym(s, field->parent_ip, flags))
+ goto partial;
+ if (!trace_seq_puts(s, ")\n"))
+ goto partial;
+
+ return TRACE_TYPE_HANDLED;
+
+ partial:
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags)
+{
+ struct ftrace_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!seq_print_ip_sym(s, field->ip, flags))
+ goto partial;
+
+ if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
+ if (!trace_seq_printf(s, " <-"))
+ goto partial;
+ if (!seq_print_ip_sym(s,
+ field->parent_ip,
+ flags))
+ goto partial;
+ }
+ if (!trace_seq_printf(s, "\n"))
+ goto partial;
+
+ return TRACE_TYPE_HANDLED;
+
+ partial:
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags)
+{
+ struct ftrace_entry *field;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!trace_seq_printf(&iter->seq, "%lx %lx\n",
+ field->ip,
+ field->parent_ip))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags)
+{
+ struct ftrace_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->ip);
+ SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags)
+{
+ struct ftrace_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_FIELD_RET(s, field->ip);
+ SEQ_PUT_FIELD_RET(s, field->parent_ip);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static struct trace_event trace_fn_event = {
+ .type = TRACE_FN,
+ .trace = trace_fn_trace,
+ .latency_trace = trace_fn_latency,
+ .raw = trace_fn_raw,
+ .hex = trace_fn_hex,
+ .binary = trace_fn_bin,
+};
+
+/* TRACE_CTX an TRACE_WAKE */
+static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
+ char *delim)
+{
+ struct ctx_switch_entry *field;
+ char *comm;
+ int S, T;
+
+ trace_assign_type(field, iter->ent);
+
+ T = task_state_char(field->next_state);
+ S = task_state_char(field->prev_state);
+ comm = trace_find_cmdline(field->next_pid);
+ if (!trace_seq_printf(&iter->seq,
+ " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
+ field->prev_pid,
+ field->prev_prio,
+ S, delim,
+ field->next_cpu,
+ field->next_pid,
+ field->next_prio,
+ T, comm))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags)
+{
+ return trace_ctxwake_print(iter, "==>");
+}
+
+static enum print_line_t trace_wake_print(struct trace_iterator *iter,
+ int flags)
+{
+ return trace_ctxwake_print(iter, " +");
+}
+
+static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
+{
+ struct ctx_switch_entry *field;
+ int T;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!S)
+ task_state_char(field->prev_state);
+ T = task_state_char(field->next_state);
+ if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
+ field->prev_pid,
+ field->prev_prio,
+ S,
+ field->next_cpu,
+ field->next_pid,
+ field->next_prio,
+ T))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags)
+{
+ return trace_ctxwake_raw(iter, 0);
+}
+
+static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags)
+{
+ return trace_ctxwake_raw(iter, '+');
+}
+
+
+static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
+{
+ struct ctx_switch_entry *field;
+ struct trace_seq *s = &iter->seq;
+ int T;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!S)
+ task_state_char(field->prev_state);
+ T = task_state_char(field->next_state);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
+ SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
+ SEQ_PUT_HEX_FIELD_RET(s, S);
+ SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
+ SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
+ SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
+ SEQ_PUT_HEX_FIELD_RET(s, T);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags)
+{
+ return trace_ctxwake_hex(iter, 0);
+}
+
+static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags)
+{
+ return trace_ctxwake_hex(iter, '+');
+}
+
+static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
+ int flags)
+{
+ struct ctx_switch_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_FIELD_RET(s, field->prev_pid);
+ SEQ_PUT_FIELD_RET(s, field->prev_prio);
+ SEQ_PUT_FIELD_RET(s, field->prev_state);
+ SEQ_PUT_FIELD_RET(s, field->next_pid);
+ SEQ_PUT_FIELD_RET(s, field->next_prio);
+ SEQ_PUT_FIELD_RET(s, field->next_state);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static struct trace_event trace_ctx_event = {
+ .type = TRACE_CTX,
+ .trace = trace_ctx_print,
+ .latency_trace = trace_ctx_print,
+ .raw = trace_ctx_raw,
+ .hex = trace_ctx_hex,
+ .binary = trace_ctxwake_bin,
+};
+
+static struct trace_event trace_wake_event = {
+ .type = TRACE_WAKE,
+ .trace = trace_wake_print,
+ .latency_trace = trace_wake_print,
+ .raw = trace_wake_raw,
+ .hex = trace_wake_hex,
+ .binary = trace_ctxwake_bin,
+};
+
+/* TRACE_SPECIAL */
+static enum print_line_t trace_special_print(struct trace_iterator *iter,
+ int flags)
+{
+ struct special_entry *field;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n",
+ field->arg1,
+ field->arg2,
+ field->arg3))
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_special_hex(struct trace_iterator *iter,
+ int flags)
+{
+ struct special_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
+ SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
+ SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t trace_special_bin(struct trace_iterator *iter,
+ int flags)
+{
+ struct special_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ SEQ_PUT_FIELD_RET(s, field->arg1);
+ SEQ_PUT_FIELD_RET(s, field->arg2);
+ SEQ_PUT_FIELD_RET(s, field->arg3);
+
+ return TRACE_TYPE_HANDLED;
+}
+
+static struct trace_event trace_special_event = {
+ .type = TRACE_SPECIAL,
+ .trace = trace_special_print,
+ .latency_trace = trace_special_print,
+ .raw = trace_special_print,
+ .hex = trace_special_hex,
+ .binary = trace_special_bin,
+};
+
+/* TRACE_STACK */
+
+static enum print_line_t trace_stack_print(struct trace_iterator *iter,
+ int flags)
+{
+ struct stack_entry *field;
+ struct trace_seq *s = &iter->seq;
+ int i;
+
+ trace_assign_type(field, iter->ent);
+
+ for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
+ if (i) {
+ if (!trace_seq_puts(s, " <= "))
+ goto partial;
+
+ if (!seq_print_ip_sym(s, field->caller[i], flags))
+ goto partial;
+ }
+ if (!trace_seq_puts(s, "\n"))
+ goto partial;
+ }
+
+ return TRACE_TYPE_HANDLED;
+
+ partial:
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static struct trace_event trace_stack_event = {
+ .type = TRACE_STACK,
+ .trace = trace_stack_print,
+ .latency_trace = trace_stack_print,
+ .raw = trace_special_print,
+ .hex = trace_special_hex,
+ .binary = trace_special_bin,
+};
+
+/* TRACE_USER_STACK */
+static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
+ int flags)
+{
+ struct userstack_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!seq_print_userip_objs(field, s, flags))
+ goto partial;
+
+ if (!trace_seq_putc(s, '\n'))
+ goto partial;
+
+ return TRACE_TYPE_HANDLED;
+
+ partial:
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static struct trace_event trace_user_stack_event = {
+ .type = TRACE_USER_STACK,
+ .trace = trace_user_stack_print,
+ .latency_trace = trace_user_stack_print,
+ .raw = trace_special_print,
+ .hex = trace_special_hex,
+ .binary = trace_special_bin,
+};
+
+/* TRACE_PRINT */
+static enum print_line_t trace_print_print(struct trace_iterator *iter,
+ int flags)
+{
+ struct print_entry *field;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!seq_print_ip_sym(s, field->ip, flags))
+ goto partial;
+
+ if (!trace_seq_printf(s, ": %s", field->buf))
+ goto partial;
+
+ return TRACE_TYPE_HANDLED;
+
+ partial:
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
+{
+ struct print_entry *field;
+
+ trace_assign_type(field, iter->ent);
+
+ if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf))
+ goto partial;
+
+ return TRACE_TYPE_HANDLED;
+
+ partial:
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static struct trace_event trace_print_event = {
+ .type = TRACE_PRINT,
+ .trace = trace_print_print,
+ .latency_trace = trace_print_print,
+ .raw = trace_print_raw,
+};
+
+static struct trace_event *events[] __initdata = {
+ &trace_fn_event,
+ &trace_ctx_event,
+ &trace_wake_event,
+ &trace_special_event,
+ &trace_stack_event,
+ &trace_user_stack_event,
+ &trace_print_event,
+ NULL
+};
+
+__init static int init_events(void)
+{
+ struct trace_event *event;
+ int i, ret;
+
+ for (i = 0; events[i]; i++) {
+ event = events[i];
+
+ ret = register_ftrace_event(event);
+ if (!ret) {
+ printk(KERN_WARNING "event %d failed to register\n",
+ event->type);
+ WARN_ON_ONCE(1);
+ }
+ }
+
+ return 0;
+}
+device_initcall(init_events);
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
new file mode 100644
index 00000000000..551a25a7221
--- /dev/null
+++ b/kernel/trace/trace_output.h
@@ -0,0 +1,62 @@
+#ifndef __TRACE_EVENTS_H
+#define __TRACE_EVENTS_H
+
+#include "trace.h"
+
+typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
+ int flags);
+
+struct trace_event {
+ struct hlist_node node;
+ int type;
+ trace_print_func trace;
+ trace_print_func latency_trace;
+ trace_print_func raw;
+ trace_print_func hex;
+ trace_print_func binary;
+};
+
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+ __attribute__ ((format (printf, 2, 3)));
+extern int
+seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
+ unsigned long sym_flags);
+extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+ size_t cnt);
+int trace_seq_puts(struct trace_seq *s, const char *str);
+int trace_seq_putc(struct trace_seq *s, unsigned char c);
+int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len);
+int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len);
+int trace_seq_path(struct trace_seq *s, struct path *path);
+int seq_print_userip_objs(const struct userstack_entry *entry,
+ struct trace_seq *s, unsigned long sym_flags);
+int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
+ unsigned long ip, unsigned long sym_flags);
+
+int trace_print_context(struct trace_iterator *iter);
+int trace_print_lat_context(struct trace_iterator *iter);
+
+struct trace_event *ftrace_find_event(int type);
+int register_ftrace_event(struct trace_event *event);
+int unregister_ftrace_event(struct trace_event *event);
+
+enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags);
+
+#define MAX_MEMHEX_BYTES 8
+#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
+
+#define SEQ_PUT_FIELD_RET(s, x) \
+do { \
+ if (!trace_seq_putmem(s, &(x), sizeof(x))) \
+ return TRACE_TYPE_PARTIAL_LINE; \
+} while (0)
+
+#define SEQ_PUT_HEX_FIELD_RET(s, x) \
+do { \
+ BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
+ if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
+ return TRACE_TYPE_PARTIAL_LINE; \
+} while (0)
+
+#endif
+
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 7bda248daf5..91ce672fb03 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -11,24 +11,126 @@
#include <linux/init.h>
#include <linux/debugfs.h>
-#include <linux/ftrace.h>
+#include <trace/power.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include "trace.h"
+#include "trace_output.h"
static struct trace_array *power_trace;
static int __read_mostly trace_power_enabled;
+static void probe_power_start(struct power_trace *it, unsigned int type,
+ unsigned int level)
+{
+ if (!trace_power_enabled)
+ return;
+
+ memset(it, 0, sizeof(struct power_trace));
+ it->state = level;
+ it->type = type;
+ it->stamp = ktime_get();
+}
+
+
+static void probe_power_end(struct power_trace *it)
+{
+ struct ring_buffer_event *event;
+ struct trace_power *entry;
+ struct trace_array_cpu *data;
+ struct trace_array *tr = power_trace;
+
+ if (!trace_power_enabled)
+ return;
+
+ preempt_disable();
+ it->end = ktime_get();
+ data = tr->data[smp_processor_id()];
+
+ event = trace_buffer_lock_reserve(tr, TRACE_POWER,
+ sizeof(*entry), 0, 0);
+ if (!event)
+ goto out;
+ entry = ring_buffer_event_data(event);
+ entry->state_data = *it;
+ trace_buffer_unlock_commit(tr, event, 0, 0);
+ out:
+ preempt_enable();
+}
+
+static void probe_power_mark(struct power_trace *it, unsigned int type,
+ unsigned int level)
+{
+ struct ring_buffer_event *event;
+ struct trace_power *entry;
+ struct trace_array_cpu *data;
+ struct trace_array *tr = power_trace;
+
+ if (!trace_power_enabled)
+ return;
+
+ memset(it, 0, sizeof(struct power_trace));
+ it->state = level;
+ it->type = type;
+ it->stamp = ktime_get();
+ preempt_disable();
+ it->end = it->stamp;
+ data = tr->data[smp_processor_id()];
+
+ event = trace_buffer_lock_reserve(tr, TRACE_POWER,
+ sizeof(*entry), 0, 0);
+ if (!event)
+ goto out;
+ entry = ring_buffer_event_data(event);
+ entry->state_data = *it;
+ trace_buffer_unlock_commit(tr, event, 0, 0);
+ out:
+ preempt_enable();
+}
+
+static int tracing_power_register(void)
+{
+ int ret;
+
+ ret = register_trace_power_start(probe_power_start);
+ if (ret) {
+ pr_info("power trace: Couldn't activate tracepoint"
+ " probe to trace_power_start\n");
+ return ret;
+ }
+ ret = register_trace_power_end(probe_power_end);
+ if (ret) {
+ pr_info("power trace: Couldn't activate tracepoint"
+ " probe to trace_power_end\n");
+ goto fail_start;
+ }
+ ret = register_trace_power_mark(probe_power_mark);
+ if (ret) {
+ pr_info("power trace: Couldn't activate tracepoint"
+ " probe to trace_power_mark\n");
+ goto fail_end;
+ }
+ return ret;
+fail_end:
+ unregister_trace_power_end(probe_power_end);
+fail_start:
+ unregister_trace_power_start(probe_power_start);
+ return ret;
+}
static void start_power_trace(struct trace_array *tr)
{
trace_power_enabled = 1;
+ tracing_power_register();
}
static void stop_power_trace(struct trace_array *tr)
{
trace_power_enabled = 0;
+ unregister_trace_power_start(probe_power_start);
+ unregister_trace_power_end(probe_power_end);
+ unregister_trace_power_mark(probe_power_mark);
}
@@ -38,6 +140,7 @@ static int power_trace_init(struct trace_array *tr)
power_trace = tr;
trace_power_enabled = 1;
+ tracing_power_register();
for_each_cpu(cpu, cpu_possible_mask)
tracing_reset(tr, cpu);
@@ -94,86 +197,3 @@ static int init_power_trace(void)
return register_tracer(&power_tracer);
}
device_initcall(init_power_trace);
-
-void trace_power_start(struct power_trace *it, unsigned int type,
- unsigned int level)
-{
- if (!trace_power_enabled)
- return;
-
- memset(it, 0, sizeof(struct power_trace));
- it->state = level;
- it->type = type;
- it->stamp = ktime_get();
-}
-EXPORT_SYMBOL_GPL(trace_power_start);
-
-
-void trace_power_end(struct power_trace *it)
-{
- struct ring_buffer_event *event;
- struct trace_power *entry;
- struct trace_array_cpu *data;
- unsigned long irq_flags;
- struct trace_array *tr = power_trace;
-
- if (!trace_power_enabled)
- return;
-
- preempt_disable();
- it->end = ktime_get();
- data = tr->data[smp_processor_id()];
-
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
- if (!event)
- goto out;
- entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, 0, 0);
- entry->ent.type = TRACE_POWER;
- entry->state_data = *it;
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-
- trace_wake_up();
-
- out:
- preempt_enable();
-}
-EXPORT_SYMBOL_GPL(trace_power_end);
-
-void trace_power_mark(struct power_trace *it, unsigned int type,
- unsigned int level)
-{
- struct ring_buffer_event *event;
- struct trace_power *entry;
- struct trace_array_cpu *data;
- unsigned long irq_flags;
- struct trace_array *tr = power_trace;
-
- if (!trace_power_enabled)
- return;
-
- memset(it, 0, sizeof(struct power_trace));
- it->state = level;
- it->type = type;
- it->stamp = ktime_get();
- preempt_disable();
- it->end = it->stamp;
- data = tr->data[smp_processor_id()];
-
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
- &irq_flags);
- if (!event)
- goto out;
- entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, 0, 0);
- entry->ent.type = TRACE_POWER;
- entry->state_data = *it;
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
-
- trace_wake_up();
-
- out:
- preempt_enable();
-}
-EXPORT_SYMBOL_GPL(trace_power_mark);
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index df175cb4564..77132c2cf3d 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -43,7 +43,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
data = ctx_trace->data[cpu];
if (likely(!atomic_read(&data->disabled)))
- tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
+ tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc);
local_irq_restore(flags);
}
@@ -66,7 +66,7 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
data = ctx_trace->data[cpu];
if (likely(!atomic_read(&data->disabled)))
- tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
+ tracing_sched_wakeup_trace(ctx_trace, wakee, current,
flags, pc);
local_irq_restore(flags);
@@ -93,7 +93,7 @@ static int tracing_sched_register(void)
ret = register_trace_sched_switch(probe_sched_switch);
if (ret) {
pr_info("sched trace: Couldn't activate tracepoint"
- " probe to kernel_sched_schedule\n");
+ " probe to kernel_sched_switch\n");
goto fail_deprobe_wake_new;
}
@@ -185,12 +185,6 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr)
ctx_trace = tr;
}
-static void start_sched_trace(struct trace_array *tr)
-{
- tracing_reset_online_cpus(tr);
- tracing_start_sched_switch_record();
-}
-
static void stop_sched_trace(struct trace_array *tr)
{
tracing_stop_sched_switch_record();
@@ -199,7 +193,7 @@ static void stop_sched_trace(struct trace_array *tr)
static int sched_switch_trace_init(struct trace_array *tr)
{
ctx_trace = tr;
- start_sched_trace(tr);
+ tracing_start_sched_switch_record();
return 0;
}
@@ -227,6 +221,7 @@ static struct tracer sched_switch_trace __read_mostly =
.reset = sched_switch_trace_reset,
.start = sched_switch_trace_start,
.stop = sched_switch_trace_stop,
+ .wait_pipe = poll_wait_pipe,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_sched_switch,
#endif
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 42ae1e77b6b..db55f7aaa64 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -25,6 +25,7 @@ static int __read_mostly tracer_enabled;
static struct task_struct *wakeup_task;
static int wakeup_cpu;
static unsigned wakeup_prio = -1;
+static int wakeup_rt;
static raw_spinlock_t wakeup_lock =
(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
@@ -71,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
if (task_cpu(wakeup_task) != cpu)
goto unlock;
- trace_function(tr, data, ip, parent_ip, flags, pc);
+ trace_function(tr, ip, parent_ip, flags, pc);
unlock:
__raw_spin_unlock(&wakeup_lock);
@@ -151,7 +152,8 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;
- trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+ trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+ tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
/*
* usecs conversion is slow so we try to delay the conversion
@@ -182,13 +184,10 @@ out:
static void __wakeup_reset(struct trace_array *tr)
{
- struct trace_array_cpu *data;
int cpu;
- for_each_possible_cpu(cpu) {
- data = tr->data[cpu];
+ for_each_possible_cpu(cpu)
tracing_reset(tr, cpu);
- }
wakeup_cpu = -1;
wakeup_prio = -1;
@@ -213,6 +212,7 @@ static void wakeup_reset(struct trace_array *tr)
static void
probe_wakeup(struct rq *rq, struct task_struct *p, int success)
{
+ struct trace_array_cpu *data;
int cpu = smp_processor_id();
unsigned long flags;
long disabled;
@@ -224,7 +224,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
tracing_record_cmdline(p);
tracing_record_cmdline(current);
- if (likely(!rt_task(p)) ||
+ if ((wakeup_rt && !rt_task(p)) ||
p->prio >= wakeup_prio ||
p->prio >= current->prio)
return;
@@ -252,9 +252,10 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
local_save_flags(flags);
- wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
- trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
- CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+ data = wakeup_trace->data[wakeup_cpu];
+ data->preempt_timestamp = ftrace_now(cpu);
+ tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
+ trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
out_locked:
__raw_spin_unlock(&wakeup_lock);
@@ -262,12 +263,6 @@ out:
atomic_dec(&wakeup_trace->data[cpu]->disabled);
}
-/*
- * save_tracer_enabled is used to save the state of the tracer_enabled
- * variable when we disable it when we open a trace output file.
- */
-static int save_tracer_enabled;
-
static void start_wakeup_tracer(struct trace_array *tr)
{
int ret;
@@ -289,7 +284,7 @@ static void start_wakeup_tracer(struct trace_array *tr)
ret = register_trace_sched_switch(probe_wakeup_sched_switch);
if (ret) {
pr_info("sched trace: Couldn't activate tracepoint"
- " probe to kernel_sched_schedule\n");
+ " probe to kernel_sched_switch\n");
goto fail_deprobe_wake_new;
}
@@ -306,13 +301,10 @@ static void start_wakeup_tracer(struct trace_array *tr)
register_ftrace_function(&trace_ops);
- if (tracing_is_enabled()) {
+ if (tracing_is_enabled())
tracer_enabled = 1;
- save_tracer_enabled = 1;
- } else {
+ else
tracer_enabled = 0;
- save_tracer_enabled = 0;
- }
return;
fail_deprobe_wake_new:
@@ -324,14 +316,13 @@ fail_deprobe:
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
- save_tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
unregister_trace_sched_switch(probe_wakeup_sched_switch);
unregister_trace_sched_wakeup_new(probe_wakeup);
unregister_trace_sched_wakeup(probe_wakeup);
}
-static int wakeup_tracer_init(struct trace_array *tr)
+static int __wakeup_tracer_init(struct trace_array *tr)
{
tracing_max_latency = 0;
wakeup_trace = tr;
@@ -339,6 +330,18 @@ static int wakeup_tracer_init(struct trace_array *tr)
return 0;
}
+static int wakeup_tracer_init(struct trace_array *tr)
+{
+ wakeup_rt = 0;
+ return __wakeup_tracer_init(tr);
+}
+
+static int wakeup_rt_tracer_init(struct trace_array *tr)
+{
+ wakeup_rt = 1;
+ return __wakeup_tracer_init(tr);
+}
+
static void wakeup_tracer_reset(struct trace_array *tr)
{
stop_wakeup_tracer(tr);
@@ -350,28 +353,11 @@ static void wakeup_tracer_start(struct trace_array *tr)
{
wakeup_reset(tr);
tracer_enabled = 1;
- save_tracer_enabled = 1;
}
static void wakeup_tracer_stop(struct trace_array *tr)
{
tracer_enabled = 0;
- save_tracer_enabled = 0;
-}
-
-static void wakeup_tracer_open(struct trace_iterator *iter)
-{
- /* stop the trace while dumping */
- tracer_enabled = 0;
-}
-
-static void wakeup_tracer_close(struct trace_iterator *iter)
-{
- /* forget about any processes we were recording */
- if (save_tracer_enabled) {
- wakeup_reset(iter->tr);
- tracer_enabled = 1;
- }
}
static struct tracer wakeup_tracer __read_mostly =
@@ -381,8 +367,20 @@ static struct tracer wakeup_tracer __read_mostly =
.reset = wakeup_tracer_reset,
.start = wakeup_tracer_start,
.stop = wakeup_tracer_stop,
- .open = wakeup_tracer_open,
- .close = wakeup_tracer_close,
+ .print_max = 1,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_wakeup,
+#endif
+};
+
+static struct tracer wakeup_rt_tracer __read_mostly =
+{
+ .name = "wakeup_rt",
+ .init = wakeup_rt_tracer_init,
+ .reset = wakeup_tracer_reset,
+ .start = wakeup_tracer_start,
+ .stop = wakeup_tracer_stop,
+ .wait_pipe = poll_wait_pipe,
.print_max = 1,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_wakeup,
@@ -397,6 +395,10 @@ __init static int init_wakeup_tracer(void)
if (ret)
return ret;
+ ret = register_tracer(&wakeup_rt_tracer);
+ if (ret)
+ return ret;
+
return 0;
}
device_initcall(init_wakeup_tracer);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index bc8e80a86bc..7238646b872 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,11 +9,12 @@ static inline int trace_valid_entry(struct trace_entry *entry)
case TRACE_FN:
case TRACE_CTX:
case TRACE_WAKE:
- case TRACE_CONT:
case TRACE_STACK:
case TRACE_PRINT:
case TRACE_SPECIAL:
case TRACE_BRANCH:
+ case TRACE_GRAPH_ENT:
+ case TRACE_GRAPH_RET:
return 1;
}
return 0;
@@ -125,9 +126,9 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
func();
/*
- * Some archs *cough*PowerPC*cough* add charachters to the
+ * Some archs *cough*PowerPC*cough* add characters to the
* start of the function names. We simply put a '*' to
- * accomodate them.
+ * accommodate them.
*/
func_name = "*" STR(DYN_FTRACE_TEST_NAME);
@@ -135,7 +136,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
ftrace_set_filter(func_name, strlen(func_name), 1);
/* enable tracing */
- ret = trace->init(tr);
+ ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
goto out;
@@ -209,7 +210,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
ftrace_enabled = 1;
tracer_enabled = 1;
- ret = trace->init(tr);
+ ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
goto out;
@@ -247,6 +248,54 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
}
#endif /* CONFIG_FUNCTION_TRACER */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * Pretty much the same than for the function tracer from which the selftest
+ * has been borrowed.
+ */
+int
+trace_selftest_startup_function_graph(struct tracer *trace,
+ struct trace_array *tr)
+{
+ int ret;
+ unsigned long count;
+
+ ret = tracer_init(trace, tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ goto out;
+ }
+
+ /* Sleep for a 1/10 of a second */
+ msleep(100);
+
+ tracing_stop();
+
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+
+ trace->reset(tr);
+ tracing_start();
+
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ goto out;
+ }
+
+ /* Don't test dynamic tracing, the function tracer already did */
+
+out:
+ /* Stop it if we failed */
+ if (ret)
+ ftrace_graph_stop();
+
+ return ret;
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+
#ifdef CONFIG_IRQSOFF_TRACER
int
trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
@@ -256,7 +305,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
int ret;
/* start the tracing */
- ret = trace->init(tr);
+ ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
return ret;
@@ -310,7 +359,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
}
/* start the tracing */
- ret = trace->init(tr);
+ ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
return ret;
@@ -364,7 +413,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
}
/* start the tracing */
- ret = trace->init(tr);
+ ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
goto out;
@@ -496,7 +545,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
wait_for_completion(&isrt);
/* start the tracing */
- ret = trace->init(tr);
+ ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
return ret;
@@ -557,7 +606,7 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
int ret;
/* start the tracing */
- ret = trace->init(tr);
+ ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
return ret;
@@ -589,10 +638,10 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
int ret;
/* start the tracing */
- ret = trace->init(tr);
+ ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
- return 0;
+ return ret;
}
/* Sleep for a 1/10 of a second */
@@ -604,6 +653,11 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
trace->reset(tr);
tracing_start();
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ }
+
return ret;
}
#endif /* CONFIG_SYSPROF_TRACER */
@@ -616,7 +670,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
int ret;
/* start the tracing */
- ret = trace->init(tr);
+ ret = tracer_init(trace, tr);
if (ret) {
warn_failed_init_tracer(trace, ret);
return ret;
@@ -631,6 +685,11 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
trace->reset(tr);
tracing_start();
+ if (!ret && !count) {
+ printk(KERN_CONT ".. no entries found ..");
+ ret = -1;
+ }
+
return ret;
}
#endif /* CONFIG_BRANCH_TRACER */
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
new file mode 100644
index 00000000000..39310e3434e
--- /dev/null
+++ b/kernel/trace/trace_stat.c
@@ -0,0 +1,319 @@
+/*
+ * Infrastructure for statistic tracing (histogram output).
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Based on the code from trace_branch.c which is
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+
+#include <linux/list.h>
+#include <linux/debugfs.h>
+#include "trace_stat.h"
+#include "trace.h"
+
+
+/* List of stat entries from a tracer */
+struct trace_stat_list {
+ struct list_head list;
+ void *stat;
+};
+
+/* A stat session is the stats output in one file */
+struct tracer_stat_session {
+ struct list_head session_list;
+ struct tracer_stat *ts;
+ struct list_head stat_list;
+ struct mutex stat_mutex;
+ struct dentry *file;
+};
+
+/* All of the sessions currently in use. Each stat file embed one session */
+static LIST_HEAD(all_stat_sessions);
+static DEFINE_MUTEX(all_stat_sessions_mutex);
+
+/* The root directory for all stat files */
+static struct dentry *stat_dir;
+
+
+static void reset_stat_session(struct tracer_stat_session *session)
+{
+ struct trace_stat_list *node, *next;
+
+ list_for_each_entry_safe(node, next, &session->stat_list, list)
+ kfree(node);
+
+ INIT_LIST_HEAD(&session->stat_list);
+}
+
+static void destroy_session(struct tracer_stat_session *session)
+{
+ debugfs_remove(session->file);
+ reset_stat_session(session);
+ mutex_destroy(&session->stat_mutex);
+ kfree(session);
+}
+
+/*
+ * For tracers that don't provide a stat_cmp callback.
+ * This one will force an immediate insertion on tail of
+ * the list.
+ */
+static int dummy_cmp(void *p1, void *p2)
+{
+ return 1;
+}
+
+/*
+ * Initialize the stat list at each trace_stat file opening.
+ * All of these copies and sorting are required on all opening
+ * since the stats could have changed between two file sessions.
+ */
+static int stat_seq_init(struct tracer_stat_session *session)
+{
+ struct trace_stat_list *iter_entry, *new_entry;
+ struct tracer_stat *ts = session->ts;
+ void *prev_stat;
+ int ret = 0;
+ int i;
+
+ mutex_lock(&session->stat_mutex);
+ reset_stat_session(session);
+
+ if (!ts->stat_cmp)
+ ts->stat_cmp = dummy_cmp;
+
+ /*
+ * The first entry. Actually this is the second, but the first
+ * one (the stat_list head) is pointless.
+ */
+ new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
+ if (!new_entry) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ INIT_LIST_HEAD(&new_entry->list);
+
+ list_add(&new_entry->list, &session->stat_list);
+
+ new_entry->stat = ts->stat_start();
+ prev_stat = new_entry->stat;
+
+ /*
+ * Iterate over the tracer stat entries and store them in a sorted
+ * list.
+ */
+ for (i = 1; ; i++) {
+ new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
+ if (!new_entry) {
+ ret = -ENOMEM;
+ goto exit_free_list;
+ }
+
+ INIT_LIST_HEAD(&new_entry->list);
+ new_entry->stat = ts->stat_next(prev_stat, i);
+
+ /* End of insertion */
+ if (!new_entry->stat)
+ break;
+
+ list_for_each_entry(iter_entry, &session->stat_list, list) {
+
+ /* Insertion with a descendent sorting */
+ if (ts->stat_cmp(new_entry->stat,
+ iter_entry->stat) > 0) {
+
+ list_add_tail(&new_entry->list,
+ &iter_entry->list);
+ break;
+
+ /* The current smaller value */
+ } else if (list_is_last(&iter_entry->list,
+ &session->stat_list)) {
+ list_add(&new_entry->list, &iter_entry->list);
+ break;
+ }
+ }
+
+ prev_stat = new_entry->stat;
+ }
+exit:
+ mutex_unlock(&session->stat_mutex);
+ return ret;
+
+exit_free_list:
+ reset_stat_session(session);
+ mutex_unlock(&session->stat_mutex);
+ return ret;
+}
+
+
+static void *stat_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct tracer_stat_session *session = s->private;
+
+ /* Prevent from tracer switch or stat_list modification */
+ mutex_lock(&session->stat_mutex);
+
+ /* If we are in the beginning of the file, print the headers */
+ if (!*pos && session->ts->stat_headers)
+ session->ts->stat_headers(s);
+
+ return seq_list_start(&session->stat_list, *pos);
+}
+
+static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
+{
+ struct tracer_stat_session *session = s->private;
+
+ return seq_list_next(p, &session->stat_list, pos);
+}
+
+static void stat_seq_stop(struct seq_file *s, void *p)
+{
+ struct tracer_stat_session *session = s->private;
+ mutex_unlock(&session->stat_mutex);
+}
+
+static int stat_seq_show(struct seq_file *s, void *v)
+{
+ struct tracer_stat_session *session = s->private;
+ struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
+
+ return session->ts->stat_show(s, l->stat);
+}
+
+static const struct seq_operations trace_stat_seq_ops = {
+ .start = stat_seq_start,
+ .next = stat_seq_next,
+ .stop = stat_seq_stop,
+ .show = stat_seq_show
+};
+
+/* The session stat is refilled and resorted at each stat file opening */
+static int tracing_stat_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ struct tracer_stat_session *session = inode->i_private;
+
+ ret = seq_open(file, &trace_stat_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = session;
+ ret = stat_seq_init(session);
+ }
+
+ return ret;
+}
+
+/*
+ * Avoid consuming memory with our now useless list.
+ */
+static int tracing_stat_release(struct inode *i, struct file *f)
+{
+ struct tracer_stat_session *session = i->i_private;
+
+ mutex_lock(&session->stat_mutex);
+ reset_stat_session(session);
+ mutex_unlock(&session->stat_mutex);
+
+ return 0;
+}
+
+static const struct file_operations tracing_stat_fops = {
+ .open = tracing_stat_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = tracing_stat_release
+};
+
+static int tracing_stat_init(void)
+{
+ struct dentry *d_tracing;
+
+ d_tracing = tracing_init_dentry();
+
+ stat_dir = debugfs_create_dir("trace_stat", d_tracing);
+ if (!stat_dir)
+ pr_warning("Could not create debugfs "
+ "'trace_stat' entry\n");
+ return 0;
+}
+
+static int init_stat_file(struct tracer_stat_session *session)
+{
+ if (!stat_dir && tracing_stat_init())
+ return -ENODEV;
+
+ session->file = debugfs_create_file(session->ts->name, 0644,
+ stat_dir,
+ session, &tracing_stat_fops);
+ if (!session->file)
+ return -ENOMEM;
+ return 0;
+}
+
+int register_stat_tracer(struct tracer_stat *trace)
+{
+ struct tracer_stat_session *session, *node, *tmp;
+ int ret;
+
+ if (!trace)
+ return -EINVAL;
+
+ if (!trace->stat_start || !trace->stat_next || !trace->stat_show)
+ return -EINVAL;
+
+ /* Already registered? */
+ mutex_lock(&all_stat_sessions_mutex);
+ list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
+ if (node->ts == trace) {
+ mutex_unlock(&all_stat_sessions_mutex);
+ return -EINVAL;
+ }
+ }
+ mutex_unlock(&all_stat_sessions_mutex);
+
+ /* Init the session */
+ session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL);
+ if (!session)
+ return -ENOMEM;
+
+ session->ts = trace;
+ INIT_LIST_HEAD(&session->session_list);
+ INIT_LIST_HEAD(&session->stat_list);
+ mutex_init(&session->stat_mutex);
+ session->file = NULL;
+
+ ret = init_stat_file(session);
+ if (ret) {
+ destroy_session(session);
+ return ret;
+ }
+
+ /* Register */
+ mutex_lock(&all_stat_sessions_mutex);
+ list_add_tail(&session->session_list, &all_stat_sessions);
+ mutex_unlock(&all_stat_sessions_mutex);
+
+ return 0;
+}
+
+void unregister_stat_tracer(struct tracer_stat *trace)
+{
+ struct tracer_stat_session *node, *tmp;
+
+ mutex_lock(&all_stat_sessions_mutex);
+ list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
+ if (node->ts == trace) {
+ list_del(&node->session_list);
+ destroy_session(node);
+ break;
+ }
+ }
+ mutex_unlock(&all_stat_sessions_mutex);
+}
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
new file mode 100644
index 00000000000..202274cf7f3
--- /dev/null
+++ b/kernel/trace/trace_stat.h
@@ -0,0 +1,31 @@
+#ifndef __TRACE_STAT_H
+#define __TRACE_STAT_H
+
+#include <linux/seq_file.h>
+
+/*
+ * If you want to provide a stat file (one-shot statistics), fill
+ * an iterator with stat_start/stat_next and a stat_show callbacks.
+ * The others callbacks are optional.
+ */
+struct tracer_stat {
+ /* The name of your stat file */
+ const char *name;
+ /* Iteration over statistic entries */
+ void *(*stat_start)(void);
+ void *(*stat_next)(void *prev, int idx);
+ /* Compare two entries for stats sorting */
+ int (*stat_cmp)(void *p1, void *p2);
+ /* Print a stat entry */
+ int (*stat_show)(struct seq_file *s, void *p);
+ /* Print the headers of your stat entries */
+ int (*stat_headers)(struct seq_file *s);
+};
+
+/*
+ * Destroy or create a stat file
+ */
+extern int register_stat_tracer(struct tracer_stat *trace);
+extern void unregister_stat_tracer(struct tracer_stat *trace);
+
+#endif /* __TRACE_STAT_H */
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index eaca5ad803f..c771af4e8f1 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -88,7 +88,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
}
}
-const static struct stacktrace_ops backtrace_ops = {
+static const struct stacktrace_ops backtrace_ops = {
.warning = backtrace_warning,
.warning_symbol = backtrace_warning_symbol,
.stack = backtrace_stack,
@@ -226,15 +226,6 @@ static void stop_stack_timers(void)
stop_stack_timer(cpu);
}
-static void start_stack_trace(struct trace_array *tr)
-{
- mutex_lock(&sample_timer_lock);
- tracing_reset_online_cpus(tr);
- start_stack_timers();
- tracer_enabled = 1;
- mutex_unlock(&sample_timer_lock);
-}
-
static void stop_stack_trace(struct trace_array *tr)
{
mutex_lock(&sample_timer_lock);
@@ -247,12 +238,18 @@ static int stack_trace_init(struct trace_array *tr)
{
sysprof_trace = tr;
- start_stack_trace(tr);
+ tracing_start_cmdline_record();
+
+ mutex_lock(&sample_timer_lock);
+ start_stack_timers();
+ tracer_enabled = 1;
+ mutex_unlock(&sample_timer_lock);
return 0;
}
static void stack_trace_reset(struct trace_array *tr)
{
+ tracing_stop_cmdline_record();
stop_stack_trace(tr);
}
@@ -330,5 +327,5 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
d_tracer, NULL, &sysprof_sample_fops);
if (entry)
return;
- pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n");
+ pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n");
}
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
new file mode 100644
index 00000000000..4664990fe9c
--- /dev/null
+++ b/kernel/trace/trace_workqueue.c
@@ -0,0 +1,281 @@
+/*
+ * Workqueue statistical tracer.
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ */
+
+
+#include <trace/workqueue.h>
+#include <linux/list.h>
+#include <linux/percpu.h>
+#include "trace_stat.h"
+#include "trace.h"
+
+
+/* A cpu workqueue thread */
+struct cpu_workqueue_stats {
+ struct list_head list;
+/* Useful to know if we print the cpu headers */
+ bool first_entry;
+ int cpu;
+ pid_t pid;
+/* Can be inserted from interrupt or user context, need to be atomic */
+ atomic_t inserted;
+/*
+ * Don't need to be atomic, works are serialized in a single workqueue thread
+ * on a single CPU.
+ */
+ unsigned int executed;
+};
+
+/* List of workqueue threads on one cpu */
+struct workqueue_global_stats {
+ struct list_head list;
+ spinlock_t lock;
+};
+
+/* Don't need a global lock because allocated before the workqueues, and
+ * never freed.
+ */
+static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
+#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
+
+/* Insertion of a work */
+static void
+probe_workqueue_insertion(struct task_struct *wq_thread,
+ struct work_struct *work)
+{
+ int cpu = cpumask_first(&wq_thread->cpus_allowed);
+ struct cpu_workqueue_stats *node, *next;
+ unsigned long flags;
+
+ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+ list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
+ list) {
+ if (node->pid == wq_thread->pid) {
+ atomic_inc(&node->inserted);
+ goto found;
+ }
+ }
+ pr_debug("trace_workqueue: entry not found\n");
+found:
+ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+}
+
+/* Execution of a work */
+static void
+probe_workqueue_execution(struct task_struct *wq_thread,
+ struct work_struct *work)
+{
+ int cpu = cpumask_first(&wq_thread->cpus_allowed);
+ struct cpu_workqueue_stats *node, *next;
+ unsigned long flags;
+
+ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+ list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
+ list) {
+ if (node->pid == wq_thread->pid) {
+ node->executed++;
+ goto found;
+ }
+ }
+ pr_debug("trace_workqueue: entry not found\n");
+found:
+ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+}
+
+/* Creation of a cpu workqueue thread */
+static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
+{
+ struct cpu_workqueue_stats *cws;
+ unsigned long flags;
+
+ WARN_ON(cpu < 0 || cpu >= num_possible_cpus());
+
+ /* Workqueues are sometimes created in atomic context */
+ cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC);
+ if (!cws) {
+ pr_warning("trace_workqueue: not enough memory\n");
+ return;
+ }
+ tracing_record_cmdline(wq_thread);
+
+ INIT_LIST_HEAD(&cws->list);
+ cws->cpu = cpu;
+
+ cws->pid = wq_thread->pid;
+
+ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+ if (list_empty(&workqueue_cpu_stat(cpu)->list))
+ cws->first_entry = true;
+ list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
+ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+}
+
+/* Destruction of a cpu workqueue thread */
+static void probe_workqueue_destruction(struct task_struct *wq_thread)
+{
+ /* Workqueue only execute on one cpu */
+ int cpu = cpumask_first(&wq_thread->cpus_allowed);
+ struct cpu_workqueue_stats *node, *next;
+ unsigned long flags;
+
+ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+ list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
+ list) {
+ if (node->pid == wq_thread->pid) {
+ list_del(&node->list);
+ kfree(node);
+ goto found;
+ }
+ }
+
+ pr_debug("trace_workqueue: don't find workqueue to destroy\n");
+found:
+ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+
+}
+
+static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
+{
+ unsigned long flags;
+ struct cpu_workqueue_stats *ret = NULL;
+
+
+ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+
+ if (!list_empty(&workqueue_cpu_stat(cpu)->list))
+ ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
+ struct cpu_workqueue_stats, list);
+
+ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+
+ return ret;
+}
+
+static void *workqueue_stat_start(void)
+{
+ int cpu;
+ void *ret = NULL;
+
+ for_each_possible_cpu(cpu) {
+ ret = workqueue_stat_start_cpu(cpu);
+ if (ret)
+ return ret;
+ }
+ return NULL;
+}
+
+static void *workqueue_stat_next(void *prev, int idx)
+{
+ struct cpu_workqueue_stats *prev_cws = prev;
+ int cpu = prev_cws->cpu;
+ unsigned long flags;
+ void *ret = NULL;
+
+ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+ if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
+ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+ for (++cpu ; cpu < num_possible_cpus(); cpu++) {
+ ret = workqueue_stat_start_cpu(cpu);
+ if (ret)
+ return ret;
+ }
+ return NULL;
+ }
+ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+
+ return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
+ list);
+}
+
+static int workqueue_stat_show(struct seq_file *s, void *p)
+{
+ struct cpu_workqueue_stats *cws = p;
+ unsigned long flags;
+ int cpu = cws->cpu;
+
+ seq_printf(s, "%3d %6d %6u %s\n", cws->cpu,
+ atomic_read(&cws->inserted),
+ cws->executed,
+ trace_find_cmdline(cws->pid));
+
+ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+ if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
+ seq_printf(s, "\n");
+ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
+
+ return 0;
+}
+
+static int workqueue_stat_headers(struct seq_file *s)
+{
+ seq_printf(s, "# CPU INSERTED EXECUTED NAME\n");
+ seq_printf(s, "# | | | |\n\n");
+ return 0;
+}
+
+struct tracer_stat workqueue_stats __read_mostly = {
+ .name = "workqueues",
+ .stat_start = workqueue_stat_start,
+ .stat_next = workqueue_stat_next,
+ .stat_show = workqueue_stat_show,
+ .stat_headers = workqueue_stat_headers
+};
+
+
+int __init stat_workqueue_init(void)
+{
+ if (register_stat_tracer(&workqueue_stats)) {
+ pr_warning("Unable to register workqueue stat tracer\n");
+ return 1;
+ }
+
+ return 0;
+}
+fs_initcall(stat_workqueue_init);
+
+/*
+ * Workqueues are created very early, just after pre-smp initcalls.
+ * So we must register our tracepoints at this stage.
+ */
+int __init trace_workqueue_early_init(void)
+{
+ int ret, cpu;
+
+ ret = register_trace_workqueue_insertion(probe_workqueue_insertion);
+ if (ret)
+ goto out;
+
+ ret = register_trace_workqueue_execution(probe_workqueue_execution);
+ if (ret)
+ goto no_insertion;
+
+ ret = register_trace_workqueue_creation(probe_workqueue_creation);
+ if (ret)
+ goto no_execution;
+
+ ret = register_trace_workqueue_destruction(probe_workqueue_destruction);
+ if (ret)
+ goto no_creation;
+
+ for_each_possible_cpu(cpu) {
+ spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
+ INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
+ }
+
+ return 0;
+
+no_creation:
+ unregister_trace_workqueue_creation(probe_workqueue_creation);
+no_execution:
+ unregister_trace_workqueue_execution(probe_workqueue_execution);
+no_insertion:
+ unregister_trace_workqueue_insertion(probe_workqueue_insertion);
+out:
+ pr_warning("trace_workqueue: unable to trace workqueues\n");
+
+ return 1;
+}
+early_initcall(trace_workqueue_early_init);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1f0c509b40d..e53ee18ef43 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -33,6 +33,7 @@
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
+#include <trace/workqueue.h>
/*
* The per-CPU workqueue (if single thread, we always use the first
@@ -125,9 +126,13 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
}
+DEFINE_TRACE(workqueue_insertion);
+
static void insert_work(struct cpu_workqueue_struct *cwq,
struct work_struct *work, struct list_head *head)
{
+ trace_workqueue_insertion(cwq->thread, work);
+
set_wq_data(work, cwq);
/*
* Ensure that we get the right work->data if we see the
@@ -259,6 +264,8 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);
+DEFINE_TRACE(workqueue_execution);
+
static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
spin_lock_irq(&cwq->lock);
@@ -284,7 +291,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
*/
struct lockdep_map lockdep_map = work->lockdep_map;
#endif
-
+ trace_workqueue_execution(cwq->thread, work);
cwq->current_work = work;
list_del_init(cwq->worklist.next);
spin_unlock_irq(&cwq->lock);
@@ -765,6 +772,8 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
return cwq;
}
+DEFINE_TRACE(workqueue_creation);
+
static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -787,6 +796,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
cwq->thread = p;
+ trace_workqueue_creation(cwq->thread, cpu);
+
return 0;
}
@@ -868,6 +879,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
}
EXPORT_SYMBOL_GPL(__create_workqueue_key);
+DEFINE_TRACE(workqueue_destruction);
+
static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
{
/*
@@ -891,6 +904,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
* checks list_empty(), and a "normal" queue_work() can't use
* a dead CPU.
*/
+ trace_workqueue_destruction(cwq->thread);
kthread_stop(cwq->thread);
cwq->thread = NULL;
}
diff --git a/mm/mlock.c b/mm/mlock.c
index 037161d61b4..cbe9e0581b7 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -660,7 +660,7 @@ void *alloc_locked_buffer(size_t size)
return buffer;
}
-void free_locked_buffer(void *buffer, size_t size)
+void release_locked_buffer(void *buffer, size_t size)
{
unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -670,6 +670,11 @@ void free_locked_buffer(void *buffer, size_t size)
current->mm->locked_vm -= pgsz;
up_write(&current->mm->mmap_sem);
+}
+
+void free_locked_buffer(void *buffer, size_t size)
+{
+ release_locked_buffer(buffer, size);
kfree(buffer);
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 3c84128596b..74dc57c7434 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -240,7 +240,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
}
EXPORT_SYMBOL_GPL(bdi_writeout_inc);
-static inline void task_dirty_inc(struct task_struct *tsk)
+void task_dirty_inc(struct task_struct *tsk)
{
prop_inc_single(&vm_dirties, &tsk->dirties);
}
@@ -1230,6 +1230,7 @@ int __set_page_dirty_nobuffers(struct page *page)
__inc_zone_page_state(page, NR_FILE_DIRTY);
__inc_bdi_stat(mapping->backing_dev_info,
BDI_RECLAIMABLE);
+ task_dirty_inc(current);
task_io_account_write(PAGE_CACHE_SIZE);
}
radix_tree_tag_set(&mapping->page_tree,
@@ -1262,7 +1263,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
* If the mapping doesn't provide a set_page_dirty a_op, then
* just fall through and assume that it wants buffer_heads.
*/
-static int __set_page_dirty(struct page *page)
+int set_page_dirty(struct page *page)
{
struct address_space *mapping = page_mapping(page);
@@ -1280,14 +1281,6 @@ static int __set_page_dirty(struct page *page)
}
return 0;
}
-
-int set_page_dirty(struct page *page)
-{
- int ret = __set_page_dirty(page);
- if (ret)
- task_dirty_inc(current);
- return ret;
-}
EXPORT_SYMBOL(set_page_dirty);
/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5675b307385..5c44ed49ca9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2989,7 +2989,7 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
* was used and there are no special requirements, this is a convenient
* alternative
*/
-int __meminit early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn)
{
int i;
@@ -3000,10 +3000,33 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
if (start_pfn <= pfn && pfn < end_pfn)
return early_node_map[i].nid;
}
+ /* This is a memory hole */
+ return -1;
+}
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+
+int __meminit early_pfn_to_nid(unsigned long pfn)
+{
+ int nid;
+ nid = __early_pfn_to_nid(pfn);
+ if (nid >= 0)
+ return nid;
+ /* just returns 0 */
return 0;
}
-#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+
+#ifdef CONFIG_NODES_SPAN_OTHER_NODES
+bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+{
+ int nid;
+
+ nid = __early_pfn_to_nid(pfn);
+ if (nid >= 0 && nid != node)
+ return false;
+ return true;
+}
+#endif
/* Basic iterator support to walk early_node_map[] */
#define for_each_active_range_index_in_nid(i, nid) \
diff --git a/mm/page_io.c b/mm/page_io.c
index dc6ce0afbde..3023c475e04 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -111,7 +111,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
goto out;
}
if (wbc->sync_mode == WB_SYNC_ALL)
- rw |= (1 << BIO_RW_SYNC);
+ rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
count_vm_event(PSWPOUT);
set_page_writeback(page);
unlock_page(page);
diff --git a/mm/slab.c b/mm/slab.c
index 4d00855629c..aeeb4ecb942 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,6 +102,7 @@
#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/module.h>
+#include <trace/kmemtrace.h>
#include <linux/rcupdate.h>
#include <linux/string.h>
#include <linux/uaccess.h>
@@ -568,6 +569,14 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
#endif
+#ifdef CONFIG_KMEMTRACE
+size_t slab_buffer_size(struct kmem_cache *cachep)
+{
+ return cachep->buffer_size;
+}
+EXPORT_SYMBOL(slab_buffer_size);
+#endif
+
/*
* Do not go above this order unless 0 objects fit into the slab.
*/
@@ -3550,10 +3559,23 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
- return __cache_alloc(cachep, flags, __builtin_return_address(0));
+ void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+ obj_size(cachep), cachep->buffer_size, flags);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
+{
+ return __cache_alloc(cachep, flags, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+#endif
+
/**
* kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
* @cachep: the cache we're checking against
@@ -3598,23 +3620,47 @@ out:
#ifdef CONFIG_NUMA
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
- return __cache_alloc_node(cachep, flags, nodeid,
- __builtin_return_address(0));
+ void *ret = __cache_alloc_node(cachep, flags, nodeid,
+ __builtin_return_address(0));
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+ obj_size(cachep), cachep->buffer_size,
+ flags, nodeid);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+ gfp_t flags,
+ int nodeid)
+{
+ return __cache_alloc_node(cachep, flags, nodeid,
+ __builtin_return_address(0));
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+#endif
+
static __always_inline void *
__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
{
struct kmem_cache *cachep;
+ void *ret;
cachep = kmem_find_general_cachep(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- return kmem_cache_alloc_node(cachep, flags, node);
+ ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+ (unsigned long) caller, ret,
+ size, cachep->buffer_size, flags, node);
+
+ return ret;
}
-#ifdef CONFIG_DEBUG_SLAB
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
return __do_kmalloc_node(size, flags, node,
@@ -3647,6 +3693,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
void *caller)
{
struct kmem_cache *cachep;
+ void *ret;
/* If you want to save a few bytes .text space: replace
* __ with kmem_.
@@ -3656,11 +3703,17 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
cachep = __find_general_cachep(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- return __cache_alloc(cachep, flags, caller);
+ ret = __cache_alloc(cachep, flags, caller);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
+ (unsigned long) caller, ret,
+ size, cachep->buffer_size, flags);
+
+ return ret;
}
-#ifdef CONFIG_DEBUG_SLAB
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
void *__kmalloc(size_t size, gfp_t flags)
{
return __do_kmalloc(size, flags, __builtin_return_address(0));
@@ -3699,6 +3752,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
debug_check_no_obj_freed(objp, obj_size(cachep));
__cache_free(cachep, objp);
local_irq_restore(flags);
+
+ kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, objp);
}
EXPORT_SYMBOL(kmem_cache_free);
@@ -3725,6 +3780,8 @@ void kfree(const void *objp)
debug_check_no_obj_freed(objp, obj_size(c));
__cache_free(c, (void *)objp);
local_irq_restore(flags);
+
+ kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, objp);
}
EXPORT_SYMBOL(kfree);
diff --git a/mm/slob.c b/mm/slob.c
index 52bc8a2bd9e..f9cc2468823 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -65,6 +65,7 @@
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
+#include <trace/kmemtrace.h>
#include <asm/atomic.h>
/*
@@ -463,27 +464,38 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
unsigned int *m;
int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+ void *ret;
if (size < PAGE_SIZE - align) {
if (!size)
return ZERO_SIZE_PTR;
m = slob_alloc(size + align, gfp, align, node);
+
if (!m)
return NULL;
*m = size;
- return (void *)m + align;
+ ret = (void *)m + align;
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+ _RET_IP_, ret,
+ size, size + align, gfp, node);
} else {
- void *ret;
+ unsigned int order = get_order(size);
- ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
+ ret = slob_new_page(gfp | __GFP_COMP, order, node);
if (ret) {
struct page *page;
page = virt_to_page(ret);
page->private = size;
}
- return ret;
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+ _RET_IP_, ret,
+ size, PAGE_SIZE << order, gfp, node);
}
+
+ return ret;
}
EXPORT_SYMBOL(__kmalloc_node);
@@ -501,6 +513,8 @@ void kfree(const void *block)
slob_free(m, *m + align);
} else
put_page(&sp->page);
+
+ kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, block);
}
EXPORT_SYMBOL(kfree);
@@ -570,10 +584,19 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
{
void *b;
- if (c->size < PAGE_SIZE)
+ if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node);
- else
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
+ _RET_IP_, b, c->size,
+ SLOB_UNITS(c->size) * SLOB_UNIT,
+ flags, node);
+ } else {
b = slob_new_page(flags, get_order(c->size), node);
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
+ _RET_IP_, b, c->size,
+ PAGE_SIZE << get_order(c->size),
+ flags, node);
+ }
if (c->ctor)
c->ctor(b);
@@ -609,6 +632,8 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
} else {
__kmem_cache_free(b, c->size);
}
+
+ kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, b);
}
EXPORT_SYMBOL(kmem_cache_free);
diff --git a/mm/slub.c b/mm/slub.c
index 0280eee6cf3..3525e7b21d1 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <trace/kmemtrace.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
@@ -1623,18 +1624,46 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
- return slab_alloc(s, gfpflags, -1, _RET_IP_);
+ void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+ s->objsize, s->size, gfpflags);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+{
+ return slab_alloc(s, gfpflags, -1, _RET_IP_);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+#endif
+
#ifdef CONFIG_NUMA
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
- return slab_alloc(s, gfpflags, node, _RET_IP_);
+ void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+ s->objsize, s->size, gfpflags, node);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
#endif
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+ gfp_t gfpflags,
+ int node)
+{
+ return slab_alloc(s, gfpflags, node, _RET_IP_);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+#endif
+
/*
* Slow patch handling. This may still be called frequently since objects
* have a longer lifetime than the cpu slabs in most processing loads.
@@ -1742,6 +1771,8 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
page = virt_to_head_page(x);
slab_free(s, page, x, _RET_IP_);
+
+ kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
@@ -2657,6 +2688,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
void *__kmalloc(size_t size, gfp_t flags)
{
struct kmem_cache *s;
+ void *ret;
if (unlikely(size > PAGE_SIZE))
return kmalloc_large(size, flags);
@@ -2666,7 +2698,12 @@ void *__kmalloc(size_t size, gfp_t flags)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, flags, -1, _RET_IP_);
+ ret = slab_alloc(s, flags, -1, _RET_IP_);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
+ size, s->size, flags);
+
+ return ret;
}
EXPORT_SYMBOL(__kmalloc);
@@ -2685,16 +2722,30 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
struct kmem_cache *s;
+ void *ret;
- if (unlikely(size > PAGE_SIZE))
- return kmalloc_large_node(size, flags, node);
+ if (unlikely(size > PAGE_SIZE)) {
+ ret = kmalloc_large_node(size, flags, node);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+ _RET_IP_, ret,
+ size, PAGE_SIZE << get_order(size),
+ flags, node);
+
+ return ret;
+ }
s = get_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, flags, node, _RET_IP_);
+ ret = slab_alloc(s, flags, node, _RET_IP_);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
+ size, s->size, flags, node);
+
+ return ret;
}
EXPORT_SYMBOL(__kmalloc_node);
#endif
@@ -2753,6 +2804,8 @@ void kfree(const void *x)
return;
}
slab_free(page->slab, page, object, _RET_IP_);
+
+ kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x);
}
EXPORT_SYMBOL(kfree);
@@ -3222,6 +3275,7 @@ static struct notifier_block __cpuinitdata slab_notifier = {
void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
{
struct kmem_cache *s;
+ void *ret;
if (unlikely(size > PAGE_SIZE))
return kmalloc_large(size, gfpflags);
@@ -3231,13 +3285,20 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, gfpflags, -1, caller);
+ ret = slab_alloc(s, gfpflags, -1, caller);
+
+ /* Honor the call site pointer we recieved. */
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size,
+ s->size, gfpflags);
+
+ return ret;
}
void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
int node, unsigned long caller)
{
struct kmem_cache *s;
+ void *ret;
if (unlikely(size > PAGE_SIZE))
return kmalloc_large_node(size, gfpflags, node);
@@ -3247,7 +3308,13 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, gfpflags, node, caller);
+ ret = slab_alloc(s, gfpflags, node, caller);
+
+ /* Honor the call site pointer we recieved. */
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret,
+ size, s->size, gfpflags, node);
+
+ return ret;
}
#ifdef CONFIG_SLUB_DEBUG
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 75f49d312e8..4dd2636d0b9 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1106,6 +1106,14 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
}
EXPORT_SYMBOL_GPL(__get_vm_area);
+struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
+ unsigned long start, unsigned long end,
+ void *caller)
+{
+ return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
+ caller);
+}
+
/**
* get_vm_area - reserve a contiguous kernel virtual area
* @size: size of the area
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index c7de8b39fcf..39a9642927d 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -112,13 +112,13 @@ endif
# ---------------------------------------------------------------------------
# Default is built-in, unless we know otherwise
-modkern_cflags := $(CFLAGS_KERNEL)
+modkern_cflags = $(if $(part-of-module), $(CFLAGS_MODULE), $(CFLAGS_KERNEL))
quiet_modtag := $(empty) $(empty)
-$(real-objs-m) : modkern_cflags := $(CFLAGS_MODULE)
-$(real-objs-m:.o=.i) : modkern_cflags := $(CFLAGS_MODULE)
-$(real-objs-m:.o=.s) : modkern_cflags := $(CFLAGS_MODULE)
-$(real-objs-m:.o=.lst): modkern_cflags := $(CFLAGS_MODULE)
+$(real-objs-m) : part-of-module := y
+$(real-objs-m:.o=.i) : part-of-module := y
+$(real-objs-m:.o=.s) : part-of-module := y
+$(real-objs-m:.o=.lst): part-of-module := y
$(real-objs-m) : quiet_modtag := [M]
$(real-objs-m:.o=.i) : quiet_modtag := [M]
@@ -205,7 +205,8 @@ endif
ifdef CONFIG_FTRACE_MCOUNT_RECORD
cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
"$(if $(CONFIG_64BIT),64,32)" \
- "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
+ "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
+ "$(if $(part-of-module),1,0)" "$(@)";
endif
define rule_cc_o_c
diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index b0246307aac..12caa822a23 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -51,7 +51,7 @@ my %pidctr;
while (<>) {
my $line = $_;
- if ($line =~ /([0-9\.]+)\] calling ([a-zA-Z0-9\_]+)\+/) {
+ if ($line =~ /([0-9\.]+)\] calling ([a-zA-Z0-9\_\.]+)\+/) {
my $func = $2;
if ($done == 0) {
$start{$func} = $1;
@@ -87,7 +87,7 @@ while (<>) {
$count = $count + 1;
}
- if ($line =~ /([0-9\.]+)\] initcall ([a-zA-Z0-9\_]+)\+.*returned/) {
+ if ($line =~ /([0-9\.]+)\] initcall ([a-zA-Z0-9\_\.]+)\+.*returned/) {
if ($done == 0) {
$end{$2} = $1;
$maxtime = $1;
diff --git a/scripts/markup_oops.pl b/scripts/markup_oops.pl
index d40449cafa8..528492bcba5 100644
--- a/scripts/markup_oops.pl
+++ b/scripts/markup_oops.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+#!/usr/bin/perl
use File::Basename;
@@ -29,27 +29,151 @@ my $filename = $vmlinux_name;
my $target = "0";
my $function;
my $module = "";
-my $func_offset;
+my $func_offset = 0;
my $vmaoffset = 0;
+my %regs;
+
+
+sub parse_x86_regs
+{
+ my ($line) = @_;
+ if ($line =~ /EAX: ([0-9a-f]+) EBX: ([0-9a-f]+) ECX: ([0-9a-f]+) EDX: ([0-9a-f]+)/) {
+ $regs{"%eax"} = $1;
+ $regs{"%ebx"} = $2;
+ $regs{"%ecx"} = $3;
+ $regs{"%edx"} = $4;
+ }
+ if ($line =~ /ESI: ([0-9a-f]+) EDI: ([0-9a-f]+) EBP: ([0-9a-f]+) ESP: ([0-9a-f]+)/) {
+ $regs{"%esi"} = $1;
+ $regs{"%edi"} = $2;
+ $regs{"%esp"} = $4;
+ }
+ if ($line =~ /RAX: ([0-9a-f]+) RBX: ([0-9a-f]+) RCX: ([0-9a-f]+)/) {
+ $regs{"%eax"} = $1;
+ $regs{"%ebx"} = $2;
+ $regs{"%ecx"} = $3;
+ }
+ if ($line =~ /RDX: ([0-9a-f]+) RSI: ([0-9a-f]+) RDI: ([0-9a-f]+)/) {
+ $regs{"%edx"} = $1;
+ $regs{"%esi"} = $2;
+ $regs{"%edi"} = $3;
+ }
+ if ($line =~ /RBP: ([0-9a-f]+) R08: ([0-9a-f]+) R09: ([0-9a-f]+)/) {
+ $regs{"%r08"} = $2;
+ $regs{"%r09"} = $3;
+ }
+ if ($line =~ /R10: ([0-9a-f]+) R11: ([0-9a-f]+) R12: ([0-9a-f]+)/) {
+ $regs{"%r10"} = $1;
+ $regs{"%r11"} = $2;
+ $regs{"%r12"} = $3;
+ }
+ if ($line =~ /R13: ([0-9a-f]+) R14: ([0-9a-f]+) R15: ([0-9a-f]+)/) {
+ $regs{"%r13"} = $1;
+ $regs{"%r14"} = $2;
+ $regs{"%r15"} = $3;
+ }
+}
+
+sub reg_name
+{
+ my ($reg) = @_;
+ $reg =~ s/r(.)x/e\1x/;
+ $reg =~ s/r(.)i/e\1i/;
+ $reg =~ s/r(.)p/e\1p/;
+ return $reg;
+}
+
+sub process_x86_regs
+{
+ my ($line, $cntr) = @_;
+ my $str = "";
+ if (length($line) < 40) {
+ return ""; # not an asm istruction
+ }
+
+ # find the arguments to the instruction
+ if ($line =~ /([0-9a-zA-Z\,\%\(\)\-\+]+)$/) {
+ $lastword = $1;
+ } else {
+ return "";
+ }
+
+ # we need to find the registers that get clobbered,
+ # since their value is no longer relevant for previous
+ # instructions in the stream.
+
+ $clobber = $lastword;
+ # first, remove all memory operands, they're read only
+ $clobber =~ s/\([a-z0-9\%\,]+\)//g;
+ # then, remove everything before the comma, thats the read part
+ $clobber =~ s/.*\,//g;
+
+ # if this is the instruction that faulted, we haven't actually done
+ # the write yet... nothing is clobbered.
+ if ($cntr == 0) {
+ $clobber = "";
+ }
+
+ foreach $reg (keys(%regs)) {
+ my $clobberprime = reg_name($clobber);
+ my $lastwordprime = reg_name($lastword);
+ my $val = $regs{$reg};
+ if ($val =~ /^[0]+$/) {
+ $val = "0";
+ } else {
+ $val =~ s/^0*//;
+ }
+
+ # first check if we're clobbering this register; if we do
+ # we print it with a =>, and then delete its value
+ if ($clobber =~ /$reg/ || $clobberprime =~ /$reg/) {
+ if (length($val) > 0) {
+ $str = $str . " $reg => $val ";
+ }
+ $regs{$reg} = "";
+ $val = "";
+ }
+ # now check if we're reading this register
+ if ($lastword =~ /$reg/ || $lastwordprime =~ /$reg/) {
+ if (length($val) > 0) {
+ $str = $str . " $reg = $val ";
+ }
+ }
+ }
+ return $str;
+}
+
+# parse the oops
while (<STDIN>) {
my $line = $_;
if ($line =~ /EIP: 0060:\[\<([a-z0-9]+)\>\]/) {
$target = $1;
}
+ if ($line =~ /RIP: 0010:\[\<([a-z0-9]+)\>\]/) {
+ $target = $1;
+ }
if ($line =~ /EIP is at ([a-zA-Z0-9\_]+)\+(0x[0-9a-f]+)\/0x[a-f0-9]/) {
$function = $1;
$func_offset = $2;
}
+ if ($line =~ /RIP: 0010:\[\<[0-9a-f]+\>\] \[\<[0-9a-f]+\>\] ([a-zA-Z0-9\_]+)\+(0x[0-9a-f]+)\/0x[a-f0-9]/) {
+ $function = $1;
+ $func_offset = $2;
+ }
# check if it's a module
if ($line =~ /EIP is at ([a-zA-Z0-9\_]+)\+(0x[0-9a-f]+)\/0x[a-f0-9]+\W\[([a-zA-Z0-9\_\-]+)\]/) {
$module = $3;
}
+ if ($line =~ /RIP: 0010:\[\<[0-9a-f]+\>\] \[\<[0-9a-f]+\>\] ([a-zA-Z0-9\_]+)\+(0x[0-9a-f]+)\/0x[a-f0-9]+\W\[([a-zA-Z0-9\_\-]+)\]/) {
+ $module = $3;
+ }
+ parse_x86_regs($line);
}
my $decodestart = hex($target) - hex($func_offset);
-my $decodestop = $decodestart + 8192;
+my $decodestop = hex($target) + 8192;
if ($target eq "0") {
print "No oops found!\n";
print "Usage: \n";
@@ -84,6 +208,7 @@ my $counter = 0;
my $state = 0;
my $center = 0;
my @lines;
+my @reglines;
sub InRange {
my ($address, $target) = @_;
@@ -188,16 +313,36 @@ while ($finish < $counter) {
my $i;
-my $fulltext = "";
+
+# start annotating the registers in the asm.
+# this goes from the oopsing point back, so that the annotator
+# can track (opportunistically) which registers got written and
+# whos value no longer is relevant.
+
+$i = $center;
+while ($i >= $start) {
+ $reglines[$i] = process_x86_regs($lines[$i], $center - $i);
+ $i = $i - 1;
+}
+
$i = $start;
while ($i < $finish) {
+ my $line;
if ($i == $center) {
- $fulltext = $fulltext . "*$lines[$i] <----- faulting instruction\n";
+ $line = "*$lines[$i] ";
} else {
- $fulltext = $fulltext . " $lines[$i]\n";
+ $line = " $lines[$i] ";
+ }
+ print $line;
+ if (defined($reglines[$i]) && length($reglines[$i]) > 0) {
+ my $c = 60 - length($line);
+ while ($c > 0) { print " "; $c = $c - 1; };
+ print "| $reglines[$i]";
}
+ if ($i == $center) {
+ print "<--- faulting instruction";
+ }
+ print "\n";
$i = $i +1;
}
-print $fulltext;
-
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 491b8b1b6ab..4eea60b1693 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -210,6 +210,7 @@ static void do_usb_table(void *symval, unsigned long size,
static int do_hid_entry(const char *filename,
struct hid_device_id *id, char *alias)
{
+ id->bus = TO_NATIVE(id->bus);
id->vendor = TO_NATIVE(id->vendor);
id->product = TO_NATIVE(id->product);
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 2500886fb90..ee448cdc6a2 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -86,6 +86,14 @@ echo "%endif"
echo 'cp System.map $RPM_BUILD_ROOT'"/boot/System.map-$KERNELRELEASE"
echo 'cp .config $RPM_BUILD_ROOT'"/boot/config-$KERNELRELEASE"
+
+echo "%ifnarch ppc64"
+echo 'cp vmlinux vmlinux.orig'
+echo 'bzip2 -9 vmlinux'
+echo 'mv vmlinux.bz2 $RPM_BUILD_ROOT'"/boot/vmlinux-$KERNELRELEASE.bz2"
+echo 'mv vmlinux.orig vmlinux'
+echo "%endif"
+
echo ""
echo "%clean"
echo '#echo -rf $RPM_BUILD_ROOT'
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index fe831412bea..409596eca12 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -100,14 +100,19 @@ $P =~ s@.*/@@g;
my $V = '0.1';
-if ($#ARGV < 6) {
- print "usage: $P arch objdump objcopy cc ld nm rm mv inputfile\n";
+if ($#ARGV < 7) {
+ print "usage: $P arch bits objdump objcopy cc ld nm rm mv is_module inputfile\n";
print "version: $V\n";
exit(1);
}
my ($arch, $bits, $objdump, $objcopy, $cc,
- $ld, $nm, $rm, $mv, $inputfile) = @ARGV;
+ $ld, $nm, $rm, $mv, $is_module, $inputfile) = @ARGV;
+
+# This file refers to mcount and shouldn't be ftraced, so lets' ignore it
+if ($inputfile eq "kernel/trace/ftrace.o") {
+ exit(0);
+}
# Acceptable sections to record.
my %text_sections = (
@@ -201,6 +206,13 @@ if ($arch eq "x86_64") {
$alignment = 2;
$section_type = '%progbits';
+} elsif ($arch eq "ia64") {
+ $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
+ $type = "data8";
+
+ if ($is_module eq "0") {
+ $cc .= " -mconstant-gp";
+ }
} else {
die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
}
@@ -263,7 +275,6 @@ if (!$found_version) {
"\tDisabling local function references.\n";
}
-
#
# Step 1: find all the local (static functions) and weak symbols.
# 't' is local, 'w/W' is weak (we never use a weak function)
@@ -331,13 +342,16 @@ sub update_funcs
#
# Step 2: find the sections and mcount call sites
#
-open(IN, "$objdump -dr $inputfile|") || die "error running $objdump";
+open(IN, "$objdump -hdr $inputfile|") || die "error running $objdump";
my $text;
+my $read_headers = 1;
+
while (<IN>) {
# is it a section?
if (/$section_regex/) {
+ $read_headers = 0;
# Only record text sections that we know are safe
if (defined($text_sections{$1})) {
@@ -371,6 +385,19 @@ while (<IN>) {
$ref_func = $text;
}
}
+ } elsif ($read_headers && /$mcount_section/) {
+ #
+ # Somehow the make process can execute this script on an
+ # object twice. If it does, we would duplicate the mcount
+ # section and it will cause the function tracer self test
+ # to fail. Check if the mcount section exists, and if it does,
+ # warn and exit.
+ #
+ print STDERR "ERROR: $mcount_section already in $inputfile\n" .
+ "\tThis may be an indication that your build is corrupted.\n" .
+ "\tDelete $inputfile and try again. If the same object file\n" .
+ "\tstill causes an issue, then disable CONFIG_DYNAMIC_FTRACE.\n";
+ exit(-1);
}
# is this a call site to mcount? If so, record it to print later
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index f6946cf99ce..f1c4b35bc32 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -58,14 +58,7 @@ fi
# Check for svn and a svn repo.
if rev=`svn info 2>/dev/null | grep '^Last Changed Rev'`; then
rev=`echo $rev | awk '{print $NF}'`
- changes=`svn status 2>/dev/null | grep '^[AMD]' | wc -l`
-
- # Are there uncommitted changes?
- if [ $changes != 0 ]; then
- printf -- '-svn%s%s' "$rev" -dirty
- else
- printf -- '-svn%s' "$rev"
- fi
+ printf -- '-svn%s' "$rev"
# All done with svn
exit
diff --git a/scripts/tags.sh b/scripts/tags.sh
index fdbe78bb5e2..5bd8b1003d4 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -76,7 +76,10 @@ all_sources()
all_kconfigs()
{
- find_sources $ALLSOURCE_ARCHS 'Kconfig*'
+ for arch in $ALLSOURCE_ARCHS; do
+ find_sources $arch 'Kconfig*'
+ done
+ find_other_sources 'Kconfig*'
}
all_defconfigs()
@@ -99,7 +102,8 @@ exuberant()
-I ____cacheline_internodealigned_in_smp \
-I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL \
--extra=+f --c-kinds=+px \
- --regex-asm='/^ENTRY\(([^)]*)\).*/\1/'
+ --regex-asm='/^ENTRY\(([^)]*)\).*/\1/' \
+ --regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/'
all_kconfigs | xargs $1 -a \
--langdef=kconfig --language-force=kconfig \
@@ -117,7 +121,9 @@ exuberant()
emacs()
{
- all_sources | xargs $1 -a
+ all_sources | xargs $1 -a \
+ --regex='/^ENTRY(\([^)]*\)).*/\1/' \
+ --regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/'
all_kconfigs | xargs $1 -a \
--regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/\3/'
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e9693a29d00..4c403750360 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -73,14 +73,13 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
{
int i, r = 0;
- down_read(&kvm->slots_lock);
for (i = 0; i < kvm->nmemslots; i++) {
r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
kvm->memslots[i].npages);
if (r)
break;
}
- up_read(&kvm->slots_lock);
+
return r;
}
@@ -190,12 +189,11 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
static int kvm_iommu_unmap_memslots(struct kvm *kvm)
{
int i;
- down_read(&kvm->slots_lock);
+
for (i = 0; i < kvm->nmemslots; i++) {
kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
kvm->memslots[i].npages);
}
- up_read(&kvm->slots_lock);
return 0;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3a5a08298aa..29a667ce35b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -173,7 +173,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
assigned_dev->host_irq_disabled = false;
}
mutex_unlock(&assigned_dev->kvm->lock);
- kvm_put_kvm(assigned_dev->kvm);
}
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -181,8 +180,6 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
struct kvm_assigned_dev_kernel *assigned_dev =
(struct kvm_assigned_dev_kernel *) dev_id;
- kvm_get_kvm(assigned_dev->kvm);
-
schedule_work(&assigned_dev->interrupt_work);
disable_irq_nosync(irq);
@@ -213,6 +210,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
}
}
+/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
static void kvm_free_assigned_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
@@ -228,11 +226,24 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
if (!assigned_dev->irq_requested_type)
return;
- if (cancel_work_sync(&assigned_dev->interrupt_work))
- /* We had pending work. That means we will have to take
- * care of kvm_put_kvm.
- */
- kvm_put_kvm(kvm);
+ /*
+ * In kvm_free_device_irq, cancel_work_sync return true if:
+ * 1. work is scheduled, and then cancelled.
+ * 2. work callback is executed.
+ *
+ * The first one ensured that the irq is disabled and no more events
+ * would happen. But for the second one, the irq may be enabled (e.g.
+ * for MSI). So we disable irq here to prevent further events.
+ *
+ * Notice this maybe result in nested disable if the interrupt type is
+ * INTx, but it's OK for we are going to free it.
+ *
+ * If this function is a part of VM destroy, please ensure that till
+ * now, the kvm state is still legal for probably we also have to wait
+ * interrupt_work done.
+ */
+ disable_irq_nosync(assigned_dev->host_irq);
+ cancel_work_sync(&assigned_dev->interrupt_work);
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
@@ -285,8 +296,8 @@ static int assigned_device_update_intx(struct kvm *kvm,
if (irqchip_in_kernel(kvm)) {
if (!msi2intx &&
- adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
- free_irq(adev->host_irq, (void *)kvm);
+ (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) {
+ free_irq(adev->host_irq, (void *)adev);
pci_disable_msi(adev->dev);
}
@@ -455,6 +466,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
+ down_read(&kvm->slots_lock);
mutex_lock(&kvm->lock);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
@@ -516,6 +528,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
out:
mutex_unlock(&kvm->lock);
+ up_read(&kvm->slots_lock);
return r;
out_list_del:
list_del(&match->list);
@@ -527,6 +540,7 @@ out_put:
out_free:
kfree(match);
mutex_unlock(&kvm->lock);
+ up_read(&kvm->slots_lock);
return r;
}
#endif
@@ -789,11 +803,19 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
return young;
}
+static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
+ struct mm_struct *mm)
+{
+ struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ kvm_arch_flush_shadow(kvm);
+}
+
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.invalidate_page = kvm_mmu_notifier_invalidate_page,
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
+ .release = kvm_mmu_notifier_release,
};
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
@@ -883,6 +905,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
{
struct mm_struct *mm = kvm->mm;
+ kvm_arch_sync_events(kvm);
spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);