aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/debugfs-kmemtrace71
-rw-r--r--Documentation/kernel-parameters.txt10
-rw-r--r--Documentation/sysrq.txt2
-rw-r--r--Documentation/vm/kmemtrace.txt126
-rw-r--r--MAINTAINERS6
-rw-r--r--arch/x86/kvm/Kconfig3
-rw-r--r--drivers/char/sysrq.c2
-rw-r--r--include/linux/slab_def.h68
-rw-r--r--include/linux/slob_def.h9
-rw-r--r--include/linux/slub_def.h53
-rw-r--r--include/trace/kmemtrace.h75
-rw-r--r--init/main.c2
-rw-r--r--kernel/relay.c4
-rw-r--r--kernel/trace/Kconfig21
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/kmemtrace.c345
-rw-r--r--kernel/trace/trace.h25
-rw-r--r--kernel/trace/trace_functions_graph.c6
-rw-r--r--mm/slab.c71
-rw-r--r--mm/slob.c37
-rw-r--r--mm/slub.c83
21 files changed, 978 insertions, 42 deletions
diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
new file mode 100644
index 00000000000..5e6a92a02d8
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-kmemtrace
@@ -0,0 +1,71 @@
+What: /sys/kernel/debug/kmemtrace/
+Date: July 2008
+Contact: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
+Description:
+
+In kmemtrace-enabled kernels, the following files are created:
+
+/sys/kernel/debug/kmemtrace/
+ cpu<n> (0400) Per-CPU tracing data, see below. (binary)
+ total_overruns (0400) Total number of bytes which were dropped from
+ cpu<n> files because of full buffer condition,
+ non-binary. (text)
+ abi_version (0400) Kernel's kmemtrace ABI version. (text)
+
+Each per-CPU file should be read according to the relay interface. That is,
+the reader should set affinity to that specific CPU and, as currently done by
+the userspace application (though there are other methods), use poll() with
+an infinite timeout before every read(). Otherwise, erroneous data may be
+read. The binary data has the following _core_ format:
+
+ Event ID (1 byte) Unsigned integer, one of:
+ 0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
+ 1 - represents a freeing of previously allocated memory
+ (KMEMTRACE_EVENT_FREE)
+ Type ID (1 byte) Unsigned integer, one of:
+ 0 - this is a kmalloc() / kfree()
+ 1 - this is a kmem_cache_alloc() / kmem_cache_free()
+ 2 - this is a __get_free_pages() et al.
+ Event size (2 bytes) Unsigned integer representing the
+ size of this event. Used to extend
+ kmemtrace. Discard the bytes you
+ don't know about.
+ Sequence number (4 bytes) Signed integer used to reorder data
+ logged on SMP machines. Wraparound
+ must be taken into account, although
+ it is unlikely.
+ Caller address (8 bytes) Return address to the caller.
+ Pointer to mem (8 bytes) Pointer to target memory area. Can be
+ NULL, but not all such calls might be
+ recorded.
+
+In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
+
+ Requested bytes (8 bytes) Total number of requested bytes,
+ unsigned, must not be zero.
+ Allocated bytes (8 bytes) Total number of actually allocated
+ bytes, unsigned, must not be lower
+ than requested bytes.
+ Requested flags (4 bytes) GFP flags supplied by the caller.
+ Target CPU (4 bytes) Signed integer, valid for event id 1.
+ If equal to -1, target CPU is the same
+ as origin CPU, but the reverse might
+ not be true.
+
+The data is made available in the same endianness the machine has.
+
+Other event ids and type ids may be defined and added. Other fields may be
+added by increasing event size, but see below for details.
+Every modification to the ABI, including new id definitions, is followed
+by bumping the ABI version by one.
+
+Adding new data to the packet (features) is done at the end of the mandatory
+data:
+ Feature size (2 bytes)
+ Feature ID (1 byte)
+ Feature data (Feature size - 3 bytes)
+
+
+Users:
+ kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
+
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a2d8805c03d..af600c0fe0e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -49,6 +49,7 @@ parameter is applicable:
ISAPNP ISA PnP code is enabled.
ISDN Appropriate ISDN support is enabled.
JOY Appropriate joystick support is enabled.
+ KMEMTRACE kmemtrace is enabled.
LIBATA Libata driver is enabled
LP Printer support is enabled.
LOOP Loopback device support is enabled.
@@ -1033,6 +1034,15 @@ and is between 256 and 4096 characters. It is defined in the file
use the HighMem zone if it exists, and the Normal
zone if it does not.
+ kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no }
+ Controls whether kmemtrace is enabled
+ at boot-time.
+
+ kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of
+ subbufs kmemtrace's relay channel has. Set this
+ higher than default (KMEMTRACE_N_SUBBUFS in code) if
+ you experience buffer overruns.
+
movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
is similar to kernelcore except it specifies the
amount of memory used for migratable allocations.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 10a0263ebb3..56b53e005d1 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -114,6 +114,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
'x' - Used by xmon interface on ppc/powerpc platforms.
+'z' - Dump the ftrace buffer
+
'0'-'9' - Sets the console log level, controlling which kernel messages
will be printed to your console. ('0', for example would make
it so that only emergency messages like PANICs or OOPSes would
diff --git a/Documentation/vm/kmemtrace.txt b/Documentation/vm/kmemtrace.txt
new file mode 100644
index 00000000000..a956d9b7f94
--- /dev/null
+++ b/Documentation/vm/kmemtrace.txt
@@ -0,0 +1,126 @@
+ kmemtrace - Kernel Memory Tracer
+
+ by Eduard - Gabriel Munteanu
+ <eduard.munteanu@linux360.ro>
+
+I. Introduction
+===============
+
+kmemtrace helps kernel developers figure out two things:
+1) how different allocators (SLAB, SLUB etc.) perform
+2) how kernel code allocates memory and how much
+
+To do this, we trace every allocation and export information to the userspace
+through the relay interface. We export things such as the number of requested
+bytes, the number of bytes actually allocated (i.e. including internal
+fragmentation), whether this is a slab allocation or a plain kmalloc() and so
+on.
+
+The actual analysis is performed by a userspace tool (see section III for
+details on where to get it from). It logs the data exported by the kernel,
+processes it and (as of writing this) can provide the following information:
+- the total amount of memory allocated and fragmentation per call-site
+- the amount of memory allocated and fragmentation per allocation
+- total memory allocated and fragmentation in the collected dataset
+- number of cross-CPU allocations and frees (makes sense in NUMA environments)
+
+Moreover, it can potentially find inconsistent and erroneous behavior in
+kernel code, such as using slab free functions on kmalloc'ed memory or
+allocating less memory than requested (but not truly failed allocations).
+
+kmemtrace also makes provisions for tracing on some arch and analysing the
+data on another.
+
+II. Design and goals
+====================
+
+kmemtrace was designed to handle rather large amounts of data. Thus, it uses
+the relay interface to export whatever is logged to userspace, which then
+stores it. Analysis and reporting is done asynchronously, that is, after the
+data is collected and stored. By design, it allows one to log and analyse
+on different machines and different arches.
+
+As of writing this, the ABI is not considered stable, though it might not
+change much. However, no guarantees are made about compatibility yet. When
+deemed stable, the ABI should still allow easy extension while maintaining
+backward compatibility. This is described further in Documentation/ABI.
+
+Summary of design goals:
+ - allow logging and analysis to be done across different machines
+ - be fast and anticipate usage in high-load environments (*)
+ - be reasonably extensible
+ - make it possible for GNU/Linux distributions to have kmemtrace
+ included in their repositories
+
+(*) - one of the reasons Pekka Enberg's original userspace data analysis
+ tool's code was rewritten from Perl to C (although this is more than a
+ simple conversion)
+
+
+III. Quick usage guide
+======================
+
+1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
+CONFIG_KMEMTRACE).
+
+2) Get the userspace tool and build it:
+$ git-clone git://repo.or.cz/kmemtrace-user.git # current repository
+$ cd kmemtrace-user/
+$ ./autogen.sh
+$ ./configure
+$ make
+
+3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
+'single' runlevel (so that relay buffers don't fill up easily), and run
+kmemtrace:
+# '$' does not mean user, but root here.
+$ mount -t debugfs none /sys/kernel/debug
+$ mount -t proc none /proc
+$ cd path/to/kmemtrace-user/
+$ ./kmemtraced
+Wait a bit, then stop it with CTRL+C.
+$ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't
+ # overrun, should
+ # be zero.
+$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
+ check its correctness]
+$ ./kmemtrace-report
+
+Now you should have a nice and short summary of how the allocator performs.
+
+IV. FAQ and known issues
+========================
+
+Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
+this? Should I worry?
+A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
+large the number is. You can fix it by supplying a higher
+'kmemtrace.subbufs=N' kernel parameter.
+---
+
+Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
+A: This is a bug and should be reported. It can occur for a variety of
+reasons:
+ - possible bugs in relay code
+ - possible misuse of relay by kmemtrace
+ - timestamps being collected out of order
+Or you may fix it yourself and send us a patch.
+---
+
+Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
+A: This is a known issue and I'm working on it. These might be true errors
+in kernel code, which may have inconsistent behavior (e.g. allocating memory
+with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
+out this behavior may work with SLAB, but may fail with other allocators.
+
+It may also be due to lack of tracing in some unusual allocator functions.
+
+We don't want bug reports regarding this issue yet.
+---
+
+V. See also
+===========
+
+Documentation/kernel-parameters.txt
+Documentation/ABI/testing/debugfs-kmemtrace
+
diff --git a/MAINTAINERS b/MAINTAINERS
index 141aff67bd6..ec2fd64d227 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2572,6 +2572,12 @@ M: jason.wessel@windriver.com
L: kgdb-bugreport@lists.sourceforge.net
S: Maintained
+KMEMTRACE
+P: Eduard - Gabriel Munteanu
+M: eduard.munteanu@linux360.ro
+L: linux-kernel@vger.kernel.org
+S: Maintained
+
KPROBES
P: Ananth N Mavinakayanahalli
M: ananth@in.ibm.com
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b81125f0bde..c7da3683f4c 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -55,7 +55,8 @@ config KVM_AMD
config KVM_TRACE
bool "KVM trace support"
- depends on KVM && MARKERS && SYSFS
+ depends on KVM && SYSFS
+ select MARKERS
select RELAY
select DEBUG_FS
default n
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 94966edfb44..785a08ef9a1 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -283,7 +283,7 @@ static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
}
static struct sysrq_key_op sysrq_ftrace_dump_op = {
.handler = sysrq_ftrace_dump,
- .help_msg = "dumpZ-ftrace-buffer",
+ .help_msg = "dump-ftrace-buffer(Z)",
.action_msg = "Dump ftrace buffer",
.enable_mask = SYSRQ_ENABLE_DUMP,
};
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 39c3a5eb8eb..455f9affea9 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,6 +14,7 @@
#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
#include <linux/compiler.h>
+#include <trace/kmemtrace.h>
/* Size description struct for general caches. */
struct cache_sizes {
@@ -28,8 +29,26 @@ extern struct cache_sizes malloc_sizes[];
void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
void *__kmalloc(size_t size, gfp_t flags);
-static inline void *kmalloc(size_t size, gfp_t flags)
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags);
+extern size_t slab_buffer_size(struct kmem_cache *cachep);
+#else
+static __always_inline void *
+kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
{
+ return kmem_cache_alloc(cachep, flags);
+}
+static inline size_t slab_buffer_size(struct kmem_cache *cachep)
+{
+ return 0;
+}
+#endif
+
+static __always_inline void *kmalloc(size_t size, gfp_t flags)
+{
+ struct kmem_cache *cachep;
+ void *ret;
+
if (__builtin_constant_p(size)) {
int i = 0;
@@ -50,10 +69,17 @@ static inline void *kmalloc(size_t size, gfp_t flags)
found:
#ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA)
- return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep,
- flags);
+ cachep = malloc_sizes[i].cs_dmacachep;
+ else
#endif
- return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags);
+ cachep = malloc_sizes[i].cs_cachep;
+
+ ret = kmem_cache_alloc_notrace(cachep, flags);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
+ size, slab_buffer_size(cachep), flags);
+
+ return ret;
}
return __kmalloc(size, flags);
}
@@ -62,8 +88,25 @@ found:
extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+ gfp_t flags,
+ int nodeid);
+#else
+static __always_inline void *
+kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+ gfp_t flags,
+ int nodeid)
+{
+ return kmem_cache_alloc_node(cachep, flags, nodeid);
+}
+#endif
+
+static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
+ struct kmem_cache *cachep;
+ void *ret;
+
if (__builtin_constant_p(size)) {
int i = 0;
@@ -84,11 +127,18 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
found:
#ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA)
- return kmem_cache_alloc_node(malloc_sizes[i].cs_dmacachep,
- flags, node);
+ cachep = malloc_sizes[i].cs_dmacachep;
+ else
#endif
- return kmem_cache_alloc_node(malloc_sizes[i].cs_cachep,
- flags, node);
+ cachep = malloc_sizes[i].cs_cachep;
+
+ ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_,
+ ret, size, slab_buffer_size(cachep),
+ flags, node);
+
+ return ret;
}
return __kmalloc_node(size, flags, node);
}
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 59a3fa476ab..0ec00b39d00 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -3,14 +3,15 @@
void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
-static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags)
{
return kmem_cache_alloc_node(cachep, flags, -1);
}
void *__kmalloc_node(size_t size, gfp_t flags, int node);
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
return __kmalloc_node(size, flags, node);
}
@@ -23,12 +24,12 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
* kmalloc is the normal method of allocating memory
* in the kernel.
*/
-static inline void *kmalloc(size_t size, gfp_t flags)
+static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
return __kmalloc_node(size, flags, -1);
}
-static inline void *__kmalloc(size_t size, gfp_t flags)
+static __always_inline void *__kmalloc(size_t size, gfp_t flags)
{
return kmalloc(size, flags);
}
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 2f5c16b1aac..6b657f7dcb2 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,6 +10,7 @@
#include <linux/gfp.h>
#include <linux/workqueue.h>
#include <linux/kobject.h>
+#include <trace/kmemtrace.h>
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
@@ -204,13 +205,31 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
void *__kmalloc(size_t size, gfp_t flags);
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
+#else
+static __always_inline void *
+kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+{
+ return kmem_cache_alloc(s, gfpflags);
+}
+#endif
+
static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
{
- return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size));
+ unsigned int order = get_order(size);
+ void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
+ size, PAGE_SIZE << order, flags);
+
+ return ret;
}
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
+ void *ret;
+
if (__builtin_constant_p(size)) {
if (size > PAGE_SIZE)
return kmalloc_large(size, flags);
@@ -221,7 +240,13 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
if (!s)
return ZERO_SIZE_PTR;
- return kmem_cache_alloc(s, flags);
+ ret = kmem_cache_alloc_notrace(s, flags);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
+ _THIS_IP_, ret,
+ size, s->size, flags);
+
+ return ret;
}
}
return __kmalloc(size, flags);
@@ -231,8 +256,24 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
void *__kmalloc_node(size_t size, gfp_t flags, int node);
void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+ gfp_t gfpflags,
+ int node);
+#else
+static __always_inline void *
+kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+ gfp_t gfpflags,
+ int node)
+{
+ return kmem_cache_alloc_node(s, gfpflags, node);
+}
+#endif
+
static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
+ void *ret;
+
if (__builtin_constant_p(size) &&
size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
struct kmem_cache *s = kmalloc_slab(size);
@@ -240,7 +281,13 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
if (!s)
return ZERO_SIZE_PTR;
- return kmem_cache_alloc_node(s, flags, node);
+ ret = kmem_cache_alloc_node_notrace(s, flags, node);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+ _THIS_IP_, ret,
+ size, s->size, flags, node);
+
+ return ret;
}
return __kmalloc_node(size, flags, node);
}
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
new file mode 100644
index 00000000000..ad8b7857855
--- /dev/null
+++ b/include/trace/kmemtrace.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ *
+ * This file is released under GPL version 2.
+ */
+
+#ifndef _LINUX_KMEMTRACE_H
+#define _LINUX_KMEMTRACE_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/marker.h>
+
+/* Identifies which allocator interface a kmemtrace event came from. */
+enum kmemtrace_type_id {
+ KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
+ KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
+ KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
+};
+
+#ifdef CONFIG_KMEMTRACE
+
+/*
+ * Called once from start_kernel(), right after kmem_cache_init().
+ * NOTE(review): no definition of kmemtrace_init() is visible in
+ * kernel/trace/kmemtrace.c in this patch - confirm where it is defined.
+ */
+extern void kmemtrace_init(void);
+
+/*
+ * Record an allocation event.
+ * type_id: allocator interface that served the request
+ * call_site: address of the caller
+ * ptr: pointer returned by the allocator
+ * bytes_req: number of bytes the caller asked for
+ * bytes_alloc: number of bytes actually allocated
+ * gfp_flags: GFP flags supplied by the caller
+ * node: node argument of the allocation; kmemtrace_mark_alloc() passes -1
+ */
+extern void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags,
+ int node);
+
+/*
+ * Record a free event for ptr, released through the interface named by
+ * type_id, on behalf of the caller at call_site.
+ */
+extern void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr);
+
+#else /* CONFIG_KMEMTRACE */
+
+/*
+ * Empty stubs: with CONFIG_KMEMTRACE disabled the hooks compile to nothing,
+ * so call sites do not need their own #ifdef.
+ */
+static inline void kmemtrace_init(void)
+{
+}
+
+static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags,
+ int node)
+{
+}
+
+static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr)
+{
+}
+
+#endif /* CONFIG_KMEMTRACE */
+
+/*
+ * Convenience wrapper around kmemtrace_mark_alloc_node() for allocations
+ * that do not name a node: it forwards all arguments and passes -1 as the
+ * node. Defined outside the #ifdef, so it works with both the real hook and
+ * the empty stub.
+ */
+static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags)
+{
+ kmemtrace_mark_alloc_node(type_id, call_site, ptr,
+ bytes_req, bytes_alloc, gfp_flags, -1);
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_KMEMTRACE_H */
+
diff --git a/init/main.c b/init/main.c
index cd168ebc592..72a521c0da0 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>
+#include <trace/kmemtrace.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/smp.h>
@@ -642,6 +643,7 @@ asmlinkage void __init start_kernel(void)
enable_debug_pagealloc();
cpu_hotplug_init();
kmem_cache_init();
+ kmemtrace_init();
debug_objects_mem_init();
idr_init_cache();
setup_per_cpu_pageset();
diff --git a/kernel/relay.c b/kernel/relay.c
index 09ac2008f77..d06450670c8 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -675,9 +675,7 @@ int relay_late_setup_files(struct rchan *chan,
*/
for_each_online_cpu(i) {
if (unlikely(!chan->buf[i])) {
- printk(KERN_ERR "relay_late_setup_files: CPU %u "
- "has no buffer, it must have!\n", i);
- BUG();
+ WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
err = -EINVAL;
break;
}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e2a4ff6fc3a..1c0b7504cab 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -264,6 +264,27 @@ config HW_BRANCH_TRACER
This tracer records all branches on the system in a circular
buffer giving access to the last N branches for each cpu.
+config KMEMTRACE
+ bool "Trace SLAB allocations"
+ select TRACING
+ help
+ kmemtrace provides tracing for slab allocator functions, such as
+ kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc. Collected
+ data is then fed to the userspace application in order to analyse
+ allocation hotspots, internal fragmentation and so on, making it
+ possible to see how well an allocator performs, as well as debug
+ and profile kernel code.
+
+ This requires a userspace application to use. See
+ Documentation/vm/kmemtrace.txt for more information.
+
+ Saying Y will make the kernel somewhat larger and slower. However,
+ if you disable kmemtrace at run-time or boot-time, the performance
+ impact is minimal (depending on the arch the kernel is built for).
+
+ If unsure, say N.
+
+
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
depends on FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 349d5a93653..513dc86b5df 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -33,5 +33,6 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
obj-$(CONFIG_POWER_TRACER) += trace_power.o
+obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
new file mode 100644
index 00000000000..2bfdcd32622
--- /dev/null
+++ b/kernel/trace/kmemtrace.c
@@ -0,0 +1,345 @@
+/*
+ * Memory allocator tracing
+ *
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+#include <linux/dcache.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <trace/kmemtrace.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+/* Select an alternative output that is more minimalistic than the original one */
+#define TRACE_KMEM_OPT_MINIMAL 0x1
+
+/* Options offered by the kmemtrace tracer; the list ends with an empty entry. */
+static struct tracer_opt kmem_opts[] = {
+ /* The minimalistic output is disabled by default */
+ { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
+ { }
+};
+
+/*
+ * Current option state for the tracer. .val starts at 0, i.e. with
+ * TRACE_KMEM_OPT_MINIMAL clear, so the original output format is used.
+ */
+static struct tracer_flags kmem_tracer_flags = {
+ .val = 0,
+ .opts = kmem_opts
+};
+
+
+static bool kmem_tracing_enabled __read_mostly;
+static struct trace_array *kmemtrace_array;
+
+/*
+ * Tracer ->init callback: remember the trace array used by the marker
+ * functions, call tracing_reset() for every CPU in cpu_possible_map, then
+ * switch event recording on. Always returns 0.
+ */
+static int kmem_trace_init(struct trace_array *tr)
+{
+	int i;
+
+	kmemtrace_array = tr;
+
+	for_each_cpu_mask(i, cpu_possible_map) {
+		tracing_reset(tr, i);
+	}
+
+	kmem_tracing_enabled = true;
+	return 0;
+}
+
+/*
+ * Tracer ->reset callback: stop recording kmemtrace events. The marker
+ * functions return early while kmem_tracing_enabled is false; tr is unused.
+ */
+static void kmem_trace_reset(struct trace_array *tr)
+{
+ kmem_tracing_enabled = false;
+}
+
+/*
+ * Tracer ->print_header callback. Column headers are emitted only when the
+ * minimalistic output option is set; the original kmemtrace output has none.
+ */
+static void kmemtrace_headers(struct seq_file *s)
+{
+	if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) {
+		seq_printf(s, "#\n");
+		seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
+			" POINTER NODE CALLER\n");
+		seq_printf(s, "# FREE | | | | "
+			" | | | |\n");
+		seq_printf(s, "# |\n\n");
+	}
+}
+
+/*
+ * The following two functions give the original output from kmemtrace,
+ * or something close to it; some details may still be missing.
+ */
+/*
+ * Print one allocation event in the original kmemtrace text format.
+ * Returns TRACE_TYPE_PARTIAL_LINE when trace_seq_printf() reports 0,
+ * TRACE_TYPE_HANDLED otherwise.
+ */
+static enum print_line_t
+kmemtrace_print_alloc_original(struct trace_iterator *iter,
+				struct kmemtrace_alloc_entry *entry)
+{
+	/* Format taken from the old linux/kmemtrace.h */
+	if (!trace_seq_printf(&iter->seq,
+			"type_id %d call_site %lu ptr %lu "
+			"bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
+			entry->type_id,
+			entry->call_site,
+			(unsigned long) entry->ptr,
+			(unsigned long) entry->bytes_req,
+			(unsigned long) entry->bytes_alloc,
+			(unsigned long) entry->gfp_flags,
+			entry->node))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+/*
+ * Print one free event in the original kmemtrace text format.
+ * Returns TRACE_TYPE_PARTIAL_LINE when trace_seq_printf() reports 0,
+ * TRACE_TYPE_HANDLED otherwise.
+ */
+static enum print_line_t
+kmemtrace_print_free_original(struct trace_iterator *iter,
+				struct kmemtrace_free_entry *entry)
+{
+	/* Format taken from the old linux/kmemtrace.h */
+	if (!trace_seq_printf(&iter->seq,
+			"type_id %d call_site %lu ptr %lu\n",
+			entry->type_id,
+			entry->call_site,
+			(unsigned long) entry->ptr))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+
+/* The other two functions below provide a more minimalistic output */
+/*
+ * Print one allocation event in the minimalistic format: a "+" marker, a
+ * type letter (K, C, P or ? for an unknown type), the requested and
+ * allocated byte counts, the GFP flags, the pointer, the node and the
+ * call site symbol, followed by a newline.
+ *
+ * Returns TRACE_TYPE_PARTIAL_LINE as soon as one of the print calls
+ * returns 0, TRACE_TYPE_HANDLED once the whole line has been written.
+ */
+static enum print_line_t
+kmemtrace_print_alloc_compress(struct trace_iterator *iter,
+					struct kmemtrace_alloc_entry *entry)
+{
+	struct trace_seq *s = &iter->seq;
+	int ret;
+
+	/* Alloc entry */
+	ret = trace_seq_printf(s, " + ");
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Type */
+	switch (entry->type_id) {
+	case KMEMTRACE_TYPE_KMALLOC:
+		ret = trace_seq_printf(s, "K ");
+		break;
+	case KMEMTRACE_TYPE_CACHE:
+		ret = trace_seq_printf(s, "C ");
+		break;
+	case KMEMTRACE_TYPE_PAGES:
+		ret = trace_seq_printf(s, "P ");
+		break;
+	default:
+		ret = trace_seq_printf(s, "? ");
+	}
+
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/*
+	 * Requested. bytes_req is a size_t, so it needs the 'z' length
+	 * modifier: plain %d mismatches the argument on 64-bit builds.
+	 */
+	ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Allocated (size_t as well) */
+	ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Flags
+	 * TODO: would be better to print the names of the GFP flags
+	 */
+	ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Pointer to allocated */
+	ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Node */
+	ret = trace_seq_printf(s, "%4d ", entry->node);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Call site */
+	ret = seq_print_ip_sym(s, entry->call_site, 0);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	if (!trace_seq_printf(s, "\n"))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+/*
+ * Print one free event in the minimalistic format: a "-" marker, a type
+ * letter (K, C, P or ? for an unknown type), filler where an allocation
+ * line has its size, flags and node columns, the pointer and the call
+ * site symbol, followed by a newline.
+ *
+ * Returns TRACE_TYPE_PARTIAL_LINE as soon as one of the print calls
+ * returns 0, TRACE_TYPE_HANDLED once the whole line has been written.
+ */
+static enum print_line_t
+kmemtrace_print_free_compress(struct trace_iterator *iter,
+				struct kmemtrace_free_entry *entry)
+{
+	struct trace_seq *s = &iter->seq;
+	int written;
+
+	/* Free entry */
+	if (!trace_seq_printf(s, " - "))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Type */
+	switch (entry->type_id) {
+	case KMEMTRACE_TYPE_KMALLOC:
+		written = trace_seq_printf(s, "K ");
+		break;
+	case KMEMTRACE_TYPE_CACHE:
+		written = trace_seq_printf(s, "C ");
+		break;
+	case KMEMTRACE_TYPE_PAGES:
+		written = trace_seq_printf(s, "P ");
+		break;
+	default:
+		written = trace_seq_printf(s, "? ");
+	}
+	if (!written)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Skip requested/allocated/flags */
+	if (!trace_seq_printf(s, " "))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Pointer to allocated */
+	if (!trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Skip node */
+	if (!trace_seq_printf(s, " "))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	/* Call site */
+	if (!seq_print_ip_sym(s, entry->call_site, 0))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	if (!trace_seq_printf(s, "\n"))
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	return TRACE_TYPE_HANDLED;
+}
+
+/*
+ * Tracer ->print_line callback: dispatch a kmemtrace alloc or free entry to
+ * the minimalistic or original printer, depending on the
+ * TRACE_KMEM_OPT_MINIMAL option. Any other entry type is reported as
+ * TRACE_TYPE_UNHANDLED.
+ */
+static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
+{
+	struct trace_entry *entry = iter->ent;
+	bool minimal = kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL;
+
+	if (entry->type == TRACE_KMEM_ALLOC) {
+		struct kmemtrace_alloc_entry *alloc;
+
+		trace_assign_type(alloc, entry);
+		if (minimal)
+			return kmemtrace_print_alloc_compress(iter, alloc);
+		return kmemtrace_print_alloc_original(iter, alloc);
+	}
+
+	if (entry->type == TRACE_KMEM_FREE) {
+		struct kmemtrace_free_entry *freed;
+
+		trace_assign_type(freed, entry);
+		if (minimal)
+			return kmemtrace_print_free_compress(iter, freed);
+		return kmemtrace_print_free_original(iter, freed);
+	}
+
+	return TRACE_TYPE_UNHANDLED;
+}
+
+/*
+ * Trace allocations.
+ *
+ * Reserves a kmemtrace_alloc_entry in the tracer's ring buffer, fills it
+ * from the arguments and commits it. Does nothing while the tracer is
+ * disabled or when no ring buffer space can be reserved.
+ *
+ * type_id: allocator interface that served the request
+ * call_site: address of the caller
+ * ptr: pointer returned by the allocator
+ * bytes_req: number of bytes requested
+ * bytes_alloc: number of bytes actually allocated
+ * gfp_flags: GFP flags supplied by the caller
+ * node: node argument of the allocation (-1 via kmemtrace_mark_alloc())
+ */
+void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+			       unsigned long call_site,
+			       const void *ptr,
+			       size_t bytes_req,
+			       size_t bytes_alloc,
+			       gfp_t gfp_flags,
+			       int node)
+{
+	struct ring_buffer_event *event;
+	struct kmemtrace_alloc_entry *entry;
+	struct trace_array *tr = kmemtrace_array;
+	unsigned long irq_flags;
+
+	if (!kmem_tracing_enabled)
+		return;
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry = ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
+
+	entry->ent.type = TRACE_KMEM_ALLOC;
+	/*
+	 * The print functions read type_id for alloc entries too, so it
+	 * must be stored here just as kmemtrace_mark_free() does.
+	 */
+	entry->type_id = type_id;
+	entry->call_site = call_site;
+	entry->ptr = ptr;
+	entry->bytes_req = bytes_req;
+	entry->bytes_alloc = bytes_alloc;
+	entry->gfp_flags = gfp_flags;
+	entry->node = node;
+
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+	trace_wake_up();
+}
+EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
+
+/*
+ * Trace frees.
+ *
+ * Reserves a kmemtrace_free_entry in the tracer's ring buffer, fills it
+ * from the arguments and commits it. Does nothing while the tracer is
+ * disabled or when no ring buffer space can be reserved.
+ */
+void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+			 unsigned long call_site,
+			 const void *ptr)
+{
+	struct trace_array *array = kmemtrace_array;
+	struct kmemtrace_free_entry *rec;
+	struct ring_buffer_event *ev;
+	unsigned long flags;
+
+	if (!kmem_tracing_enabled)
+		return;
+
+	ev = ring_buffer_lock_reserve(array->buffer, sizeof(*rec), &flags);
+	if (ev == NULL)
+		return;
+
+	rec = ring_buffer_event_data(ev);
+	tracing_generic_entry_update(&rec->ent, 0, 0);
+
+	rec->ent.type = TRACE_KMEM_FREE;
+	rec->type_id = type_id;
+	rec->call_site = call_site;
+	rec->ptr = ptr;
+
+	ring_buffer_unlock_commit(array->buffer, ev, flags);
+
+	trace_wake_up();
+}
+EXPORT_SYMBOL(kmemtrace_mark_free);
+
+/*
+ * Tracer description registered under the name "kmemtrace": it wires up the
+ * init/reset callbacks, the line and header printers, and the option flags
+ * defined in this file.
+ */
+static struct tracer kmem_tracer __read_mostly = {
+ .name = "kmemtrace",
+ .init = kmem_trace_init,
+ .reset = kmem_trace_reset,
+ .print_line = kmemtrace_print_line,
+ .print_header = kmemtrace_headers,
+ .flags = &kmem_tracer_flags
+};
+
+/*
+ * Register the kmemtrace tracer; returns whatever register_tracer()
+ * returns. Hooked up as a device_initcall below.
+ */
+static int __init init_kmem_tracer(void)
+{
+ return register_tracer(&kmem_tracer);
+}
+
+device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4d3d381bfd9..742fe134927 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,6 +9,7 @@
#include <linux/mmiotrace.h>
#include <linux/ftrace.h>
#include <trace/boot.h>
+#include <trace/kmemtrace.h>
enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -29,6 +30,8 @@ enum trace_type {
TRACE_GRAPH_ENT,
TRACE_USER_STACK,
TRACE_HW_BRANCHES,
+ TRACE_KMEM_ALLOC,
+ TRACE_KMEM_FREE,
TRACE_POWER,
__TRACE_LAST_TYPE
@@ -170,6 +173,24 @@ struct trace_power {
struct power_trace state_data;
};
+struct kmemtrace_alloc_entry {
+ struct trace_entry ent;
+ enum kmemtrace_type_id type_id;
+ unsigned long call_site;
+ const void *ptr;
+ size_t bytes_req;
+ size_t bytes_alloc;
+ gfp_t gfp_flags;
+ int node;
+};
+
+struct kmemtrace_free_entry {
+ struct trace_entry ent;
+ enum kmemtrace_type_id type_id;
+ unsigned long call_site;
+ const void *ptr;
+};
+
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
@@ -280,6 +301,10 @@ extern void __ftrace_bad_type(void);
TRACE_GRAPH_RET); \
IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+ IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
+ TRACE_KMEM_ALLOC); \
+ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
+ TRACE_KMEM_FREE); \
__ftrace_bad_type(); \
} while (0)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 930c08e5b38..8516e4f09e1 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -592,6 +592,12 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
if (ent->flags & TRACE_FLAG_CONT)
trace_seq_print_cont(s, iter);
+ /* Strip ending newline */
+ if (s->buffer[s->len - 1] == '\n') {
+ s->buffer[s->len - 1] = '\0';
+ s->len--;
+ }
+
ret = trace_seq_printf(s, " */\n");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
diff --git a/mm/slab.c b/mm/slab.c
index ddc41f337d5..dae716b3291 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,6 +102,7 @@
#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/module.h>
+#include <trace/kmemtrace.h>
#include <linux/rcupdate.h>
#include <linux/string.h>
#include <linux/uaccess.h>
@@ -568,6 +569,14 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
#endif
+#ifdef CONFIG_KMEMTRACE
+size_t slab_buffer_size(struct kmem_cache *cachep)
+{
+ return cachep->buffer_size;
+}
+EXPORT_SYMBOL(slab_buffer_size);
+#endif
+
/*
* Do not go above this order unless 0 objects fit into the slab.
*/
@@ -3550,10 +3559,23 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
- return __cache_alloc(cachep, flags, __builtin_return_address(0));
+ void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+ obj_size(cachep), cachep->buffer_size, flags);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
+{
+ return __cache_alloc(cachep, flags, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+#endif
+
/**
* kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
* @cachep: the cache we're checking against
@@ -3598,23 +3620,47 @@ out:
#ifdef CONFIG_NUMA
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
- return __cache_alloc_node(cachep, flags, nodeid,
- __builtin_return_address(0));
+ void *ret = __cache_alloc_node(cachep, flags, nodeid,
+ __builtin_return_address(0));
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+ obj_size(cachep), cachep->buffer_size,
+ flags, nodeid);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+ gfp_t flags,
+ int nodeid)
+{
+ return __cache_alloc_node(cachep, flags, nodeid,
+ __builtin_return_address(0));
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+#endif
+
static __always_inline void *
__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
{
struct kmem_cache *cachep;
+ void *ret;
cachep = kmem_find_general_cachep(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- return kmem_cache_alloc_node(cachep, flags, node);
+ ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+ (unsigned long) caller, ret,
+ size, cachep->buffer_size, flags, node);
+
+ return ret;
}
-#ifdef CONFIG_DEBUG_SLAB
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
return __do_kmalloc_node(size, flags, node,
@@ -3647,6 +3693,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
void *caller)
{
struct kmem_cache *cachep;
+ void *ret;
/* If you want to save a few bytes .text space: replace
* __ with kmem_.
@@ -3656,11 +3703,17 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
cachep = __find_general_cachep(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- return __cache_alloc(cachep, flags, caller);
+ ret = __cache_alloc(cachep, flags, caller);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
+ (unsigned long) caller, ret,
+ size, cachep->buffer_size, flags);
+
+ return ret;
}
-#ifdef CONFIG_DEBUG_SLAB
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
void *__kmalloc(size_t size, gfp_t flags)
{
return __do_kmalloc(size, flags, __builtin_return_address(0));
@@ -3699,6 +3752,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
debug_check_no_obj_freed(objp, obj_size(cachep));
__cache_free(cachep, objp);
local_irq_restore(flags);
+
+ kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, objp);
}
EXPORT_SYMBOL(kmem_cache_free);
@@ -3725,6 +3780,8 @@ void kfree(const void *objp)
debug_check_no_obj_freed(objp, obj_size(c));
__cache_free(c, (void *)objp);
local_irq_restore(flags);
+
+ kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, objp);
}
EXPORT_SYMBOL(kfree);
diff --git a/mm/slob.c b/mm/slob.c
index bf7e8fc3aed..4d1c0fc33b6 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -65,6 +65,7 @@
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
+#include <trace/kmemtrace.h>
#include <asm/atomic.h>
/*
@@ -463,27 +464,38 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
unsigned int *m;
int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+ void *ret;
if (size < PAGE_SIZE - align) {
if (!size)
return ZERO_SIZE_PTR;
m = slob_alloc(size + align, gfp, align, node);
+
if (!m)
return NULL;
*m = size;
- return (void *)m + align;
+ ret = (void *)m + align;
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+ _RET_IP_, ret,
+ size, size + align, gfp, node);
} else {
- void *ret;
+ unsigned int order = get_order(size);
- ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
+ ret = slob_new_page(gfp | __GFP_COMP, order, node);
if (ret) {
struct page *page;
page = virt_to_page(ret);
page->private = size;
}
- return ret;
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+ _RET_IP_, ret,
+ size, PAGE_SIZE << order, gfp, node);
}
+
+ return ret;
}
EXPORT_SYMBOL(__kmalloc_node);
@@ -501,6 +513,8 @@ void kfree(const void *block)
slob_free(m, *m + align);
} else
put_page(&sp->page);
+
+ kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, block);
}
EXPORT_SYMBOL(kfree);
@@ -569,10 +583,19 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
{
void *b;
- if (c->size < PAGE_SIZE)
+ if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node);
- else
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
+ _RET_IP_, b, c->size,
+ SLOB_UNITS(c->size) * SLOB_UNIT,
+ flags, node);
+ } else {
b = slob_new_page(flags, get_order(c->size), node);
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE,
+ _RET_IP_, b, c->size,
+ PAGE_SIZE << get_order(c->size),
+ flags, node);
+ }
if (c->ctor)
c->ctor(b);
@@ -608,6 +631,8 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
} else {
__kmem_cache_free(b, c->size);
}
+
+ kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, b);
}
EXPORT_SYMBOL(kmem_cache_free);
diff --git a/mm/slub.c b/mm/slub.c
index f0e2892fe40..509e96f411f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <trace/kmemtrace.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
@@ -1623,18 +1624,46 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
- return slab_alloc(s, gfpflags, -1, _RET_IP_);
+ void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+ s->objsize, s->size, gfpflags);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+{
+ return slab_alloc(s, gfpflags, -1, _RET_IP_);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+#endif
+
#ifdef CONFIG_NUMA
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
- return slab_alloc(s, gfpflags, node, _RET_IP_);
+ void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_CACHE, _RET_IP_, ret,
+ s->objsize, s->size, gfpflags, node);
+
+ return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
#endif
+#ifdef CONFIG_KMEMTRACE
+void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+ gfp_t gfpflags,
+ int node)
+{
+ return slab_alloc(s, gfpflags, node, _RET_IP_);
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+#endif
+
/*
* Slow patch handling. This may still be called frequently since objects
* have a longer lifetime than the cpu slabs in most processing loads.
@@ -1742,6 +1771,8 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
page = virt_to_head_page(x);
slab_free(s, page, x, _RET_IP_);
+
+ kmemtrace_mark_free(KMEMTRACE_TYPE_CACHE, _RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
@@ -2657,6 +2688,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
void *__kmalloc(size_t size, gfp_t flags)
{
struct kmem_cache *s;
+ void *ret;
if (unlikely(size > PAGE_SIZE))
return kmalloc_large(size, flags);
@@ -2666,7 +2698,12 @@ void *__kmalloc(size_t size, gfp_t flags)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, flags, -1, _RET_IP_);
+ ret = slab_alloc(s, flags, -1, _RET_IP_);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
+ size, s->size, flags);
+
+ return ret;
}
EXPORT_SYMBOL(__kmalloc);
@@ -2685,16 +2722,30 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
struct kmem_cache *s;
+ void *ret;
- if (unlikely(size > PAGE_SIZE))
- return kmalloc_large_node(size, flags, node);
+ if (unlikely(size > PAGE_SIZE)) {
+ ret = kmalloc_large_node(size, flags, node);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+ _RET_IP_, ret,
+ size, PAGE_SIZE << get_order(size),
+ flags, node);
+
+ return ret;
+ }
s = get_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, flags, node, _RET_IP_);
+ ret = slab_alloc(s, flags, node, _RET_IP_);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, ret,
+ size, s->size, flags, node);
+
+ return ret;
}
EXPORT_SYMBOL(__kmalloc_node);
#endif
@@ -2752,6 +2803,8 @@ void kfree(const void *x)
return;
}
slab_free(page->slab, page, object, _RET_IP_);
+
+ kmemtrace_mark_free(KMEMTRACE_TYPE_KMALLOC, _RET_IP_, x);
}
EXPORT_SYMBOL(kfree);
@@ -3221,6 +3274,7 @@ static struct notifier_block __cpuinitdata slab_notifier = {
void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
{
struct kmem_cache *s;
+ void *ret;
if (unlikely(size > PAGE_SIZE))
return kmalloc_large(size, gfpflags);
@@ -3230,13 +3284,20 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, gfpflags, -1, caller);
+ ret = slab_alloc(s, gfpflags, -1, caller);
+
+ /* Honor the call site pointer we received. */
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, caller, ret, size,
+ s->size, gfpflags);
+
+ return ret;
}
void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
int node, unsigned long caller)
{
struct kmem_cache *s;
+ void *ret;
if (unlikely(size > PAGE_SIZE))
return kmalloc_large_node(size, gfpflags, node);
@@ -3246,7 +3307,13 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, gfpflags, node, caller);
+ ret = slab_alloc(s, gfpflags, node, caller);
+
+ /* Honor the call site pointer we received. */
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, caller, ret,
+ size, s->size, gfpflags, node);
+
+ return ret;
}
#ifdef CONFIG_SLUB_DEBUG