diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 9 | ||||
-rw-r--r-- | lib/Kconfig.debug | 85 | ||||
-rw-r--r-- | lib/Makefile | 11 | ||||
-rw-r--r-- | lib/bitrev.c | 58 | ||||
-rw-r--r-- | lib/bug.c | 163 | ||||
-rw-r--r-- | lib/cmdline.c | 35 | ||||
-rw-r--r-- | lib/crc32.c | 28 | ||||
-rw-r--r-- | lib/fault-inject.c | 336 | ||||
-rw-r--r-- | lib/idr.c | 4 | ||||
-rw-r--r-- | lib/iomap.c | 32 | ||||
-rw-r--r-- | lib/ioremap.c | 1 | ||||
-rw-r--r-- | lib/kobject.c | 53 | ||||
-rw-r--r-- | lib/kobject_uevent.c | 28 | ||||
-rw-r--r-- | lib/list_debug.c | 10 | ||||
-rw-r--r-- | lib/locking-selftest.c | 2 | ||||
-rw-r--r-- | lib/radix-tree.c | 333 | ||||
-rw-r--r-- | lib/random32.c | 1 | ||||
-rw-r--r-- | lib/reciprocal_div.c | 9 | ||||
-rw-r--r-- | lib/spinlock_debug.c | 4 | ||||
-rw-r--r-- | lib/textsearch.c | 2 |
20 files changed, 1030 insertions, 174 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 734ce95a93d..9b03581cdec 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -4,6 +4,9 @@ menu "Library routines" +config BITREVERSE + tristate + config CRC_CCITT tristate "CRC-CCITT functions" help @@ -23,6 +26,7 @@ config CRC16 config CRC32 tristate "CRC32 functions" default y + select BITREVERSE help This option is provided for the case where no in-kernel-tree modules require CRC32 functions, but a module built outside the @@ -97,4 +101,9 @@ config TEXTSEARCH_FSM config PLIST boolean +config IOMAP_COPY + boolean + depends on !UML + default y + endmenu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d3679103a8e..818e4589f71 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1,6 +1,7 @@ config PRINTK_TIME bool "Show timing information on printks" + depends on PRINTK help Selecting this option causes timing information to be included in printk output. This allows you to measure @@ -46,6 +47,30 @@ config UNUSED_SYMBOLS you really need it, and what the merge plan to the mainline kernel for your module is. +config DEBUG_FS + bool "Debug Filesystem" + depends on SYSFS + help + debugfs is a virtual file system that kernel developers use to put + debugging files into. Enable this option to be able to read and + write to these files. + + If unsure, say N. + +config HEADERS_CHECK + bool "Run 'make headers_check' when building vmlinux" + depends on !UML + help + This option will extract the user-visible kernel headers whenever + building the kernel, and will run basic sanity checks on them to + ensure that exported files do not attempt to include files which + were not exported, etc. + + If you're making modifications to header files which are + relevant for userspace, say 'Y', and check the headers + exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in + your build tree), to make sure they're suitable. + config DEBUG_KERNEL bool "Kernel debugging" help @@ -284,7 +309,7 @@ config DEBUG_HIGHMEM config DEBUG_BUGVERBOSE bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED depends on BUG - depends on ARM || ARM26 || AVR32 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV || SUPERH + depends on ARM || ARM26 || AVR32 || M32R || M68K || SPARC32 || SPARC64 || FRV || SUPERH || GENERIC_BUG default !EMBEDDED help Say Y here to make BUG() panics output the file name and line number @@ -301,16 +326,6 @@ config DEBUG_INFO If unsure, say N. -config DEBUG_FS - bool "Debug Filesystem" - depends on SYSFS - help - debugfs is a virtual file system that kernel developers use to put - debugging files into. Enable this option to be able to read and - write to these files. - - If unsure, say N. - config DEBUG_VM bool "Debug VM" depends on DEBUG_KERNEL @@ -371,20 +386,6 @@ config FORCED_INLINING become the default in the future, until then this option is there to test gcc for this. -config HEADERS_CHECK - bool "Run 'make headers_check' when building vmlinux" - depends on !UML - help - This option will extract the user-visible kernel headers whenever - building the kernel, and will run basic sanity checks on them to - ensure that exported files do not attempt to include files which - were not exported, etc. - - If you're making modifications to header files which are - relevant for userspace, say 'Y', and check the headers - exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in - your build tree), to make sure they're suitable. - config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL @@ -401,6 +402,7 @@ config RCU_TORTURE_TEST config LKDTM tristate "Linux Kernel Dump Test Tool Module" + depends on DEBUG_KERNEL depends on KPROBES default n help @@ -412,3 +414,36 @@ config LKDTM Documentation on how to use the module can be found in drivers/misc/lkdtm.c + +config FAULT_INJECTION + bool "Fault-injection framework" + depends on DEBUG_KERNEL + depends on STACKTRACE + select FRAME_POINTER + help + Provide fault-injection framework. + For more details, see Documentation/fault-injection/. + +config FAILSLAB + bool "Fault-injection capability for kmalloc" + depends on FAULT_INJECTION + help + Provide fault-injection capability for kmalloc. + +config FAIL_PAGE_ALLOC + bool "Fault-injection capabilitiy for alloc_pages()" + depends on FAULT_INJECTION + help + Provide fault-injection capability for alloc_pages(). + +config FAIL_MAKE_REQUEST + bool "Fault-injection capability for disk IO" + depends on FAULT_INJECTION + help + Provide fault-injection capability for disk IO. + +config FAULT_INJECTION_DEBUG_FS + bool "Debugfs entries for fault-injection capabilities" + depends on FAULT_INJECTION && SYSFS && DEBUG_FS + help + Enable configuration of fault-injection capabilities via debugfs. diff --git a/lib/Makefile b/lib/Makefile index cf98fabaa54..77b4bad7d44 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -5,27 +5,28 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \ - sha1.o irq_regs.o + sha1.o irq_regs.o reciprocal_div.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o kref.o kobject_uevent.o klist.o -obj-y += sort.o parser.o halfmd4.o iomap_copy.o debug_locks.o random32.o +obj-y += sort.o parser.o halfmd4.o debug_locks.o random32.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG CFLAGS_kobject_uevent.o += -DDEBUG endif +obj-$(CONFIG_IOMAP_COPY) += iomap_copy.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o -lib-$(CONFIG_GENERIC_HWEIGHT) += hweight.o +obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o obj-$(CONFIG_PLIST) += plist.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o @@ -35,6 +36,7 @@ ifneq ($(CONFIG_HAVE_DEC_LOCK),y) lib-y += dec_and_lock.o endif +obj-$(CONFIG_BITREVERSE) += bitrev.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC32) += crc32.o @@ -54,6 +56,9 @@ obj-$(CONFIG_SMP) += percpu_counter.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o +obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o + +lib-$(CONFIG_GENERIC_BUG) += bug.o hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/bitrev.c b/lib/bitrev.c new file mode 100644 index 00000000000..989aff73f88 --- /dev/null +++ b/lib/bitrev.c @@ -0,0 +1,58 @@ +#include <linux/types.h> +#include <linux/module.h> +#include <linux/bitrev.h> + +MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>"); +MODULE_DESCRIPTION("Bit ordering reversal functions"); +MODULE_LICENSE("GPL"); + +const u8 byte_rev_table[256] = { + 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, + 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, + 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, + 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, + 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, + 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, + 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, + 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, + 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, + 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, + 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, + 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, + 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, + 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, + 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, + 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, + 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, + 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, + 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, + 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, + 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, + 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, + 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, + 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, + 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, + 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, + 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, + 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, + 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, + 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, + 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, + 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, +}; +EXPORT_SYMBOL_GPL(byte_rev_table); + +static __always_inline u16 bitrev16(u16 x) +{ + return (bitrev8(x & 0xff) << 8) | bitrev8(x >> 8); +} + +/** + * bitrev32 - reverse the order of bits in a u32 value + * @x: value to be bit-reversed + */ +u32 bitrev32(u32 x) +{ + return (bitrev16(x & 0xffff) << 16) | bitrev16(x >> 16); +} +EXPORT_SYMBOL(bitrev32); diff --git a/lib/bug.c b/lib/bug.c new file mode 100644 index 00000000000..014b582c5c4 --- /dev/null +++ b/lib/bug.c @@ -0,0 +1,163 @@ +/* + Generic support for BUG() + + This respects the following config options: + + CONFIG_BUG - emit BUG traps. Nothing happens without this. + CONFIG_GENERIC_BUG - enable this code. + CONFIG_DEBUG_BUGVERBOSE - emit full file+line information for each BUG + + CONFIG_BUG and CONFIG_DEBUG_BUGVERBOSE are potentially user-settable + (though they're generally always on). + + CONFIG_GENERIC_BUG is set by each architecture using this code. + + To use this, your architecture must: + + 1. Set up the config options: + - Enable CONFIG_GENERIC_BUG if CONFIG_BUG + + 2. Implement BUG (and optionally BUG_ON, WARN, WARN_ON) + - Define HAVE_ARCH_BUG + - Implement BUG() to generate a faulting instruction + - NOTE: struct bug_entry does not have "file" or "line" entries + when CONFIG_DEBUG_BUGVERBOSE is not enabled, so you must generate + the values accordingly. + + 3. Implement the trap + - In the illegal instruction trap handler (typically), verify + that the fault was in kernel mode, and call report_bug() + - report_bug() will return whether it was a false alarm, a warning, + or an actual bug. + - You must implement the is_valid_bugaddr(bugaddr) callback which + returns true if the eip is a real kernel address, and it points + to the expected BUG trap instruction. + + Jeremy Fitzhardinge <jeremy@goop.org> 2006 + */ +#include <linux/list.h> +#include <linux/module.h> +#include <linux/bug.h> + +extern const struct bug_entry __start___bug_table[], __stop___bug_table[]; + +#ifdef CONFIG_MODULES +static LIST_HEAD(module_bug_list); + +static const struct bug_entry *module_find_bug(unsigned long bugaddr) +{ + struct module *mod; + + list_for_each_entry(mod, &module_bug_list, bug_list) { + const struct bug_entry *bug = mod->bug_table; + unsigned i; + + for (i = 0; i < mod->num_bugs; ++i, ++bug) + if (bugaddr == bug->bug_addr) + return bug; + } + return NULL; +} + +int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) +{ + char *secstrings; + unsigned int i; + + mod->bug_table = NULL; + mod->num_bugs = 0; + + /* Find the __bug_table section, if present */ + secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (i = 1; i < hdr->e_shnum; i++) { + if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table")) + continue; + mod->bug_table = (void *) sechdrs[i].sh_addr; + mod->num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry); + break; + } + + /* + * Strictly speaking this should have a spinlock to protect against + * traversals, but since we only traverse on BUG()s, a spinlock + * could potentially lead to deadlock and thus be counter-productive. + */ + list_add(&mod->bug_list, &module_bug_list); + + return 0; +} + +void module_bug_cleanup(struct module *mod) +{ + list_del(&mod->bug_list); +} + +#else + +static inline const struct bug_entry *module_find_bug(unsigned long bugaddr) +{ + return NULL; +} +#endif + +const struct bug_entry *find_bug(unsigned long bugaddr) +{ + const struct bug_entry *bug; + + for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) + if (bugaddr == bug->bug_addr) + return bug; + + return module_find_bug(bugaddr); +} + +enum bug_trap_type report_bug(unsigned long bugaddr) +{ + const struct bug_entry *bug; + const char *file; + unsigned line, warning; + + if (!is_valid_bugaddr(bugaddr)) + return BUG_TRAP_TYPE_NONE; + + bug = find_bug(bugaddr); + + printk(KERN_EMERG "------------[ cut here ]------------\n"); + + file = NULL; + line = 0; + warning = 0; + + if (bug) { +#ifdef CONFIG_DEBUG_BUGVERBOSE + file = bug->file; + line = bug->line; +#endif + warning = (bug->flags & BUGFLAG_WARNING) != 0; + } + + if (warning) { + /* this is a WARN_ON rather than BUG/BUG_ON */ + if (file) + printk(KERN_ERR "Badness at %s:%u\n", + file, line); + else + printk(KERN_ERR "Badness at %p " + "[verbose debug info unavailable]\n", + (void *)bugaddr); + + dump_stack(); + return BUG_TRAP_TYPE_WARN; + } + + if (file) + printk(KERN_CRIT "kernel BUG at %s:%u!\n", + file, line); + else + printk(KERN_CRIT "Kernel BUG at %p " + "[verbose debug info unavailable]\n", + (void *)bugaddr); + + return BUG_TRAP_TYPE_BUG; +} diff --git a/lib/cmdline.c b/lib/cmdline.c index 0331ed825ea..8a5b5303bd4 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -16,6 +16,23 @@ #include <linux/kernel.h> #include <linux/string.h> +/* + * If a hyphen was found in get_option, this will handle the + * range of numbers, M-N. This will expand the range and insert + * the values[M, M+1, ..., N] into the ints array in get_options. + */ + +static int get_range(char **str, int *pint) +{ + int x, inc_counter, upper_range; + + (*str)++; + upper_range = simple_strtol((*str), NULL, 0); + inc_counter = upper_range - *pint; + for (x = *pint; x < upper_range; x++) + *pint++ = x; + return inc_counter; +} /** * get_option - Parse integer from an option string @@ -29,6 +46,7 @@ * 0 : no int in string * 1 : int found, no subsequent comma * 2 : int found including a subsequent comma + * 3 : hyphen found to denote a range */ int get_option (char **str, int *pint) @@ -44,6 +62,8 @@ int get_option (char **str, int *pint) (*str)++; return 2; } + if (**str == '-') + return 3; return 1; } @@ -55,7 +75,8 @@ int get_option (char **str, int *pint) * @ints: integer array * * This function parses a string containing a comma-separated - * list of integers. The parse halts when the array is + * list of integers, a hyphen-separated range of _positive_ integers, + * or a combination of both. The parse halts when the array is * full, or when no more numbers can be retrieved from the * string. * @@ -72,6 +93,18 @@ char *get_options(const char *str, int nints, int *ints) res = get_option ((char **)&str, ints + i); if (res == 0) break; + if (res == 3) { + int range_nums; + range_nums = get_range((char **)&str, ints + i); + if (range_nums < 0) + break; + /* + * Decrement the result by one to leave out the + * last number in the range. The next iteration + * will handle the upper number in the range + */ + i += (range_nums - 1); + } i++; if (res == 1) break; diff --git a/lib/crc32.c b/lib/crc32.c index 285fd9bc61b..bfc33314c72 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -235,23 +235,8 @@ u32 __attribute_pure__ crc32_be(u32 crc, unsigned char const *p, size_t len) } #endif -/** - * bitreverse - reverse the order of bits in a u32 value - * @x: value to be bit-reversed - */ -u32 bitreverse(u32 x) -{ - x = (x >> 16) | (x << 16); - x = (x >> 8 & 0x00ff00ff) | (x << 8 & 0xff00ff00); - x = (x >> 4 & 0x0f0f0f0f) | (x << 4 & 0xf0f0f0f0); - x = (x >> 2 & 0x33333333) | (x << 2 & 0xcccccccc); - x = (x >> 1 & 0x55555555) | (x << 1 & 0xaaaaaaaa); - return x; -} - EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(crc32_be); -EXPORT_SYMBOL(bitreverse); /* * A brief CRC tutorial. @@ -400,10 +385,7 @@ buf_dump(char const *prefix, unsigned char const *buf, size_t len) static void bytereverse(unsigned char *buf, size_t len) { while (len--) { - unsigned char x = *buf; - x = (x >> 4) | (x << 4); - x = (x >> 2 & 0x33) | (x << 2 & 0xcc); - x = (x >> 1 & 0x55) | (x << 1 & 0xaa); + unsigned char x = bitrev8(*buf); *buf++ = x; } } @@ -460,11 +442,11 @@ static u32 test_step(u32 init, unsigned char *buf, size_t len) /* Now swap it around for the other test */ bytereverse(buf, len + 4); - init = bitreverse(init); - crc2 = bitreverse(crc1); - if (crc1 != bitreverse(crc2)) + init = bitrev32(init); + crc2 = bitrev32(crc1); + if (crc1 != bitrev32(crc2)) printf("\nBit reversal fail: 0x%08x -> 0x%08x -> 0x%08x\n", - crc1, crc2, bitreverse(crc2)); + crc1, crc2, bitrev32(crc2)); crc1 = crc32_le(init, buf, len); if (crc1 != crc2) printf("\nCRC endianness fail: 0x%08x != 0x%08x\n", crc1, diff --git a/lib/fault-inject.c b/lib/fault-inject.c new file mode 100644 index 00000000000..d143c0faf24 --- /dev/null +++ b/lib/fault-inject.c @@ -0,0 +1,336 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/random.h> +#include <linux/stat.h> +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/unwind.h> +#include <linux/stacktrace.h> +#include <linux/kallsyms.h> +#include <linux/fault-inject.h> + +/* + * setup_fault_attr() is a helper function for various __setup handlers, so it + * returns 0 on error, because that is what __setup handlers do. + */ +int __init setup_fault_attr(struct fault_attr *attr, char *str) +{ + unsigned long probability; + unsigned long interval; + int times; + int space; + + /* "<interval>,<probability>,<space>,<times>" */ + if (sscanf(str, "%lu,%lu,%d,%d", + &interval, &probability, &space, ×) < 4) { + printk(KERN_WARNING + "FAULT_INJECTION: failed to parse arguments\n"); + return 0; + } + + attr->probability = probability; + attr->interval = interval; + atomic_set(&attr->times, times); + atomic_set(&attr->space, space); + + return 1; +} + +static void fail_dump(struct fault_attr *attr) +{ + if (attr->verbose > 0) + printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure\n"); + if (attr->verbose > 1) + dump_stack(); +} + +#define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0) + +static bool fail_task(struct fault_attr *attr, struct task_struct *task) +{ + return !in_interrupt() && task->make_it_fail; +} + +#define MAX_STACK_TRACE_DEPTH 32 + +#ifdef CONFIG_STACK_UNWIND + +static asmlinkage int fail_stacktrace_callback(struct unwind_frame_info *info, + void *arg) +{ + int depth; + struct fault_attr *attr = arg; + bool found = (attr->require_start == 0 && attr->require_end == ULONG_MAX); + + for (depth = 0; depth < attr->stacktrace_depth + && unwind(info) == 0 && UNW_PC(info); depth++) { + if (arch_unw_user_mode(info)) + break; + if (attr->reject_start <= UNW_PC(info) && + UNW_PC(info) < attr->reject_end) + return false; + if (attr->require_start <= UNW_PC(info) && + UNW_PC(info) < attr->require_end) + found = true; + } + return found; +} + +static bool fail_stacktrace(struct fault_attr *attr) +{ + struct unwind_frame_info info; + + return unwind_init_running(&info, fail_stacktrace_callback, attr); +} + +#elif defined(CONFIG_STACKTRACE) + +static bool fail_stacktrace(struct fault_attr *attr) +{ + struct stack_trace trace; + int depth = attr->stacktrace_depth; + unsigned long entries[MAX_STACK_TRACE_DEPTH]; + int n; + bool found = (attr->require_start == 0 && attr->require_end == ULONG_MAX); + + if (depth == 0) + return found; + + trace.nr_entries = 0; + trace.entries = entries; + trace.max_entries = depth; + trace.skip = 1; + trace.all_contexts = 0; + + save_stack_trace(&trace, NULL); + for (n = 0; n < trace.nr_entries; n++) { + if (attr->reject_start <= entries[n] && + entries[n] < attr->reject_end) + return false; + if (attr->require_start <= entries[n] && + entries[n] < attr->require_end) + found = true; + } + return found; +} + +#else + +static inline bool fail_stacktrace(struct fault_attr *attr) +{ + static bool firsttime = true; + + if (firsttime) { + printk(KERN_WARNING + "This architecture does not implement save_stack_trace()\n"); + firsttime = false; + } + return false; +} + +#endif + +/* + * This code is stolen from failmalloc-1.0 + * http://www.nongnu.org/failmalloc/ + */ + +bool should_fail(struct fault_attr *attr, ssize_t size) +{ + if (attr->task_filter && !fail_task(attr, current)) + return false; + + if (atomic_read(&attr->times) == 0) + return false; + + if (atomic_read(&attr->space) > size) { + atomic_sub(size, &attr->space); + return false; + } + + if (attr->interval > 1) { + attr->count++; + if (attr->count % attr->interval) + return false; + } + + if (attr->probability <= random32() % 100) + return false; + + if (!fail_stacktrace(attr)) + return false; + + fail_dump(attr); + + if (atomic_read(&attr->times) != -1) + atomic_dec_not_zero(&attr->times); + + return true; +} + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + +static void debugfs_ul_set(void *data, u64 val) +{ + *(unsigned long *)data = val; +} + +static void debugfs_ul_set_MAX_STACK_TRACE_DEPTH(void *data, u64 val) +{ + *(unsigned long *)data = + val < MAX_STACK_TRACE_DEPTH ? + val : MAX_STACK_TRACE_DEPTH; +} + +static u64 debugfs_ul_get(void *data) +{ + return *(unsigned long *)data; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_ul, debugfs_ul_get, debugfs_ul_set, "%llu\n"); + +static struct dentry *debugfs_create_ul(const char *name, mode_t mode, + struct dentry *parent, unsigned long *value) +{ + return debugfs_create_file(name, mode, parent, value, &fops_ul); +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_ul_MAX_STACK_TRACE_DEPTH, debugfs_ul_get, + debugfs_ul_set_MAX_STACK_TRACE_DEPTH, "%llu\n"); + +static struct dentry *debugfs_create_ul_MAX_STACK_TRACE_DEPTH( + const char *name, mode_t mode, + struct dentry *parent, unsigned long *value) +{ + return debugfs_create_file(name, mode, parent, value, + &fops_ul_MAX_STACK_TRACE_DEPTH); +} + +static void debugfs_atomic_t_set(void *data, u64 val) +{ + atomic_set((atomic_t *)data, val); +} + +static u64 debugfs_atomic_t_get(void *data) +{ + return atomic_read((atomic_t *)data); +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get, + debugfs_atomic_t_set, "%lld\n"); + +static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode, + struct dentry *parent, atomic_t *value) +{ + return debugfs_create_file(name, mode, parent, value, &fops_atomic_t); +} + +void cleanup_fault_attr_dentries(struct fault_attr *attr) +{ + debugfs_remove(attr->dentries.probability_file); + attr->dentries.probability_file = NULL; + + debugfs_remove(attr->dentries.interval_file); + attr->dentries.interval_file = NULL; + + debugfs_remove(attr->dentries.times_file); + attr->dentries.times_file = NULL; + + debugfs_remove(attr->dentries.space_file); + attr->dentries.space_file = NULL; + + debugfs_remove(attr->dentries.verbose_file); + attr->dentries.verbose_file = NULL; + + debugfs_remove(attr->dentries.task_filter_file); + attr->dentries.task_filter_file = NULL; + + debugfs_remove(attr->dentries.stacktrace_depth_file); + attr->dentries.stacktrace_depth_file = NULL; + + debugfs_remove(attr->dentries.require_start_file); + attr->dentries.require_start_file = NULL; + + debugfs_remove(attr->dentries.require_end_file); + attr->dentries.require_end_file = NULL; + + debugfs_remove(attr->dentries.reject_start_file); + attr->dentries.reject_start_file = NULL; + + debugfs_remove(attr->dentries.reject_end_file); + attr->dentries.reject_end_file = NULL; + + if (attr->dentries.dir) + WARN_ON(!simple_empty(attr->dentries.dir)); + + debugfs_remove(attr->dentries.dir); + attr->dentries.dir = NULL; +} + +int init_fault_attr_dentries(struct fault_attr *attr, const char *name) +{ + mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + struct dentry *dir; + + memset(&attr->dentries, 0, sizeof(attr->dentries)); + + dir = debugfs_create_dir(name, NULL); + if (!dir) + goto fail; + attr->dentries.dir = dir; + + attr->dentries.probability_file = + debugfs_create_ul("probability", mode, dir, &attr->probability); + + attr->dentries.interval_file = + debugfs_create_ul("interval", mode, dir, &attr->interval); + + attr->dentries.times_file = + debugfs_create_atomic_t("times", mode, dir, &attr->times); + + attr->dentries.space_file = + debugfs_create_atomic_t("space", mode, dir, &attr->space); + + attr->dentries.verbose_file = + debugfs_create_ul("verbose", mode, dir, &attr->verbose); + + attr->dentries.task_filter_file = debugfs_create_bool("task-filter", + mode, dir, &attr->task_filter); + + attr->dentries.stacktrace_depth_file = + debugfs_create_ul_MAX_STACK_TRACE_DEPTH( + "stacktrace-depth", mode, dir, &attr->stacktrace_depth); + + attr->dentries.require_start_file = + debugfs_create_ul("require-start", mode, dir, &attr->require_start); + + attr->dentries.require_end_file = + debugfs_create_ul("require-end", mode, dir, &attr->require_end); + + attr->dentries.reject_start_file = + debugfs_create_ul("reject-start", mode, dir, &attr->reject_start); + + attr->dentries.reject_end_file = + debugfs_create_ul("reject-end", mode, dir, &attr->reject_end); + + + if (!attr->dentries.probability_file || !attr->dentries.interval_file + || !attr->dentries.times_file || !attr->dentries.space_file + || !attr->dentries.verbose_file || !attr->dentries.task_filter_file + || !attr->dentries.stacktrace_depth_file + || !attr->dentries.require_start_file + || !attr->dentries.require_end_file + || !attr->dentries.reject_start_file + || !attr->dentries.reject_end_file + ) + goto fail; + + return 0; +fail: + cleanup_fault_attr_dentries(attr); + return -ENOMEM; +} + +#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/lib/idr.c b/lib/idr.c index 16d2143fea4..71853531d3b 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -33,7 +33,7 @@ #include <linux/string.h> #include <linux/idr.h> -static kmem_cache_t *idr_layer_cache; +static struct kmem_cache *idr_layer_cache; static struct idr_layer *alloc_layer(struct idr *idp) { @@ -445,7 +445,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) } EXPORT_SYMBOL(idr_replace); -static void idr_cache_ctor(void * idr_layer, kmem_cache_t *idr_layer_cache, +static void idr_cache_ctor(void * idr_layer, struct kmem_cache *idr_layer_cache, unsigned long flags) { memset(idr_layer, 0, sizeof(struct idr_layer)); diff --git a/lib/iomap.c b/lib/iomap.c index 55689c5d337..d6ccdd85df5 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -50,6 +50,16 @@ } \ } while (0) +#ifndef pio_read16be +#define pio_read16be(port) swab16(inw(port)) +#define pio_read32be(port) swab32(inl(port)) +#endif + +#ifndef mmio_read16be +#define mmio_read16be(addr) be16_to_cpu(__raw_readw(addr)) +#define mmio_read32be(addr) be32_to_cpu(__raw_readl(addr)) +#endif + unsigned int fastcall ioread8(void __iomem *addr) { IO_COND(addr, return inb(port), return readb(addr)); @@ -60,7 +70,7 @@ unsigned int fastcall ioread16(void __iomem *addr) } unsigned int fastcall ioread16be(void __iomem *addr) { - IO_COND(addr, return inw(port), return be16_to_cpu(__raw_readw(addr))); + IO_COND(addr, return pio_read16be(port), return mmio_read16be(addr)); } unsigned int fastcall ioread32(void __iomem *addr) { @@ -68,7 +78,7 @@ unsigned int fastcall ioread32(void __iomem *addr) } unsigned int fastcall ioread32be(void __iomem *addr) { - IO_COND(addr, return inl(port), return be32_to_cpu(__raw_readl(addr))); + IO_COND(addr, return pio_read32be(port), return mmio_read32be(addr)); } EXPORT_SYMBOL(ioread8); EXPORT_SYMBOL(ioread16); @@ -76,6 +86,16 @@ EXPORT_SYMBOL(ioread16be); EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); +#ifndef pio_write16be +#define pio_write16be(val,port) outw(swab16(val),port) +#define pio_write32be(val,port) outl(swab32(val),port) +#endif + +#ifndef mmio_write16be +#define mmio_write16be(val,port) __raw_writew(be16_to_cpu(val),port) +#define mmio_write32be(val,port) __raw_writel(be32_to_cpu(val),port) +#endif + void fastcall iowrite8(u8 val, void __iomem *addr) { IO_COND(addr, outb(val,port), writeb(val, addr)); @@ -86,7 +106,7 @@ void fastcall iowrite16(u16 val, void __iomem *addr) } void fastcall iowrite16be(u16 val, void __iomem *addr) { - IO_COND(addr, outw(val,port), __raw_writew(cpu_to_be16(val), addr)); + IO_COND(addr, pio_write16be(val,port), mmio_write16be(val, addr)); } void fastcall iowrite32(u32 val, void __iomem *addr) { @@ -94,7 +114,7 @@ void fastcall iowrite32(u32 val, void __iomem *addr) } void fastcall iowrite32be(u32 val, void __iomem *addr) { - IO_COND(addr, outl(val,port), __raw_writel(cpu_to_be32(val), addr)); + IO_COND(addr, pio_write32be(val,port), mmio_write32be(val, addr)); } EXPORT_SYMBOL(iowrite8); EXPORT_SYMBOL(iowrite16); @@ -108,6 +128,7 @@ EXPORT_SYMBOL(iowrite32be); * convert to CPU byte order. We write in "IO byte * order" (we also don't have IO barriers). */ +#ifndef mmio_insb static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) { while (--count >= 0) { @@ -132,7 +153,9 @@ static inline void mmio_insl(void __iomem *addr, u32 *dst, int count) dst++; } } +#endif +#ifndef mmio_outsb static inline void mmio_outsb(void __iomem *addr, const u8 *src, int count) { while (--count >= 0) { @@ -154,6 +177,7 @@ static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count) src++; } } +#endif void fastcall ioread8_rep(void __iomem *addr, void *dst, unsigned long count) { diff --git a/lib/ioremap.c b/lib/ioremap.c index 99fa277f9f7..a9e4415b02d 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -5,7 +5,6 @@ * * (C) Copyright 1995 1996 Linus Torvalds */ -#include <linux/io.h> #include <linux/vmalloc.h> #include <linux/mm.h> diff --git a/lib/kobject.c b/lib/kobject.c index 7dd5c0e9d99..7ce6dc138e9 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -111,10 +111,9 @@ char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask) len = get_kobj_path_length(kobj); if (len == 0) return NULL; - path = kmalloc(len, gfp_mask); + path = kzalloc(len, gfp_mask); if (!path) return NULL; - memset(path, 0x00, len); fill_kobj_path(kobj, path, len); return path; @@ -311,6 +310,56 @@ int kobject_rename(struct kobject * kobj, const char *new_name) } /** + * kobject_move - move object to another parent + * @kobj: object in question. + * @new_parent: object's new parent + */ + +int kobject_move(struct kobject *kobj, struct kobject *new_parent) +{ + int error; + struct kobject *old_parent; + const char *devpath = NULL; + char *devpath_string = NULL; + char *envp[2]; + + kobj = kobject_get(kobj); + if (!kobj) + return -EINVAL; + new_parent = kobject_get(new_parent); + if (!new_parent) { + error = -EINVAL; + goto out; + } + /* old object path */ + devpath = kobject_get_path(kobj, GFP_KERNEL); + if (!devpath) { + error = -ENOMEM; + goto out; + } + devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL); + if (!devpath_string) { + error = -ENOMEM; + goto out; + } + sprintf(devpath_string, "DEVPATH_OLD=%s", devpath); + envp[0] = devpath_string; + envp[1] = NULL; + error = sysfs_move_dir(kobj, new_parent); + if (error) + goto out; + old_parent = kobj->parent; + kobj->parent = new_parent; + kobject_put(old_parent); + kobject_uevent_env(kobj, KOBJ_MOVE, envp); +out: + kobject_put(kobj); + kfree(devpath_string); + kfree(devpath); + return error; +} + +/** * kobject_del - unlink kobject from hierarchy. * @kobj: object. */ diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 7f20e7b857c..a1922765ff3 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -50,18 +50,22 @@ static char *action_to_string(enum kobject_action action) return "offline"; case KOBJ_ONLINE: return "online"; + case KOBJ_MOVE: + return "move"; default: return NULL; } } /** - * kobject_uevent - notify userspace by ending an uevent + * kobject_uevent_env - send an uevent with environmental data * - * @action: action that is happening (usually KOBJ_ADD and KOBJ_REMOVE) + * @action: action that is happening (usually KOBJ_MOVE) * @kobj: struct kobject that the action is happening to + * @envp_ext: pointer to environmental data */ -void kobject_uevent(struct kobject *kobj, enum kobject_action action) +void kobject_uevent_env(struct kobject *kobj, enum kobject_action action, + char *envp_ext[]) { char **envp; char *buffer; @@ -76,6 +80,7 @@ void kobject_uevent(struct kobject *kobj, enum kobject_action action) char *seq_buff; int i = 0; int retval; + int j; pr_debug("%s\n", __FUNCTION__); @@ -134,7 +139,8 @@ void kobject_uevent(struct kobject *kobj, enum kobject_action action) scratch += sprintf (scratch, "DEVPATH=%s", devpath) + 1; envp [i++] = scratch; scratch += sprintf(scratch, "SUBSYSTEM=%s", subsystem) + 1; - + for (j = 0; envp_ext && envp_ext[j]; j++) + envp[i++] = envp_ext[j]; /* just reserve the space, overwrite it after kset call has returned */ envp[i++] = seq_buff = scratch; scratch += strlen("SEQNUM=18446744073709551616") + 1; @@ -200,6 +206,20 @@ exit: kfree(envp); return; } + +EXPORT_SYMBOL_GPL(kobject_uevent_env); + +/** + * kobject_uevent - notify userspace by ending an uevent + * + * @action: action that is happening (usually KOBJ_ADD and KOBJ_REMOVE) + * @kobj: struct kobject that the action is happening to + */ +void kobject_uevent(struct kobject *kobj, enum kobject_action action) +{ + kobject_uevent_env(kobj, action, NULL); +} + EXPORT_SYMBOL_GPL(kobject_uevent); /** diff --git a/lib/list_debug.c b/lib/list_debug.c index 7ba9d823d38..4350ba9655b 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -21,13 +21,15 @@ void __list_add(struct list_head *new, struct list_head *next) { if (unlikely(next->prev != prev)) { - printk(KERN_ERR "list_add corruption. next->prev should be %p, but was %p\n", - prev, next->prev); + printk(KERN_ERR "list_add corruption. next->prev should be " + "prev (%p), but was %p. (next=%p).\n", + prev, next->prev, next); BUG(); } if (unlikely(prev->next != next)) { - printk(KERN_ERR "list_add corruption. prev->next should be %p, but was %p\n", - next, prev->next); + printk(KERN_ERR "list_add corruption. prev->next should be " + "next (%p), but was %p. (prev=%p).\n", + next, prev->next, prev); BUG(); } next->prev = new; diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 7945787f439..280332c1827 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -963,7 +963,9 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask) printk("failed|"); } else { unexpected_testcase_failures++; + printk("FAILED|"); + dump_stack(); } } else { testcase_successes++; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index aa9bfd0bdbd..d69ddbe4386 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2,6 +2,7 @@ * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig * Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com> + * Copyright (C) 2006 Nick Piggin * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -30,6 +31,7 @@ #include <linux/gfp.h> #include <linux/string.h> #include <linux/bitops.h> +#include <linux/rcupdate.h> #ifdef __KERNEL__ @@ -45,7 +47,9 @@ ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) struct radix_tree_node { + unsigned int height; /* Height from the bottom */ unsigned int count; + struct rcu_head rcu_head; void *slots[RADIX_TREE_MAP_SIZE]; unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; @@ -63,7 +67,7 @@ static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly; /* * Radix tree node cache. */ -static kmem_cache_t *radix_tree_node_cachep; +static struct kmem_cache *radix_tree_node_cachep; /* * Per-cpu pool of preloaded nodes @@ -100,13 +104,21 @@ radix_tree_node_alloc(struct radix_tree_root *root) rtp->nr--; } } + BUG_ON(radix_tree_is_direct_ptr(ret)); return ret; } +static void radix_tree_node_rcu_free(struct rcu_head *head) +{ + struct radix_tree_node *node = + container_of(head, struct radix_tree_node, rcu_head); + kmem_cache_free(radix_tree_node_cachep, node); +} + static inline void radix_tree_node_free(struct radix_tree_node *node) { - kmem_cache_free(radix_tree_node_cachep, node); + call_rcu(&node->rcu_head, radix_tree_node_rcu_free); } /* @@ -222,11 +234,12 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) } do { + unsigned int newheight; if (!(node = radix_tree_node_alloc(root))) return -ENOMEM; /* Increase the height. */ - node->slots[0] = root->rnode; + node->slots[0] = radix_tree_direct_to_ptr(root->rnode); /* Propagate the aggregated tag info into the new root */ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { @@ -234,9 +247,11 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) tag_set(node, tag, 0); } + newheight = root->height+1; + node->height = newheight; node->count = 1; - root->rnode = node; - root->height++; + rcu_assign_pointer(root->rnode, node); + root->height = newheight; } while (height > root->height); out: return 0; @@ -258,6 +273,8 @@ int radix_tree_insert(struct radix_tree_root *root, int offset; int error; + BUG_ON(radix_tree_is_direct_ptr(item)); + /* Make sure the tree is high enough. */ if (index > radix_tree_maxindex(root->height)) { error = radix_tree_extend(root, index); @@ -275,11 +292,12 @@ int radix_tree_insert(struct radix_tree_root *root, /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) return -ENOMEM; + slot->height = height; if (node) { - node->slots[offset] = slot; + rcu_assign_pointer(node->slots[offset], slot); node->count++; } else - root->rnode = slot; + rcu_assign_pointer(root->rnode, slot); } /* Go a level down */ @@ -295,11 +313,11 @@ int radix_tree_insert(struct radix_tree_root *root, if (node) { node->count++; - node->slots[offset] = item; + rcu_assign_pointer(node->slots[offset], item); BUG_ON(tag_get(node, 0, offset)); BUG_ON(tag_get(node, 1, offset)); } else { - root->rnode = item; + rcu_assign_pointer(root->rnode, radix_tree_ptr_to_direct(item)); BUG_ON(root_tag_get(root, 0)); BUG_ON(root_tag_get(root, 1)); } @@ -308,49 +326,54 @@ int radix_tree_insert(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_insert); -static inline void **__lookup_slot(struct radix_tree_root *root, - unsigned long index) +/** + * radix_tree_lookup_slot - lookup a slot in a radix tree + * @root: radix tree root + * @index: index key + * + * Returns: the slot corresponding to the position @index in the + * radix tree @root. This is useful for update-if-exists operations. + * + * This function cannot be called under rcu_read_lock, it must be + * excluded from writers, as must the returned slot for subsequent + * use by radix_tree_deref_slot() and radix_tree_replace slot. + * Caller must hold tree write locked across slot lookup and + * replace. + */ +void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) { unsigned int height, shift; - struct radix_tree_node **slot; - - height = root->height; + struct radix_tree_node *node, **slot; - if (index > radix_tree_maxindex(height)) + node = root->rnode; + if (node == NULL) return NULL; - if (height == 0 && root->rnode) + if (radix_tree_is_direct_ptr(node)) { + if (index > 0) + return NULL; return (void **)&root->rnode; + } + + height = node->height; + if (index > radix_tree_maxindex(height)) + return NULL; shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = &root->rnode; - while (height > 0) { - if (*slot == NULL) + do { + slot = (struct radix_tree_node **) + (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); + node = *slot; + if (node == NULL) return NULL; - slot = (struct radix_tree_node **) - ((*slot)->slots + - ((index >> shift) & RADIX_TREE_MAP_MASK)); shift -= RADIX_TREE_MAP_SHIFT; height--; - } + } while (height > 0); return (void **)slot; } - -/** - * radix_tree_lookup_slot - lookup a slot in a radix tree - * @root: radix tree root - * @index: index key - * - * Lookup the slot corresponding to the position @index in the radix tree - * @root. This is useful for update-if-exists operations. - */ -void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) -{ - return __lookup_slot(root, index); -} EXPORT_SYMBOL(radix_tree_lookup_slot); /** @@ -359,13 +382,45 @@ EXPORT_SYMBOL(radix_tree_lookup_slot); * @index: index key * * Lookup the item at the position @index in the radix tree @root. + * + * This function can be called under rcu_read_lock, however the caller + * must manage lifetimes of leaf nodes (eg. RCU may also be used to free + * them safely). No RCU barriers are required to access or modify the + * returned item, however. */ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) { - void **slot; + unsigned int height, shift; + struct radix_tree_node *node, **slot; + + node = rcu_dereference(root->rnode); + if (node == NULL) + return NULL; + + if (radix_tree_is_direct_ptr(node)) { + if (index > 0) + return NULL; + return radix_tree_direct_to_ptr(node); + } + + height = node->height; + if (index > radix_tree_maxindex(height)) + return NULL; + + shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = __lookup_slot(root, index); - return slot != NULL ? *slot : NULL; + do { + slot = (struct radix_tree_node **) + (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); + node = rcu_dereference(*slot); + if (node == NULL) + return NULL; + + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } while (height > 0); + + return node; } EXPORT_SYMBOL(radix_tree_lookup); @@ -495,27 +550,30 @@ int radix_tree_tag_get(struct radix_tree_root *root, unsigned long index, unsigned int tag) { unsigned int height, shift; - struct radix_tree_node *slot; + struct radix_tree_node *node; int saw_unset_tag = 0; - height = root->height; - if (index > radix_tree_maxindex(height)) - return 0; - /* check the root's tag bit */ if (!root_tag_get(root, tag)) return 0; - if (height == 0) - return 1; + node = rcu_dereference(root->rnode); + if (node == NULL) + return 0; + + if (radix_tree_is_direct_ptr(node)) + return (index == 0); + + height = node->height; + if (index > radix_tree_maxindex(height)) + return 0; shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; for ( ; ; ) { int offset; - if (slot == NULL) + if (node == NULL) return 0; offset = (index >> shift) & RADIX_TREE_MAP_MASK; @@ -524,15 +582,15 @@ int radix_tree_tag_get(struct radix_tree_root *root, * This is just a debug check. Later, we can bale as soon as * we see an unset tag. */ - if (!tag_get(slot, tag, offset)) + if (!tag_get(node, tag, offset)) saw_unset_tag = 1; if (height == 1) { - int ret = tag_get(slot, tag, offset); + int ret = tag_get(node, tag, offset); BUG_ON(ret && saw_unset_tag); return !!ret; } - slot = slot->slots[offset]; + node = rcu_dereference(node->slots[offset]); shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -541,47 +599,45 @@ EXPORT_SYMBOL(radix_tree_tag_get); #endif static unsigned int -__lookup(struct radix_tree_root *root, void **results, unsigned long index, +__lookup(struct radix_tree_node *slot, void **results, unsigned long index, unsigned int max_items, unsigned long *next_index) { unsigned int nr_found = 0; unsigned int shift, height; - struct radix_tree_node *slot; unsigned long i; - height = root->height; - if (height == 0) { - if (root->rnode && index == 0) - results[nr_found++] = root->rnode; + height = slot->height; + if (height == 0) goto out; - } - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; for ( ; height > 1; height--) { - - for (i = (index >> shift) & RADIX_TREE_MAP_MASK ; - i < RADIX_TREE_MAP_SIZE; i++) { + i = (index >> shift) & RADIX_TREE_MAP_MASK; + for (;;) { if (slot->slots[i] != NULL) break; index &= ~((1UL << shift) - 1); index += 1UL << shift; if (index == 0) goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; } - if (i == RADIX_TREE_MAP_SIZE) - goto out; shift -= RADIX_TREE_MAP_SHIFT; - slot = slot->slots[i]; + slot = rcu_dereference(slot->slots[i]); + if (slot == NULL) + goto out; } /* Bottom level: grab some items */ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { + struct radix_tree_node *node; index++; - if (slot->slots[i]) { - results[nr_found++] = slot->slots[i]; + node = slot->slots[i]; + if (node) { + results[nr_found++] = rcu_dereference(node); if (nr_found == max_items) goto out; } @@ -603,28 +659,51 @@ out: * *@results. * * The implementation is naive. + * + * Like radix_tree_lookup, radix_tree_gang_lookup may be called under + * rcu_read_lock. In this case, rather than the returned results being + * an atomic snapshot of the tree at a single point in time, the semantics + * of an RCU protected gang lookup are as though multiple radix_tree_lookups + * have been issued in individual locks, and results stored in 'results'. */ unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items) { - const unsigned long max_index = radix_tree_maxindex(root->height); + unsigned long max_index; + struct radix_tree_node *node; unsigned long cur_index = first_index; - unsigned int ret = 0; + unsigned int ret; + + node = rcu_dereference(root->rnode); + if (!node) + return 0; + if (radix_tree_is_direct_ptr(node)) { + if (first_index > 0) + return 0; + node = radix_tree_direct_to_ptr(node); + results[0] = rcu_dereference(node); + return 1; + } + + max_index = radix_tree_maxindex(node->height); + + ret = 0; while (ret < max_items) { unsigned int nr_found; unsigned long next_index; /* Index of next search */ if (cur_index > max_index) break; - nr_found = __lookup(root, results + ret, cur_index, + nr_found = __lookup(node, results + ret, cur_index, max_items - ret, &next_index); ret += nr_found; if (next_index == 0) break; cur_index = next_index; } + return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup); @@ -634,55 +713,64 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * open-coding the search. */ static unsigned int -__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index, +__lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, unsigned int max_items, unsigned long *next_index, unsigned int tag) { unsigned int nr_found = 0; - unsigned int shift; - unsigned int height = root->height; - struct radix_tree_node *slot; + unsigned int shift, height; - if (height == 0) { - if (root->rnode && index == 0) - results[nr_found++] = root->rnode; + height = slot->height; + if (height == 0) goto out; - } - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; - do { - unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK; + while (height > 0) { + unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; - for ( ; i < RADIX_TREE_MAP_SIZE; i++) { - if (tag_get(slot, tag, i)) { - BUG_ON(slot->slots[i] == NULL); + for (;;) { + if (tag_get(slot, tag, i)) break; - } index &= ~((1UL << shift) - 1); index += 1UL << shift; if (index == 0) goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; } - if (i == RADIX_TREE_MAP_SIZE) - goto out; height--; if (height == 0) { /* Bottom level: grab some items */ unsigned long j = index & RADIX_TREE_MAP_MASK; for ( ; j < RADIX_TREE_MAP_SIZE; j++) { + struct radix_tree_node *node; index++; - if (tag_get(slot, tag, j)) { - BUG_ON(slot->slots[j] == NULL); - results[nr_found++] = slot->slots[j]; + if (!tag_get(slot, tag, j)) + continue; + node = slot->slots[j]; + /* + * Even though the tag was found set, we need to + * recheck that we have a non-NULL node, because + * if this lookup is lockless, it may have been + * subsequently deleted. + * + * Similar care must be taken in any place that + * lookup ->slots[x] without a lock (ie. can't + * rely on its value remaining the same). + */ + if (node) { + node = rcu_dereference(node); + results[nr_found++] = node; if (nr_found == max_items) goto out; } } } shift -= RADIX_TREE_MAP_SHIFT; - slot = slot->slots[i]; - } while (height > 0); + slot = rcu_dereference(slot->slots[i]); + if (slot == NULL) + break; + } out: *next_index = index; return nr_found; @@ -706,27 +794,44 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag) { - const unsigned long max_index = radix_tree_maxindex(root->height); + struct radix_tree_node *node; + unsigned long max_index; unsigned long cur_index = first_index; - unsigned int ret = 0; + unsigned int ret; /* check the root's tag bit */ if (!root_tag_get(root, tag)) return 0; + node = rcu_dereference(root->rnode); + if (!node) + return 0; + + if (radix_tree_is_direct_ptr(node)) { + if (first_index > 0) + return 0; + node = radix_tree_direct_to_ptr(node); + results[0] = rcu_dereference(node); + return 1; + } + + max_index = radix_tree_maxindex(node->height); + + ret = 0; while (ret < max_items) { unsigned int nr_found; unsigned long next_index; /* Index of next search */ if (cur_index > max_index) break; - nr_found = __lookup_tag(root, results + ret, cur_index, + nr_found = __lookup_tag(node, results + ret, cur_index, max_items - ret, &next_index, tag); ret += nr_found; if (next_index == 0) break; cur_index = next_index; } + return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup_tag); @@ -742,8 +847,19 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) root->rnode->count == 1 && root->rnode->slots[0]) { struct radix_tree_node *to_free = root->rnode; + void *newptr; - root->rnode = to_free->slots[0]; + /* + * We don't need rcu_assign_pointer(), since we are simply + * moving the node from one part of the tree to another. If + * it was safe to dereference the old pointer to it + * (to_free->slots[0]), it will be safe to dereference the new + * one (root->rnode). + */ + newptr = to_free->slots[0]; + if (root->height == 1) + newptr = radix_tree_ptr_to_direct(newptr); + root->rnode = newptr; root->height--; /* must only free zeroed nodes into the slab */ tag_clear(to_free, 0, 0); @@ -767,6 +883,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) { struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; struct radix_tree_node *slot = NULL; + struct radix_tree_node *to_free; unsigned int height, shift; int tag; int offset; @@ -777,6 +894,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) slot = root->rnode; if (height == 0 && root->rnode) { + slot = radix_tree_direct_to_ptr(slot); root_tag_clear_all(root); root->rnode = NULL; goto out; @@ -809,10 +927,17 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) radix_tree_tag_clear(root, index, tag); } + to_free = NULL; /* Now free the nodes we do not need anymore */ while (pathp->node) { pathp->node->slots[pathp->offset] = NULL; pathp->node->count--; + /* + * Queue the node for deferred freeing after the + * last reference to it disappears (set NULL, above). + */ + if (to_free) + radix_tree_node_free(to_free); if (pathp->node->count) { if (pathp->node == root->rnode) @@ -821,13 +946,15 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) } /* Node with zero slots in use so free it */ - radix_tree_node_free(pathp->node); - + to_free = pathp->node; pathp--; + } root_tag_clear_all(root); root->height = 0; root->rnode = NULL; + if (to_free) + radix_tree_node_free(to_free); out: return slot; @@ -846,7 +973,7 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) EXPORT_SYMBOL(radix_tree_tagged); static void -radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags) +radix_tree_node_ctor(void *node, struct kmem_cache *cachep, unsigned long flags) { memset(node, 0, sizeof(struct radix_tree_node)); } @@ -869,7 +996,6 @@ static __init void radix_tree_init_maxindex(void) height_to_maxindex[i] = __maxindex(i); } -#ifdef CONFIG_HOTPLUG_CPU static int radix_tree_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) @@ -889,7 +1015,6 @@ static int radix_tree_callback(struct notifier_block *nfb, } return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ void __init radix_tree_init(void) { diff --git a/lib/random32.c b/lib/random32.c index 4a15ce51cea..ec7f81d3fb1 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -36,6 +36,7 @@ #include <linux/types.h> #include <linux/percpu.h> #include <linux/module.h> +#include <linux/jiffies.h> #include <linux/random.h> struct rnd_state { diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c new file mode 100644 index 00000000000..6a3bd48fa2a --- /dev/null +++ b/lib/reciprocal_div.c @@ -0,0 +1,9 @@ +#include <asm/div64.h> +#include <linux/reciprocal_div.h> + +u32 reciprocal_value(u32 k) +{ + u64 val = (1LL << 32) + (k - 1); + do_div(val, k); + return (u32)val; +} diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index b6c4f898197..479fd462eaa 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -7,6 +7,7 @@ */ #include <linux/spinlock.h> +#include <linux/nmi.h> #include <linux/interrupt.h> #include <linux/debug_locks.h> #include <linux/delay.h> @@ -117,6 +118,9 @@ static void __spin_lock_debug(spinlock_t *lock) raw_smp_processor_id(), current->comm, current->pid, lock); dump_stack(); +#ifdef CONFIG_SMP + trigger_all_cpu_backtrace(); +#endif } } } diff --git a/lib/textsearch.c b/lib/textsearch.c index 2cb4a437942..98bcadc0118 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -40,7 +40,7 @@ * configuration according to the specified parameters. * (3) User starts the search(es) by calling _find() or _next() to * fetch subsequent occurrences. A state variable is provided - * to the algorihtm to store persistant variables. + * to the algorihtm to store persistent variables. * (4) Core eventually resets the search offset and forwards the find() * request to the algorithm. * (5) Algorithm calls get_next_block() provided by the user continously |