From 6fd92b63d0626a8fe7eb8e2e50d19ecaa18cb412 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sun, 9 Mar 2008 21:01:04 +0100 Subject: x86: change x86 to use generic find_next_bit The versions with inline assembly are in fact slower on the machines I tested them on (in userspace) (Athlon XP 2800+, p4-like Xeon 2.8GHz, AMD Opteron 270). The i386-version needed a fix similar to 06024f21 to avoid crashing the benchmark. Benchmark using: gcc -fomit-frame-pointer -Os. For each bitmap size 1...512, for each possible bitmap with one bit set, for each possible offset: find the position of the first bit starting at offset. If you follow ;). Times include setup of the bitmap and checking of the results. Athlon Xeon Opteron 32/64bit x86-specific: 0m3.692s 0m2.820s 0m3.196s / 0m2.480s generic: 0m2.622s 0m1.662s 0m2.100s / 0m1.572s If the bitmap size is not a multiple of BITS_PER_LONG, and no set (cleared) bit is found, find_next_bit (find_next_zero_bit) returns a value outside of the range [0, size]. The generic version always returns exactly size. The generic version also uses unsigned long everywhere, while the x86 versions use a mishmash of int, unsigned (int), long and unsigned long. Using the generic version does give a slightly bigger kernel, though. defconfig: text data bss dec hex filename x86-specific: 4738555 481232 626688 5846475 5935cb vmlinux (32 bit) generic: 4738621 481232 626688 5846541 59360d vmlinux (32 bit) x86-specific: 5392395 846568 724424 6963387 6a40bb vmlinux (64 bit) generic: 5392458 846568 724424 6963450 6a40fa vmlinux (64 bit) Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 +++ arch/x86/lib/Makefile | 2 +- arch/x86/lib/bitops_32.c | 70 ------------------------------------------------ arch/x86/lib/bitops_64.c | 68 ---------------------------------------------- 4 files changed, 4 insertions(+), 139 deletions(-) delete mode 100644 arch/x86/lib/bitops_32.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2fadf794483..5639de47ed4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -77,6 +77,9 @@ config GENERIC_BUG def_bool y depends on BUG +config GENERIC_FIND_NEXT_BIT + def_bool y + config GENERIC_HWEIGHT def_bool y diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 25df1c1989f..436093299bd 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -11,7 +11,7 @@ lib-y += memcpy_$(BITS).o ifeq ($(CONFIG_X86_32),y) lib-y += checksum_32.o lib-y += strstr_32.o - lib-y += bitops_32.o semaphore_32.o string_32.o + lib-y += semaphore_32.o string_32.o lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c deleted file mode 100644 index b6544045985..00000000000 --- a/arch/x86/lib/bitops_32.c +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include - -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -int find_next_bit(const unsigned long *addr, int size, int offset) -{ - const unsigned long *p = addr + (offset >> 5); - int set = 0, bit = offset & 31, res; - - if (bit) { - /* - * Look for nonzero in the first 32 bits: - */ - __asm__("bsfl %1,%0\n\t" - "jne 1f\n\t" - "movl $32, %0\n" - "1:" - : "=r" (set) - : "r" (*p >> bit)); - if (set < (32 - bit)) - return set + offset; - set = 32 - bit; - p++; - } - /* - * No set bit yet, search remaining full words for a bit - */ - res = find_first_bit (p, size - 32 * (p - addr)); - return (offset + set + res); -} -EXPORT_SYMBOL(find_next_bit); - -/** - * find_next_zero_bit - find the first zero bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -int find_next_zero_bit(const unsigned long *addr, int size, int offset) -{ - const unsigned long *p = addr + (offset >> 5); - int set = 0, bit = offset & 31, res; - - if (bit) { - /* - * Look for zero in the first 32 bits. - */ - __asm__("bsfl %1,%0\n\t" - "jne 1f\n\t" - "movl $32, %0\n" - "1:" - : "=r" (set) - : "r" (~(*p >> bit))); - if (set < (32 - bit)) - return set + offset; - set = 32 - bit; - p++; - } - /* - * No zero yet, search remaining full bytes for a zero - */ - res = find_first_zero_bit(p, size - 32 * (p - addr)); - return (offset + set + res); -} -EXPORT_SYMBOL(find_next_zero_bit); diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c index 0e8f491e6cc..0eeb704d251 100644 --- a/arch/x86/lib/bitops_64.c +++ b/arch/x86/lib/bitops_64.c @@ -1,9 +1,7 @@ #include #undef find_first_zero_bit -#undef find_next_zero_bit #undef find_first_bit -#undef find_next_bit static inline long __find_first_zero_bit(const unsigned long * addr, unsigned long size) @@ -57,39 +55,6 @@ long find_first_zero_bit(const unsigned long * addr, unsigned long size) return __find_first_zero_bit (addr, size); } -/** - * find_next_zero_bit - find the next zero bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -long find_next_zero_bit (const unsigned long * addr, long size, long offset) -{ - const unsigned long * p = addr + (offset >> 6); - unsigned long set = 0; - unsigned long res, bit = offset&63; - - if (bit) { - /* - * Look for zero in first word - */ - asm("bsfq %1,%0\n\t" - "cmoveq %2,%0" - : "=r" (set) - : "r" (~(*p >> bit)), "r"(64L)); - if (set < (64 - bit)) - return set + offset; - set = 64 - bit; - p++; - } - /* - * No zero yet, search remaining full words for a zero - */ - res = __find_first_zero_bit (p, size - 64 * (p - addr)); - - return (offset + set + res); -} - static inline long __find_first_bit(const unsigned long * addr, unsigned long size) { @@ -136,40 +101,7 @@ long find_first_bit(const unsigned long * addr, unsigned long size) return __find_first_bit(addr,size); } -/** - * find_next_bit - find the first set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -long find_next_bit(const unsigned long * addr, long size, long offset) -{ - const unsigned long * p = addr + (offset >> 6); - unsigned long set = 0, bit = offset & 63, res; - - if (bit) { - /* - * Look for nonzero in the first 64 bits: - */ - asm("bsfq %1,%0\n\t" - "cmoveq %2,%0\n\t" - : "=r" (set) - : "r" (*p >> bit), "r" (64L)); - if (set < (64 - bit)) - return set + offset; - set = 64 - bit; - p++; - } - /* - * No set bit yet, search remaining full words for a bit - */ - res = __find_first_bit (p, size - 64 * (p - addr)); - return (offset + set + res); -} - #include -EXPORT_SYMBOL(find_next_bit); EXPORT_SYMBOL(find_first_bit); EXPORT_SYMBOL(find_first_zero_bit); -EXPORT_SYMBOL(find_next_zero_bit); -- cgit v1.2.3 From 12d9c8420b9daa1da3d9e090640fb24bcd0deba2 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sat, 15 Mar 2008 13:04:42 +0100 Subject: x86: merge the simple bitops and move them to bitops.h Some of those can be written in such a way that the same inline assembly can be used to generate both 32 bit and 64 bit code. For ffs and fls, x86_64 unconditionally used the cmov instruction and i386 unconditionally used a conditional branch over a mov instruction. In the current patch I chose to select the version based on the availability of the cmov instruction instead. A small detail here is that x86_64 did not previously set CONFIG_X86_CMOV=y. Improved comments for ffs, ffz, fls and variations. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 4da3cdb9c1b..cf3ff2c5cef 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -398,7 +398,7 @@ config X86_TSC # generates cmov. config X86_CMOV def_bool y - depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7) + depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || X86_64) config X86_MINIMUM_CPU_FAMILY int -- cgit v1.2.3 From 77b9bd9c49442407804c37bcc82021a35277f83c Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 1 Apr 2008 11:46:19 +0200 Subject: x86: generic versions of find_first_(zero_)bit, convert i386 Generic versions of __find_first_bit and __find_first_zero_bit are introduced as simplified versions of __find_next_bit and __find_next_zero_bit. Their compilation and use are guarded by a new config variable GENERIC_FIND_FIRST_BIT. The generic versions of find_first_bit and find_first_zero_bit are implemented in terms of the newly introduced __find_first_bit and __find_first_zero_bit. This patch does not remove the i386-specific implementation, but it does switch i386 to use the generic functions by setting GENERIC_FIND_FIRST_BIT=y for X86_32. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5639de47ed4..1a69b68ff6c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -77,6 +77,9 @@ config GENERIC_BUG def_bool y depends on BUG +config GENERIC_FIND_FIRST_BIT + def_bool X86_32 + config GENERIC_FIND_NEXT_BIT def_bool y -- cgit v1.2.3 From 2aba6925fdb96428d1129a61b1233597a03a387b Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 1 Apr 2008 17:41:26 +0200 Subject: x86: switch 64-bit to generic find_first_bit Switch x86_64 to generic find_first_bit. The x86_64-specific implementation is not removed. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/lib/bitops_64.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1a69b68ff6c..700447738e7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -78,7 +78,7 @@ config GENERIC_BUG depends on BUG config GENERIC_FIND_FIRST_BIT - def_bool X86_32 + def_bool y config GENERIC_FIND_NEXT_BIT def_bool y diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c index 0eeb704d251..568467d390c 100644 --- a/arch/x86/lib/bitops_64.c +++ b/arch/x86/lib/bitops_64.c @@ -1,3 +1,4 @@ +#ifndef CONFIG_GENERIC_FIND_FIRST_BIT #include #undef find_first_zero_bit @@ -105,3 +106,4 @@ long find_first_bit(const unsigned long * addr, unsigned long size) EXPORT_SYMBOL(find_first_bit); EXPORT_SYMBOL(find_first_zero_bit); +#endif -- cgit v1.2.3 From 5245698f665c4b7a533dcc47a5afdf33095d436a Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 1 Apr 2008 17:47:57 +0200 Subject: x86, UML: remove x86-specific implementations of find_first_bit x86 has been switched to the generic versions of find_first_bit and find_first_zero_bit, but the original versions were retained. This patch just removes the now unused x86-specific versions. also update UML. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/lib/Makefile | 1 - arch/x86/lib/bitops_64.c | 109 ----------------------------------------------- 2 files changed, 110 deletions(-) delete mode 100644 arch/x86/lib/bitops_64.c (limited to 'arch/x86') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 436093299bd..76f60f52a88 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -21,7 +21,6 @@ else lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o lib-y += thunk_64.o clear_page_64.o copy_page_64.o - lib-y += bitops_64.o lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o endif diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c deleted file mode 100644 index 568467d390c..00000000000 --- a/arch/x86/lib/bitops_64.c +++ /dev/null @@ -1,109 +0,0 @@ -#ifndef CONFIG_GENERIC_FIND_FIRST_BIT -#include - -#undef find_first_zero_bit -#undef find_first_bit - -static inline long -__find_first_zero_bit(const unsigned long * addr, unsigned long size) -{ - long d0, d1, d2; - long res; - - /* - * We must test the size in words, not in bits, because - * otherwise incoming sizes in the range -63..-1 will not run - * any scasq instructions, and then the flags used by the je - * instruction will have whatever random value was in place - * before. Nobody should call us like that, but - * find_next_zero_bit() does when offset and size are at the - * same word and it fails to find a zero itself. - */ - size += 63; - size >>= 6; - if (!size) - return 0; - asm volatile( - " repe; scasq\n" - " je 1f\n" - " xorq -8(%%rdi),%%rax\n" - " subq $8,%%rdi\n" - " bsfq %%rax,%%rdx\n" - "1: subq %[addr],%%rdi\n" - " shlq $3,%%rdi\n" - " addq %%rdi,%%rdx" - :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) - :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL), - [addr] "S" (addr) : "memory"); - /* - * Any register would do for [addr] above, but GCC tends to - * prefer rbx over rsi, even though rsi is readily available - * and doesn't have to be saved. - */ - return res; -} - -/** - * find_first_zero_bit - find the first zero bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search - * - * Returns the bit-number of the first zero bit, not the number of the byte - * containing a bit. - */ -long find_first_zero_bit(const unsigned long * addr, unsigned long size) -{ - return __find_first_zero_bit (addr, size); -} - -static inline long -__find_first_bit(const unsigned long * addr, unsigned long size) -{ - long d0, d1; - long res; - - /* - * We must test the size in words, not in bits, because - * otherwise incoming sizes in the range -63..-1 will not run - * any scasq instructions, and then the flags used by the jz - * instruction will have whatever random value was in place - * before. Nobody should call us like that, but - * find_next_bit() does when offset and size are at the same - * word and it fails to find a one itself. - */ - size += 63; - size >>= 6; - if (!size) - return 0; - asm volatile( - " repe; scasq\n" - " jz 1f\n" - " subq $8,%%rdi\n" - " bsfq (%%rdi),%%rax\n" - "1: subq %[addr],%%rdi\n" - " shlq $3,%%rdi\n" - " addq %%rdi,%%rax" - :"=a" (res), "=&c" (d0), "=&D" (d1) - :"0" (0ULL), "1" (size), "2" (addr), - [addr] "r" (addr) : "memory"); - return res; -} - -/** - * find_first_bit - find the first set bit in a memory region - * @addr: The address to start the search at - * @size: The maximum size to search - * - * Returns the bit-number of the first set bit, not the number of the byte - * containing a bit. - */ -long find_first_bit(const unsigned long * addr, unsigned long size) -{ - return __find_first_bit(addr,size); -} - -#include - -EXPORT_SYMBOL(find_first_bit); -EXPORT_SYMBOL(find_first_zero_bit); -#endif -- cgit v1.2.3 From 19870def587554c4055df3e74a21508e3647fb7e Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 25 Apr 2008 13:12:53 +0200 Subject: x86, bitops: select the generic bitmap search functions Introduce GENERIC_FIND_FIRST_BIT and GENERIC_FIND_NEXT_BIT in lib/Kconfig, defaulting to off. An arch that wants to use the generic implementation now only has to use a select statement to include them. I added an always-y option (X86_CPU) to arch/x86/Kconfig.cpu and used that to select the generic search functions. This way ARCH=um SUBARCH=i386 automatically picks up the change too, and arch/um/Kconfig.i386 can therefore be simplified a bit. ARCH=um SUBARCH=x86_64 does things differently, but still compiles fine. It seems that a "def_bool y" always wins over a "def_bool n"? Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 6 ------ arch/x86/Kconfig.cpu | 5 +++++ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 700447738e7..2fadf794483 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -77,12 +77,6 @@ config GENERIC_BUG def_bool y depends on BUG -config GENERIC_FIND_FIRST_BIT - def_bool y - -config GENERIC_FIND_NEXT_BIT - def_bool y - config GENERIC_HWEIGHT def_bool y diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index cf3ff2c5cef..7ef18b01f0b 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -278,6 +278,11 @@ config GENERIC_CPU endchoice +config X86_CPU + def_bool y + select GENERIC_FIND_FIRST_BIT + select GENERIC_FIND_NEXT_BIT + config X86_GENERIC bool "Generic x86 support" depends on X86_32 -- cgit v1.2.3