From e01b70ef3eb3080fecc35e15f68cd274c0a48163 Mon Sep 17 00:00:00 2001
From: Jiri Hladky
Date: Mon, 2 Jun 2008 12:00:19 +0200
Subject: x86: fix bug in arch/i386/lib/delay.c file, delay_loop function

While trying to understand how BogoMIPS are implemented, I found a bug in
arch/i386/lib/delay.c, in the delay_loop function. The function fails for
loops > 2^31+1; this is because SF is set when dec produces values > 2^31.
The fix is to use the jnz instruction instead of jns (and to add one decl
instruction at the end so that the number of loops is exactly the same as
in the original version).

Martin Mares observed:

> It is a long time since I have hacked that file, but you should definitely
> make sure that the function is never called with a zero argument. In such
> case, the original version made just a single pass, but your version
> makes 2^32 of them.

Fixed that.

Signed-off-by: Ingo Molnar
---
 arch/x86/lib/delay_32.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'arch/x86/lib')

diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c
index d710f2d167b..ef691316f8b 100644
--- a/arch/x86/lib/delay_32.c
+++ b/arch/x86/lib/delay_32.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 1993 Linus Torvalds
  * Copyright (C) 1997 Martin Mares
+ * Copyright (C) 2008 Jiri Hladky
  *
  * The __delay function must _NOT_ be inlined as its execution time
  * depends wildly on alignment on many x86 processors. The additional
  * jump magic is needed to get the timing stable on all the CPU's
  * we have to worry about.
@@ -28,16 +29,22 @@
 /* simple loop based delay: */
 static void delay_loop(unsigned long loops)
 {
-	int d0;
-
 	__asm__ __volatile__(
-		"\tjmp 1f\n"
-		".align 16\n"
-		"1:\tjmp 2f\n"
-		".align 16\n"
-		"2:\tdecl %0\n\tjns 2b"
-		:"=&a" (d0)
-		:"0" (loops));
+		" test %0,%0	\n"
+		" jz 3f		\n"
+		" jmp 1f	\n"
+
+		".align 16	\n"
+		"1: jmp 2f	\n"
+
+		".align 16	\n"
+		"2: decl %0	\n"
+		" jnz 2b	\n"
+		"3: decl %0	\n"
+
+		: /* we don't need output */
+		:"a" (loops)
+	);
 }
 
 /* TSC based delay: */
-- cgit v1.2.3


From ff1b15b646177c6cc465ac2dd0be6ae16e965654 Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 24 Jun 2008 09:27:19 -0300
Subject: x86: don't use size specifiers.

Remove the "l" suffix from the inline asm in arch/x86/lib/delay_32.c.
It is not needed.

Signed-off-by: Glauber Costa
Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar
---
 arch/x86/lib/delay_32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86/lib')

diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c
index ef691316f8b..54013f87d95 100644
--- a/arch/x86/lib/delay_32.c
+++ b/arch/x86/lib/delay_32.c
@@ -38,9 +38,9 @@ static void delay_loop(unsigned long loops)
 		"1: jmp 2f	\n"
 
 		".align 16	\n"
-		"2: decl %0	\n"
+		"2: dec %0	\n"
 		" jnz 2b	\n"
-		"3: decl %0	\n"
+		"3: dec %0	\n"
 
 		: /* we don't need output */
 		:"a" (loops)
-- cgit v1.2.3


From 0a4d8a472f645d99f86303db1462b64e371b090d Mon Sep 17 00:00:00 2001
From: Glauber Costa
Date: Tue, 24 Jun 2008 09:34:08 -0300
Subject: x86: provide delay loop for x86_64.

This is for consistency with i386. We call use_tsc_delay() at tsc
initialization for x86_64, so we'll always be using it.

Signed-off-by: Glauber Costa
Signed-off-by: H.
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/delay_64.c | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c index 4c441be9264..d0326d07c84 100644 --- a/arch/x86/lib/delay_64.c +++ b/arch/x86/lib/delay_64.c @@ -22,13 +22,28 @@ #include #endif -int __devinit read_current_timer(unsigned long *timer_value) +/* simple loop based delay: */ +static void delay_loop(unsigned long loops) { - rdtscll(*timer_value); - return 0; + asm volatile( + " test %0,%0 \n" + " jz 3f \n" + " jmp 1f \n" + + ".align 16 \n" + "1: jmp 2f \n" + + ".align 16 \n" + "2: dec %0 \n" + " jnz 2b \n" + "3: dec %0 \n" + + : /* we don't need output */ + :"a" (loops) + ); } -void __delay(unsigned long loops) +static void delay_tsc(unsigned long loops) { unsigned bclock, now; int cpu; @@ -63,6 +78,27 @@ void __delay(unsigned long loops) } preempt_enable(); } + +static void (*delay_fn)(unsigned long) = delay_loop; + +void use_tsc_delay(void) +{ + delay_fn = delay_tsc; +} + +int __devinit read_current_timer(unsigned long *timer_value) +{ + if (delay_fn == delay_tsc) { + rdtscll(*timer_value); + return 0; + } + return -1; +} + +void __delay(unsigned long loops) +{ + delay_fn(loops); +} EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) -- cgit v1.2.3 From a76febe975997b933b7285b6e20bb0a21c09d453 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 09:52:36 -0300 Subject: x86: use rdtscll in read_current_timer for i386. This way we achieve the same code for both arches. Signed-off-by: Glauber Costa Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/delay_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c index 54013f87d95..bf6de05445b 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay_32.c @@ -98,7 +98,7 @@ void use_tsc_delay(void) int __devinit read_current_timer(unsigned long *timer_val) { if (delay_fn == delay_tsc) { - rdtscl(*timer_val); + rdtscll(*timer_val); return 0; } return -1; -- cgit v1.2.3 From 7e58818d32c18197602d1869b22cfda99efd05fe Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 10:21:25 -0300 Subject: x86: explicitly use edx in const delay function. For x86_64, we can't just use %0, as it would generate a mul against rdx, which is not really what we want (note the ">> 32" in x86_64 version). Using a u64 variable with a shift in i386 generates bad code, so the solution is to explicitly use %%edx in inline assembly for both. Signed-off-by: Glauber Costa Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/delay_32.c | 2 +- arch/x86/lib/delay_64.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c index bf6de05445b..0b659a320b1 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay_32.c @@ -114,7 +114,7 @@ inline void __const_udelay(unsigned long xloops) int d0; xloops *= 4; - __asm__("mull %0" + __asm__("mull %%edx" :"=d" (xloops), "=&a" (d0) :"1" (xloops), "0" (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c index d0326d07c84..ff3dfecdb6f 100644 --- a/arch/x86/lib/delay_64.c +++ b/arch/x86/lib/delay_64.c @@ -103,9 +103,16 @@ EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { - __delay(((xloops * HZ * - cpu_data(raw_smp_processor_id()).loops_per_jiffy) >> 32) + 1); + int d0; + xloops *= 4; + __asm__("mull %%edx" + :"=d" (xloops), "=&a" (d0) + :"1" (xloops), "0" + (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); + + __delay(++xloops); } + EXPORT_SYMBOL(__const_udelay); void __udelay(unsigned long usecs) -- cgit v1.2.3 From f0fbf0abc093ec8bf64506eee4ede9e5daf40ffd Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 3 Jul 2008 12:35:41 -0300 Subject: x86: integrate delay functions. delay_32.c, delay_64.c are now equal, and are integrated into delay.c. Signed-off-by: Glauber Costa Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/Makefile | 2 +- arch/x86/lib/delay.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/lib/delay_32.c | 138 ------------------------------------------------ arch/x86/lib/delay_64.c | 128 -------------------------------------------- 4 files changed, 138 insertions(+), 267 deletions(-) create mode 100644 arch/x86/lib/delay.c delete mode 100644 arch/x86/lib/delay_32.c delete mode 100644 arch/x86/lib/delay_64.c (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 76f60f52a88..86960a6c41c 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_SMP) := msr-on-cpu.o -lib-y := delay_$(BITS).o +lib-y := delay.o lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o lib-y += memcpy_$(BITS).o diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c new file mode 100644 index 00000000000..f4568605d7d --- /dev/null +++ b/arch/x86/lib/delay.c @@ -0,0 +1,137 @@ +/* + * Precise Delay Loops for i386 + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares + * Copyright (C) 2008 Jiri Hladky + * + * The __delay function must _NOT_ be inlined as its execution time + * depends wildly on alignment on many x86 processors. The additional + * jump magic is needed to get the timing stable on all the CPU's + * we have to worry about. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_SMP +# include +#endif + +/* simple loop based delay: */ +static void delay_loop(unsigned long loops) +{ + asm volatile( + " test %0,%0 \n" + " jz 3f \n" + " jmp 1f \n" + + ".align 16 \n" + "1: jmp 2f \n" + + ".align 16 \n" + "2: dec %0 \n" + " jnz 2b \n" + "3: dec %0 \n" + + : /* we don't need output */ + :"a" (loops) + ); +} + +/* TSC based delay: */ +static void delay_tsc(unsigned long loops) +{ + unsigned long bclock, now; + int cpu; + + preempt_disable(); + cpu = smp_processor_id(); + rdtscl(bclock); + for (;;) { + rdtscl(now); + if ((now - bclock) >= loops) + break; + + /* Allow RT tasks to run */ + preempt_enable(); + rep_nop(); + preempt_disable(); + + /* + * It is possible that we moved to another CPU, and + * since TSC's are per-cpu we need to calculate + * that. The delay must guarantee that we wait "at + * least" the amount of time. Being moved to another + * CPU could make the wait longer but we just need to + * make sure we waited long enough. Rebalance the + * counter for this CPU. + */ + if (unlikely(cpu != smp_processor_id())) { + loops -= (now - bclock); + cpu = smp_processor_id(); + rdtscl(bclock); + } + } + preempt_enable(); +} + +/* + * Since we calibrate only once at boot, this + * function should be set once at boot and not changed + */ +static void (*delay_fn)(unsigned long) = delay_loop; + +void use_tsc_delay(void) +{ + delay_fn = delay_tsc; +} + +int __devinit read_current_timer(unsigned long *timer_val) +{ + if (delay_fn == delay_tsc) { + rdtscll(*timer_val); + return 0; + } + return -1; +} + +void __delay(unsigned long loops) +{ + delay_fn(loops); +} +EXPORT_SYMBOL(__delay); + +inline void __const_udelay(unsigned long xloops) +{ + int d0; + + xloops *= 4; + asm("mull %%edx" + :"=d" (xloops), "=&a" (d0) + :"1" (xloops), "0" + (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); + + __delay(++xloops); +} +EXPORT_SYMBOL(__const_udelay); + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ +} +EXPORT_SYMBOL(__udelay); + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c deleted file mode 100644 index 0b659a320b1..00000000000 --- a/arch/x86/lib/delay_32.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Precise Delay Loops for i386 - * - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares - * Copyright (C) 2008 Jiri Hladky - * - * The __delay function must _NOT_ be inlined as its execution time - * depends wildly on alignment on many x86 processors. The additional - * jump magic is needed to get the timing stable on all the CPU's - * we have to worry about. 
- */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_SMP -# include -#endif - -/* simple loop based delay: */ -static void delay_loop(unsigned long loops) -{ - __asm__ __volatile__( - " test %0,%0 \n" - " jz 3f \n" - " jmp 1f \n" - - ".align 16 \n" - "1: jmp 2f \n" - - ".align 16 \n" - "2: dec %0 \n" - " jnz 2b \n" - "3: dec %0 \n" - - : /* we don't need output */ - :"a" (loops) - ); -} - -/* TSC based delay: */ -static void delay_tsc(unsigned long loops) -{ - unsigned long bclock, now; - int cpu; - - preempt_disable(); - cpu = smp_processor_id(); - rdtscl(bclock); - for (;;) { - rdtscl(now); - if ((now - bclock) >= loops) - break; - - /* Allow RT tasks to run */ - preempt_enable(); - rep_nop(); - preempt_disable(); - - /* - * It is possible that we moved to another CPU, and - * since TSC's are per-cpu we need to calculate - * that. The delay must guarantee that we wait "at - * least" the amount of time. Being moved to another - * CPU could make the wait longer but we just need to - * make sure we waited long enough. Rebalance the - * counter for this CPU. - */ - if (unlikely(cpu != smp_processor_id())) { - loops -= (now - bclock); - cpu = smp_processor_id(); - rdtscl(bclock); - } - } - preempt_enable(); -} - -/* - * Since we calibrate only once at boot, this - * function should be set once at boot and not changed - */ -static void (*delay_fn)(unsigned long) = delay_loop; - -void use_tsc_delay(void) -{ - delay_fn = delay_tsc; -} - -int __devinit read_current_timer(unsigned long *timer_val) -{ - if (delay_fn == delay_tsc) { - rdtscll(*timer_val); - return 0; - } - return -1; -} - -void __delay(unsigned long loops) -{ - delay_fn(loops); -} - -inline void __const_udelay(unsigned long xloops) -{ - int d0; - - xloops *= 4; - __asm__("mull %%edx" - :"=d" (xloops), "=&a" (d0) - :"1" (xloops), "0" - (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); - - __delay(++xloops); -} - -void __udelay(unsigned long usecs) -{ - __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ -} - -void __ndelay(unsigned long nsecs) -{ - __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ -} - -EXPORT_SYMBOL(__delay); -EXPORT_SYMBOL(__const_udelay); -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__ndelay); diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c deleted file mode 100644 index ff3dfecdb6f..00000000000 --- a/arch/x86/lib/delay_64.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Precise Delay Loops for x86-64 - * - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares - * - * The __delay function must _NOT_ be inlined as its execution time - * depends wildly on alignment on many x86 processors. 
- */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -#ifdef CONFIG_SMP -#include -#endif - -/* simple loop based delay: */ -static void delay_loop(unsigned long loops) -{ - asm volatile( - " test %0,%0 \n" - " jz 3f \n" - " jmp 1f \n" - - ".align 16 \n" - "1: jmp 2f \n" - - ".align 16 \n" - "2: dec %0 \n" - " jnz 2b \n" - "3: dec %0 \n" - - : /* we don't need output */ - :"a" (loops) - ); -} - -static void delay_tsc(unsigned long loops) -{ - unsigned bclock, now; - int cpu; - - preempt_disable(); - cpu = smp_processor_id(); - rdtscl(bclock); - for (;;) { - rdtscl(now); - if ((now - bclock) >= loops) - break; - - /* Allow RT tasks to run */ - preempt_enable(); - rep_nop(); - preempt_disable(); - - /* - * It is possible that we moved to another CPU, and - * since TSC's are per-cpu we need to calculate - * that. The delay must guarantee that we wait "at - * least" the amount of time. Being moved to another - * CPU could make the wait longer but we just need to - * make sure we waited long enough. Rebalance the - * counter for this CPU. - */ - if (unlikely(cpu != smp_processor_id())) { - loops -= (now - bclock); - cpu = smp_processor_id(); - rdtscl(bclock); - } - } - preempt_enable(); -} - -static void (*delay_fn)(unsigned long) = delay_loop; - -void use_tsc_delay(void) -{ - delay_fn = delay_tsc; -} - -int __devinit read_current_timer(unsigned long *timer_value) -{ - if (delay_fn == delay_tsc) { - rdtscll(*timer_value); - return 0; - } - return -1; -} - -void __delay(unsigned long loops) -{ - delay_fn(loops); -} -EXPORT_SYMBOL(__delay); - -inline void __const_udelay(unsigned long xloops) -{ - int d0; - xloops *= 4; - __asm__("mull %%edx" - :"=d" (xloops), "=&a" (d0) - :"1" (xloops), "0" - (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); - - __delay(++xloops); -} - -EXPORT_SYMBOL(__const_udelay); - -void __udelay(unsigned long usecs) -{ - __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ -} -EXPORT_SYMBOL(__udelay); - -void __ndelay(unsigned long nsecs) -{ - __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ -} -EXPORT_SYMBOL(__ndelay); -- cgit v1.2.3 From edf10162b2c5ad78ada8e63e960f9d0949c6c219 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 13 Jun 2008 16:35:52 -0300 Subject: x86: don't clobber r8 nor use rcx. There's really no reason to clobber r8 or pass the address in rcx. We can safely use only two registers (which we already have to touch anyway) to do the job. Signed-off-by: Glauber Costa Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/getuser_64.S | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S index 5448876261f..2b003d31348 100644 --- a/arch/x86/lib/getuser_64.S +++ b/arch/x86/lib/getuser_64.S @@ -36,10 +36,10 @@ .text ENTRY(__get_user_1) CFI_STARTPROC - GET_THREAD_INFO(%r8) - cmpq threadinfo_addr_limit(%r8),%rcx + GET_THREAD_INFO(%rdx) + cmpq threadinfo_addr_limit(%rdx),%rax jae bad_get_user -1: movzb (%rcx),%edx +1: movzb (%rax),%edx xorl %eax,%eax ret CFI_ENDPROC @@ -47,48 +47,48 @@ ENDPROC(__get_user_1) ENTRY(__get_user_2) CFI_STARTPROC - GET_THREAD_INFO(%r8) - addq $1,%rcx + GET_THREAD_INFO(%rdx) + addq $1,%rax jc 20f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%rdx),%rax jae 20f - decq %rcx -2: movzwl (%rcx),%edx + decq %rax +2: movzwl (%rax),%edx xorl %eax,%eax ret -20: decq %rcx +20: decq %rax jmp bad_get_user CFI_ENDPROC ENDPROC(__get_user_2) ENTRY(__get_user_4) CFI_STARTPROC - GET_THREAD_INFO(%r8) - addq $3,%rcx + GET_THREAD_INFO(%rdx) + addq $3,%rax jc 30f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%rdx),%rax jae 30f - subq $3,%rcx -3: movl (%rcx),%edx + subq $3,%rax +3: movl (%rax),%edx xorl %eax,%eax ret -30: subq $3,%rcx +30: subq $3,%rax jmp bad_get_user CFI_ENDPROC ENDPROC(__get_user_4) ENTRY(__get_user_8) CFI_STARTPROC - GET_THREAD_INFO(%r8) - addq $7,%rcx + GET_THREAD_INFO(%rdx) + addq $7,%rax jc 40f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq threadinfo_addr_limit(%rdx),%rax jae 40f - subq $7,%rcx -4: movq (%rcx),%rdx + subq $7,%rax +4: movq (%rax),%rdx xorl %eax,%eax ret -40: subq $7,%rcx +40: subq $7,%rax jmp bad_get_user CFI_ENDPROC ENDPROC(__get_user_8) -- cgit v1.2.3 From 9aa038815b5756e20a00b8e1efd5740434b37aea Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 13 Jun 2008 22:41:51 -0300 Subject: x86: don't use word-size specifiers. Since the instructions refer to registers, they'll be able to figure it out. Signed-off-by: Glauber Costa Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/getuser_32.S | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S index 6d84b53f12a..8200fde55f5 100644 --- a/arch/x86/lib/getuser_32.S +++ b/arch/x86/lib/getuser_32.S @@ -29,44 +29,44 @@ ENTRY(__get_user_1) CFI_STARTPROC GET_THREAD_INFO(%edx) - cmpl TI_addr_limit(%edx),%eax + cmp TI_addr_limit(%edx),%eax jae bad_get_user -1: movzbl (%eax),%edx - xorl %eax,%eax +1: movzb (%eax),%edx + xor %eax,%eax ret CFI_ENDPROC ENDPROC(__get_user_1) ENTRY(__get_user_2) CFI_STARTPROC - addl $1,%eax + add $1,%eax jc bad_get_user GET_THREAD_INFO(%edx) - cmpl TI_addr_limit(%edx),%eax + cmp TI_addr_limit(%edx),%eax jae bad_get_user 2: movzwl -1(%eax),%edx - xorl %eax,%eax + xor %eax,%eax ret CFI_ENDPROC ENDPROC(__get_user_2) ENTRY(__get_user_4) CFI_STARTPROC - addl $3,%eax + add $3,%eax jc bad_get_user GET_THREAD_INFO(%edx) - cmpl TI_addr_limit(%edx),%eax + cmp TI_addr_limit(%edx),%eax jae bad_get_user -3: movl -3(%eax),%edx - xorl %eax,%eax +3: mov -3(%eax),%edx + xor %eax,%eax ret CFI_ENDPROC ENDPROC(__get_user_4) bad_get_user: CFI_STARTPROC - xorl %edx,%edx - movl $-14,%eax + xor %edx,%edx + mov $-14,%eax ret CFI_ENDPROC END(bad_get_user) -- cgit v1.2.3 From 9262875395cf22b5a90dd8a640e1070cedf55d0e Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 11:13:16 -0300 Subject: x86: adapt x86_64 getuser functions. Instead of doing a sub after the addition, use the offset directly at the memory operand of the mov instructions. This is the way i386 do. Signed-off-by: Glauber Costa Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/getuser_64.S | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S index 2b003d31348..df37d3a9ba2 100644 --- a/arch/x86/lib/getuser_64.S +++ b/arch/x86/lib/getuser_64.S @@ -47,49 +47,40 @@ ENDPROC(__get_user_1) ENTRY(__get_user_2) CFI_STARTPROC - GET_THREAD_INFO(%rdx) addq $1,%rax - jc 20f + jc bad_get_user + GET_THREAD_INFO(%rdx) cmpq threadinfo_addr_limit(%rdx),%rax - jae 20f - decq %rax -2: movzwl (%rax),%edx + jae bad_get_user +2: movzwl -1(%rax),%edx xorl %eax,%eax ret -20: decq %rax - jmp bad_get_user CFI_ENDPROC ENDPROC(__get_user_2) ENTRY(__get_user_4) CFI_STARTPROC - GET_THREAD_INFO(%rdx) addq $3,%rax - jc 30f + jc bad_get_user + GET_THREAD_INFO(%rdx) cmpq threadinfo_addr_limit(%rdx),%rax - jae 30f - subq $3,%rax -3: movl (%rax),%edx + jae bad_get_user +3: movl -3(%rax),%edx xorl %eax,%eax ret -30: subq $3,%rax - jmp bad_get_user CFI_ENDPROC ENDPROC(__get_user_4) ENTRY(__get_user_8) CFI_STARTPROC - GET_THREAD_INFO(%rdx) addq $7,%rax - jc 40f + jc bad_get_user + GET_THREAD_INFO(%rdx) cmpq threadinfo_addr_limit(%rdx),%rax - jae 40f - subq $7,%rax -4: movq (%rax),%rdx + jae bad_get_user +4: movq -7(%rax),%rdx xorl %eax,%eax ret -40: subq $7,%rax - jmp bad_get_user CFI_ENDPROC ENDPROC(__get_user_8) -- cgit v1.2.3 From 26ccb8a7183eed424ff9c874c83af20dafe7cdef Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 11:19:35 -0300 Subject: x86: rename threadinfo to TI. This is for consistency with i386. Signed-off-by: Glauber Costa Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/copy_user_64.S | 4 ++-- arch/x86/lib/getuser_64.S | 8 ++++---- arch/x86/lib/putuser_64.S | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index ee1c3f63515..7eaaf0123b4 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -40,7 +40,7 @@ ENTRY(copy_to_user) movq %rdi,%rcx addq %rdx,%rcx jc bad_to_user - cmpq threadinfo_addr_limit(%rax),%rcx + cmpq TI_addr_limit(%rax),%rcx jae bad_to_user xorl %eax,%eax /* clear zero flag */ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string @@ -65,7 +65,7 @@ ENTRY(copy_from_user) movq %rsi,%rcx addq %rdx,%rcx jc bad_from_user - cmpq threadinfo_addr_limit(%rax),%rcx + cmpq TI_addr_limit(%rax),%rcx jae bad_from_user movl $1,%ecx /* set zero flag */ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S index df37d3a9ba2..0ec7890f9dc 100644 --- a/arch/x86/lib/getuser_64.S +++ b/arch/x86/lib/getuser_64.S @@ -37,7 +37,7 @@ ENTRY(__get_user_1) CFI_STARTPROC GET_THREAD_INFO(%rdx) - cmpq threadinfo_addr_limit(%rdx),%rax + cmpq TI_addr_limit(%rdx),%rax jae bad_get_user 1: movzb (%rax),%edx xorl %eax,%eax @@ -50,7 +50,7 @@ ENTRY(__get_user_2) addq $1,%rax jc bad_get_user GET_THREAD_INFO(%rdx) - cmpq threadinfo_addr_limit(%rdx),%rax + cmpq TI_addr_limit(%rdx),%rax jae bad_get_user 2: movzwl -1(%rax),%edx xorl %eax,%eax @@ -63,7 +63,7 @@ ENTRY(__get_user_4) addq $3,%rax jc bad_get_user GET_THREAD_INFO(%rdx) - cmpq threadinfo_addr_limit(%rdx),%rax + cmpq TI_addr_limit(%rdx),%rax jae bad_get_user 3: movl -3(%rax),%edx xorl %eax,%eax @@ -76,7 +76,7 @@ ENTRY(__get_user_8) addq $7,%rax jc bad_get_user GET_THREAD_INFO(%rdx) - cmpq threadinfo_addr_limit(%rdx),%rax + cmpq TI_addr_limit(%rdx),%rax jae bad_get_user 4: movq -7(%rax),%rdx xorl %eax,%eax diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S index 4989f5a8fa9..940796fa0d9 100644 --- a/arch/x86/lib/putuser_64.S +++ b/arch/x86/lib/putuser_64.S @@ -35,7 +35,7 @@ ENTRY(__put_user_1) CFI_STARTPROC GET_THREAD_INFO(%r8) - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq TI_addr_limit(%r8),%rcx jae bad_put_user 1: movb %dl,(%rcx) xorl %eax,%eax @@ -48,7 +48,7 @@ ENTRY(__put_user_2) GET_THREAD_INFO(%r8) addq $1,%rcx jc 20f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq TI_addr_limit(%r8),%rcx jae 20f decq %rcx 2: movw %dx,(%rcx) @@ -64,7 +64,7 @@ ENTRY(__put_user_4) GET_THREAD_INFO(%r8) addq $3,%rcx jc 30f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq TI_addr_limit(%r8),%rcx jae 30f subq $3,%rcx 3: movl %edx,(%rcx) @@ -80,7 +80,7 @@ ENTRY(__put_user_8) GET_THREAD_INFO(%r8) addq $7,%rcx jc 40f - cmpq threadinfo_addr_limit(%r8),%rcx + cmpq TI_addr_limit(%r8),%rcx jae 40f subq $7,%rcx 4: movq %rdx,(%rcx) -- cgit v1.2.3 From ef8c1a2d0e990d0f4f15e1d45eeb262755e3d4c3 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 11:21:53 -0300 Subject: x86: don't use word-size specifiers on getuser_64. The instructions access registers, so the size is unambiguous. Signed-off-by: Glauber Costa Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/getuser_64.S | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S index 0ec7890f9dc..6134752a75f 100644 --- a/arch/x86/lib/getuser_64.S +++ b/arch/x86/lib/getuser_64.S @@ -37,57 +37,57 @@ ENTRY(__get_user_1) CFI_STARTPROC GET_THREAD_INFO(%rdx) - cmpq TI_addr_limit(%rdx),%rax + cmp TI_addr_limit(%rdx),%rax jae bad_get_user 1: movzb (%rax),%edx - xorl %eax,%eax + xor %eax,%eax ret CFI_ENDPROC ENDPROC(__get_user_1) ENTRY(__get_user_2) CFI_STARTPROC - addq $1,%rax + add $1,%rax jc bad_get_user GET_THREAD_INFO(%rdx) - cmpq TI_addr_limit(%rdx),%rax + cmp TI_addr_limit(%rdx),%rax jae bad_get_user 2: movzwl -1(%rax),%edx - xorl %eax,%eax + xor %eax,%eax ret CFI_ENDPROC ENDPROC(__get_user_2) ENTRY(__get_user_4) CFI_STARTPROC - addq $3,%rax + add $3,%rax jc bad_get_user GET_THREAD_INFO(%rdx) - cmpq TI_addr_limit(%rdx),%rax + cmp TI_addr_limit(%rdx),%rax jae bad_get_user -3: movl -3(%rax),%edx - xorl %eax,%eax +3: mov -3(%rax),%edx + xor %eax,%eax ret CFI_ENDPROC ENDPROC(__get_user_4) ENTRY(__get_user_8) CFI_STARTPROC - addq $7,%rax + add $7,%rax jc bad_get_user GET_THREAD_INFO(%rdx) - cmpq TI_addr_limit(%rdx),%rax + cmp TI_addr_limit(%rdx),%rax jae bad_get_user 4: movq -7(%rax),%rdx - xorl %eax,%eax + xor %eax,%eax ret CFI_ENDPROC ENDPROC(__get_user_8) bad_get_user: CFI_STARTPROC - xorl %edx,%edx - movq $(-EFAULT),%rax + xor %edx,%edx + mov $(-EFAULT),%rax ret CFI_ENDPROC END(bad_get_user) -- cgit v1.2.3 From 40faf463e62de0b29722910eded7dd26cd8b684b Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 11:37:57 -0300 Subject: x86: introduce __ASM_REG macro. There are situations in which the architecture wants to use the register that represents its word-size, whatever it is. For those, introduce __ASM_REG in asm.h, along with the first users _ASM_AX and _ASM_DX. They have users waiting for it, namely the getuser functions. Signed-off-by: Glauber Costa Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/getuser_32.S | 25 +++++++++++++------------ arch/x86/lib/getuser_64.S | 36 ++++++++++++++++++------------------ 2 files changed, 31 insertions(+), 30 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S index 8200fde55f5..2cc3ceee8f9 100644 --- a/arch/x86/lib/getuser_32.S +++ b/arch/x86/lib/getuser_32.S @@ -11,6 +11,7 @@ #include #include #include +#include /* @@ -28,10 +29,10 @@ .text ENTRY(__get_user_1) CFI_STARTPROC - GET_THREAD_INFO(%edx) - cmp TI_addr_limit(%edx),%eax + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -1: movzb (%eax),%edx +1: movzb (%_ASM_AX),%edx xor %eax,%eax ret CFI_ENDPROC @@ -39,12 +40,12 @@ ENDPROC(__get_user_1) ENTRY(__get_user_2) CFI_STARTPROC - add $1,%eax + add $1,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%edx) - cmp TI_addr_limit(%edx),%eax + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -2: movzwl -1(%eax),%edx +2: movzwl -1(%_ASM_AX),%edx xor %eax,%eax ret CFI_ENDPROC @@ -52,12 +53,12 @@ ENDPROC(__get_user_2) ENTRY(__get_user_4) CFI_STARTPROC - add $3,%eax + add $3,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%edx) - cmp TI_addr_limit(%edx),%eax + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -3: mov -3(%eax),%edx +3: mov -3(%_ASM_AX),%edx xor %eax,%eax ret CFI_ENDPROC @@ -66,7 +67,7 @@ ENDPROC(__get_user_4) bad_get_user: CFI_STARTPROC xor %edx,%edx - mov $-14,%eax + mov $-14,%_ASM_AX ret CFI_ENDPROC END(bad_get_user) diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S index 6134752a75f..63b0e5c1e58 100644 --- a/arch/x86/lib/getuser_64.S +++ b/arch/x86/lib/getuser_64.S @@ -13,14 +13,13 @@ /* * __get_user_X * - * Inputs: %rcx contains the address. + * Inputs: %rax contains the address. * The register is modified, but all changes are undone * before returning because the C code doesn't know about it. * * Outputs: %rax is error code (0 or -EFAULT) * %rdx contains zero-extended value * - * %r8 is destroyed. * * These functions should not modify any other registers, * as they get called from within inline assembly. 
@@ -32,14 +31,15 @@ #include #include #include +#include .text ENTRY(__get_user_1) CFI_STARTPROC - GET_THREAD_INFO(%rdx) - cmp TI_addr_limit(%rdx),%rax + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -1: movzb (%rax),%edx +1: movzb (%_ASM_AX),%edx xor %eax,%eax ret CFI_ENDPROC @@ -47,12 +47,12 @@ ENDPROC(__get_user_1) ENTRY(__get_user_2) CFI_STARTPROC - add $1,%rax + add $1,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%rdx) - cmp TI_addr_limit(%rdx),%rax + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -2: movzwl -1(%rax),%edx +2: movzwl -1(%_ASM_AX),%edx xor %eax,%eax ret CFI_ENDPROC @@ -60,12 +60,12 @@ ENDPROC(__get_user_2) ENTRY(__get_user_4) CFI_STARTPROC - add $3,%rax + add $3,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%rdx) - cmp TI_addr_limit(%rdx),%rax + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -3: mov -3(%rax),%edx +3: mov -3(%_ASM_AX),%edx xor %eax,%eax ret CFI_ENDPROC @@ -73,12 +73,12 @@ ENDPROC(__get_user_4) ENTRY(__get_user_8) CFI_STARTPROC - add $7,%rax + add $7,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%rdx) - cmp TI_addr_limit(%rdx),%rax + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -4: movq -7(%rax),%rdx +4: movq -7(%_ASM_AX),%_ASM_DX xor %eax,%eax ret CFI_ENDPROC @@ -87,7 +87,7 @@ ENDPROC(__get_user_8) bad_get_user: CFI_STARTPROC xor %edx,%edx - mov $(-EFAULT),%rax + mov $(-EFAULT),%_ASM_AX ret CFI_ENDPROC END(bad_get_user) -- cgit v1.2.3 From 87e2f1e7f6ab66306320403d4502d7938d3c703e Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 12:02:44 -0300 Subject: x86: use _ASM_PTR instead of explicit word-size pointers. Switch .long and .quad with _ASM_PTR in getuser*.S. Signed-off-by: Glauber Costa Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/getuser_32.S | 6 +++--- arch/x86/lib/getuser_64.S | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S index 2cc3ceee8f9..2bb0a183e06 100644 --- a/arch/x86/lib/getuser_32.S +++ b/arch/x86/lib/getuser_32.S @@ -73,7 +73,7 @@ bad_get_user: END(bad_get_user) .section __ex_table,"a" - .long 1b,bad_get_user - .long 2b,bad_get_user - .long 3b,bad_get_user + _ASM_PTR 1b,bad_get_user + _ASM_PTR 2b,bad_get_user + _ASM_PTR 3b,bad_get_user .previous diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S index 63b0e5c1e58..e33388419b7 100644 --- a/arch/x86/lib/getuser_64.S +++ b/arch/x86/lib/getuser_64.S @@ -93,8 +93,8 @@ bad_get_user: END(bad_get_user) .section __ex_table,"a" - .quad 1b,bad_get_user - .quad 2b,bad_get_user - .quad 3b,bad_get_user - .quad 4b,bad_get_user + _ASM_PTR 1b,bad_get_user + _ASM_PTR 2b,bad_get_user + _ASM_PTR 3b,bad_get_user + _ASM_PTR 4b,bad_get_user .previous -- cgit v1.2.3 From 6c2d458680d49d939ffd4b4cdc84d9e004d65910 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 12:05:11 -0300 Subject: x86: merge getuser asm functions. getuser_32.S and getuser_64.S are merged into getuser.S. Signed-off-by: Glauber Costa Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/Makefile | 2 +- arch/x86/lib/getuser.S | 104 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/lib/getuser_32.S | 79 ----------------------------------- arch/x86/lib/getuser_64.S | 100 -------------------------------------------- 4 files changed, 105 insertions(+), 180 deletions(-) create mode 100644 arch/x86/lib/getuser.S delete mode 100644 arch/x86/lib/getuser_32.S delete mode 100644 arch/x86/lib/getuser_64.S (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 86960a6c41c..e92948203a5 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_SMP) := msr-on-cpu.o lib-y := delay.o -lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o +lib-y += usercopy_$(BITS).o getuser.o putuser_$(BITS).o lib-y += memcpy_$(BITS).o ifeq ($(CONFIG_X86_32),y) diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S new file mode 100644 index 00000000000..ad374003742 --- /dev/null +++ b/arch/x86/lib/getuser.S @@ -0,0 +1,104 @@ +/* + * __get_user functions. + * + * (C) Copyright 1998 Linus Torvalds + * (C) Copyright 2005 Andi Kleen + * (C) Copyright 2008 Glauber Costa + * + * These functions have a non-standard call interface + * to make them more efficient, especially as they + * return an error value in addition to the "real" + * return value. + */ + +/* + * __get_user_X + * + * Inputs: %[r|e]ax contains the address. + * The register is modified, but all changes are undone + * before returning because the C code doesn't know about it. + * + * Outputs: %[r|e]ax is error code (0 or -EFAULT) + * %[r|e]dx contains zero-extended value + * + * + * These functions should not modify any other registers, + * as they get called from within inline assembly. + */ + +#include +#include +#include +#include +#include +#include +#include + + .text +ENTRY(__get_user_1) + CFI_STARTPROC + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user +1: movzb (%_ASM_AX),%edx + xor %eax,%eax + ret + CFI_ENDPROC +ENDPROC(__get_user_1) + +ENTRY(__get_user_2) + CFI_STARTPROC + add $1,%_ASM_AX + jc bad_get_user + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user +2: movzwl -1(%_ASM_AX),%edx + xor %eax,%eax + ret + CFI_ENDPROC +ENDPROC(__get_user_2) + +ENTRY(__get_user_4) + CFI_STARTPROC + add $3,%_ASM_AX + jc bad_get_user + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user +3: mov -3(%_ASM_AX),%edx + xor %eax,%eax + ret + CFI_ENDPROC +ENDPROC(__get_user_4) + +#ifdef CONFIG_X86_64 +ENTRY(__get_user_8) + CFI_STARTPROC + add $7,%_ASM_AX + jc bad_get_user + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user +4: movq -7(%_ASM_AX),%_ASM_DX + xor %eax,%eax + ret + CFI_ENDPROC +ENDPROC(__get_user_8) +#endif + +bad_get_user: + CFI_STARTPROC + xor %edx,%edx + mov $(-EFAULT),%_ASM_AX + ret + CFI_ENDPROC +END(bad_get_user) + +.section __ex_table,"a" + _ASM_PTR 1b,bad_get_user + _ASM_PTR 2b,bad_get_user + _ASM_PTR 3b,bad_get_user +#ifdef CONFIG_X86_64 + _ASM_PTR 4b,bad_get_user +#endif diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S deleted file mode 100644 index 2bb0a183e06..00000000000 --- a/arch/x86/lib/getuser_32.S +++ /dev/null @@ -1,79 +0,0 @@ -/* - * __get_user functions. 
- * - * (C) Copyright 1998 Linus Torvalds - * - * These functions have a non-standard call interface - * to make them more efficient, especially as they - * return an error value in addition to the "real" - * return value. - */ -#include -#include -#include -#include - - -/* - * __get_user_X - * - * Inputs: %eax contains the address - * - * Outputs: %eax is error code (0 or -EFAULT) - * %edx contains zero-extended value - * - * These functions should not modify any other registers, - * as they get called from within inline assembly. - */ - -.text -ENTRY(__get_user_1) - CFI_STARTPROC - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user -1: movzb (%_ASM_AX),%edx - xor %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_1) - -ENTRY(__get_user_2) - CFI_STARTPROC - add $1,%_ASM_AX - jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user -2: movzwl -1(%_ASM_AX),%edx - xor %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_2) - -ENTRY(__get_user_4) - CFI_STARTPROC - add $3,%_ASM_AX - jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user -3: mov -3(%_ASM_AX),%edx - xor %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_4) - -bad_get_user: - CFI_STARTPROC - xor %edx,%edx - mov $-14,%_ASM_AX - ret - CFI_ENDPROC -END(bad_get_user) - -.section __ex_table,"a" - _ASM_PTR 1b,bad_get_user - _ASM_PTR 2b,bad_get_user - _ASM_PTR 3b,bad_get_user -.previous diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S deleted file mode 100644 index e33388419b7..00000000000 --- a/arch/x86/lib/getuser_64.S +++ /dev/null @@ -1,100 +0,0 @@ -/* - * __get_user functions. - * - * (C) Copyright 1998 Linus Torvalds - * (C) Copyright 2005 Andi Kleen - * - * These functions have a non-standard call interface - * to make them more efficient, especially as they - * return an error value in addition to the "real" - * return value. - */ - -/* - * __get_user_X - * - * Inputs: %rax contains the address. - * The register is modified, but all changes are undone - * before returning because the C code doesn't know about it. - * - * Outputs: %rax is error code (0 or -EFAULT) - * %rdx contains zero-extended value - * - * - * These functions should not modify any other registers, - * as they get called from within inline assembly. 
- */ - -#include -#include -#include -#include -#include -#include -#include - - .text -ENTRY(__get_user_1) - CFI_STARTPROC - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user -1: movzb (%_ASM_AX),%edx - xor %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_1) - -ENTRY(__get_user_2) - CFI_STARTPROC - add $1,%_ASM_AX - jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user -2: movzwl -1(%_ASM_AX),%edx - xor %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_2) - -ENTRY(__get_user_4) - CFI_STARTPROC - add $3,%_ASM_AX - jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user -3: mov -3(%_ASM_AX),%edx - xor %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_4) - -ENTRY(__get_user_8) - CFI_STARTPROC - add $7,%_ASM_AX - jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user -4: movq -7(%_ASM_AX),%_ASM_DX - xor %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_8) - -bad_get_user: - CFI_STARTPROC - xor %edx,%edx - mov $(-EFAULT),%_ASM_AX - ret - CFI_ENDPROC -END(bad_get_user) - -.section __ex_table,"a" - _ASM_PTR 1b,bad_get_user - _ASM_PTR 2b,bad_get_user - _ASM_PTR 3b,bad_get_user - _ASM_PTR 4b,bad_get_user -.previous -- cgit v1.2.3 From 268cf048c890d10bd3a86bd87922ed8a722d502f Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 12:40:55 -0300 Subject: x86: don't save ebx in putuser_32.S. Clobber it in the inline asm macros, and let the compiler do this for us. Signed-off-by: Glauber Costa Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/putuser_32.S | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser_32.S index f58fba109d1..5b2a926f0e2 100644 --- a/arch/x86/lib/putuser_32.S +++ b/arch/x86/lib/putuser_32.S @@ -26,14 +26,8 @@ */ #define ENTER CFI_STARTPROC ; \ - pushl %ebx ; \ - CFI_ADJUST_CFA_OFFSET 4 ; \ - CFI_REL_OFFSET ebx, 0 ; \ GET_THREAD_INFO(%ebx) -#define EXIT popl %ebx ; \ - CFI_ADJUST_CFA_OFFSET -4 ; \ - CFI_RESTORE ebx ; \ - ret ; \ +#define EXIT ret ; \ CFI_ENDPROC .text @@ -81,10 +75,7 @@ ENTRY(__put_user_8) ENDPROC(__put_user_8) bad_put_user: - CFI_STARTPROC simple - CFI_DEF_CFA esp, 2*4 - CFI_OFFSET eip, -1*4 - CFI_OFFSET ebx, -2*4 + CFI_STARTPROC movl $-14,%eax EXIT END(bad_put_user) -- cgit v1.2.3 From 770546b99fb99e71a3aa4181980d42664f9c18bd Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 15:03:40 -0300 Subject: x86: clobber rbx in putuser_64.S. Instead of clobbering r8, clobber rbx, which is the i386 way. Signed-off-by: Glauber Costa Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/putuser_64.S | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S index 940796fa0d9..07028851064 100644 --- a/arch/x86/lib/putuser_64.S +++ b/arch/x86/lib/putuser_64.S @@ -18,7 +18,7 @@ * * Outputs: %rax is error code (0 or -EFAULT) * - * %r8 is destroyed. + * %rbx is destroyed. * * These functions should not modify any other registers, * as they get called from within inline assembly. 
@@ -34,8 +34,8 @@ .text ENTRY(__put_user_1) CFI_STARTPROC - GET_THREAD_INFO(%r8) - cmpq TI_addr_limit(%r8),%rcx + GET_THREAD_INFO(%rbx) + cmpq TI_addr_limit(%rbx),%rcx jae bad_put_user 1: movb %dl,(%rcx) xorl %eax,%eax @@ -45,10 +45,10 @@ ENDPROC(__put_user_1) ENTRY(__put_user_2) CFI_STARTPROC - GET_THREAD_INFO(%r8) + GET_THREAD_INFO(%rbx) addq $1,%rcx jc 20f - cmpq TI_addr_limit(%r8),%rcx + cmpq TI_addr_limit(%rbx),%rcx jae 20f decq %rcx 2: movw %dx,(%rcx) @@ -61,10 +61,10 @@ ENDPROC(__put_user_2) ENTRY(__put_user_4) CFI_STARTPROC - GET_THREAD_INFO(%r8) + GET_THREAD_INFO(%rbx) addq $3,%rcx jc 30f - cmpq TI_addr_limit(%r8),%rcx + cmpq TI_addr_limit(%rbx),%rcx jae 30f subq $3,%rcx 3: movl %edx,(%rcx) @@ -77,10 +77,10 @@ ENDPROC(__put_user_4) ENTRY(__put_user_8) CFI_STARTPROC - GET_THREAD_INFO(%r8) + GET_THREAD_INFO(%rbx) addq $7,%rcx jc 40f - cmpq TI_addr_limit(%r8),%rcx + cmpq TI_addr_limit(%rbx),%rcx jae 40f subq $7,%rcx 4: movq %rdx,(%rcx) -- cgit v1.2.3 From 0ada3164031162b4e1b7ff6b36ba8cc80ff7fe96 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 16:44:39 -0300 Subject: x86: pass argument to putuser_64 functions in ax register. This is consistent with i386 usage. Signed-off-by: Glauber Costa Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/putuser_64.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S index 07028851064..ce5fcd5d8c1 100644 --- a/arch/x86/lib/putuser_64.S +++ b/arch/x86/lib/putuser_64.S @@ -37,7 +37,7 @@ ENTRY(__put_user_1) GET_THREAD_INFO(%rbx) cmpq TI_addr_limit(%rbx),%rcx jae bad_put_user -1: movb %dl,(%rcx) +1: movb %al,(%rcx) xorl %eax,%eax ret CFI_ENDPROC @@ -51,7 +51,7 @@ ENTRY(__put_user_2) cmpq TI_addr_limit(%rbx),%rcx jae 20f decq %rcx -2: movw %dx,(%rcx) +2: movw %ax,(%rcx) xorl %eax,%eax ret 20: decq %rcx @@ -67,7 +67,7 @@ ENTRY(__put_user_4) cmpq TI_addr_limit(%rbx),%rcx jae 30f subq $3,%rcx -3: movl %edx,(%rcx) +3: movl %eax,(%rcx) xorl %eax,%eax ret 30: subq $3,%rcx @@ -83,7 +83,7 @@ ENTRY(__put_user_8) cmpq TI_addr_limit(%rbx),%rcx jae 40f subq $7,%rcx -4: movq %rdx,(%rcx) +4: movq %rax,(%rcx) xorl %eax,%eax ret 40: subq $7,%rcx -- cgit v1.2.3 From 663aa96df32af9c4141ef3179282f95c7537643a Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 16:51:59 -0300 Subject: x86: change testing logic in putuser_64.S. Instead of operating over a register we need to put back into normal state afterwards (the memory position), just sub from rbx, which is trashed anyway. We can save a few instructions. Also, this is the i386 way. Signed-off-by: Glauber Costa Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/putuser_64.S | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S index ce5fcd5d8c1..a96bd8a5298 100644 --- a/arch/x86/lib/putuser_64.S +++ b/arch/x86/lib/putuser_64.S @@ -46,48 +46,39 @@ ENDPROC(__put_user_1) ENTRY(__put_user_2) CFI_STARTPROC GET_THREAD_INFO(%rbx) - addq $1,%rcx - jc 20f - cmpq TI_addr_limit(%rbx),%rcx - jae 20f - decq %rcx + mov TI_addr_limit(%rbx),%rbx + sub $1, %rbx + cmpq %rbx ,%rcx + jae bad_put_user 2: movw %ax,(%rcx) xorl %eax,%eax ret -20: decq %rcx - jmp bad_put_user CFI_ENDPROC ENDPROC(__put_user_2) ENTRY(__put_user_4) CFI_STARTPROC GET_THREAD_INFO(%rbx) - addq $3,%rcx - jc 30f - cmpq TI_addr_limit(%rbx),%rcx - jae 30f - subq $3,%rcx + mov TI_addr_limit(%rbx),%rbx + sub $3, %rbx + cmp %rbx, %rcx + jae bad_put_user 3: movl %eax,(%rcx) xorl %eax,%eax ret -30: subq $3,%rcx - jmp bad_put_user CFI_ENDPROC ENDPROC(__put_user_4) ENTRY(__put_user_8) CFI_STARTPROC GET_THREAD_INFO(%rbx) - addq $7,%rcx - jc 40f - cmpq TI_addr_limit(%rbx),%rcx - jae 40f - subq $7,%rcx + mov TI_addr_limit(%rbx),%rbx + sub $7, %rbx + cmp %rbx, %rcx + jae bad_put_user 4: movq %rax,(%rcx) xorl %eax,%eax ret -40: subq $7,%rcx - jmp bad_put_user CFI_ENDPROC ENDPROC(__put_user_8) -- cgit v1.2.3 From 766ed4282114eab616741107745b0dd11075e496 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 16:56:30 -0300 Subject: x86: replace function headers by macros. In putuser_64.S, do it the i386 way, and replace the code in beginning and end of functions with macros, since it's always the same thing. Save lines. Signed-off-by: Glauber Costa Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/putuser_64.S | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S index a96bd8a5298..6d7513bf885 100644 --- a/arch/x86/lib/putuser_64.S +++ b/arch/x86/lib/putuser_64.S @@ -31,62 +31,58 @@ #include #include +#define ENTER CFI_STARTPROC ; \ + GET_THREAD_INFO(%rbx) +#define EXIT ret ; \ + CFI_ENDPROC + .text ENTRY(__put_user_1) - CFI_STARTPROC - GET_THREAD_INFO(%rbx) + ENTER cmpq TI_addr_limit(%rbx),%rcx jae bad_put_user 1: movb %al,(%rcx) xorl %eax,%eax - ret - CFI_ENDPROC + EXIT ENDPROC(__put_user_1) ENTRY(__put_user_2) - CFI_STARTPROC - GET_THREAD_INFO(%rbx) + ENTER mov TI_addr_limit(%rbx),%rbx sub $1, %rbx cmpq %rbx ,%rcx jae bad_put_user 2: movw %ax,(%rcx) xorl %eax,%eax - ret - CFI_ENDPROC + EXIT ENDPROC(__put_user_2) ENTRY(__put_user_4) - CFI_STARTPROC - GET_THREAD_INFO(%rbx) + ENTER mov TI_addr_limit(%rbx),%rbx sub $3, %rbx cmp %rbx, %rcx jae bad_put_user 3: movl %eax,(%rcx) xorl %eax,%eax - ret - CFI_ENDPROC + EXIT ENDPROC(__put_user_4) ENTRY(__put_user_8) - CFI_STARTPROC - GET_THREAD_INFO(%rbx) + ENTER mov TI_addr_limit(%rbx),%rbx sub $7, %rbx cmp %rbx, %rcx jae bad_put_user 4: movq %rax,(%rcx) xorl %eax,%eax - ret - CFI_ENDPROC + EXIT ENDPROC(__put_user_8) bad_put_user: CFI_STARTPROC movq $(-EFAULT),%rax - ret - CFI_ENDPROC + EXIT END(bad_put_user) .section __ex_table,"a" -- cgit v1.2.3 From efea505d83873cfc8a7cdbb8a2a11d2c67467843 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 16:59:05 -0300 Subject: x86: don't use word-size specifiers in putuser files. Remove them where unambiguous. Signed-off-by: Glauber Costa Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/putuser_32.S | 28 ++++++++++++++-------------- arch/x86/lib/putuser_64.S | 14 +++++++------- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser_32.S index 5b2a926f0e2..b67a37cab1b 100644 --- a/arch/x86/lib/putuser_32.S +++ b/arch/x86/lib/putuser_32.S @@ -33,44 +33,44 @@ .text ENTRY(__put_user_1) ENTER - cmpl TI_addr_limit(%ebx),%ecx + cmp TI_addr_limit(%ebx),%ecx jae bad_put_user 1: movb %al,(%ecx) - xorl %eax,%eax + xor %eax,%eax EXIT ENDPROC(__put_user_1) ENTRY(__put_user_2) ENTER - movl TI_addr_limit(%ebx),%ebx - subl $1,%ebx - cmpl %ebx,%ecx + mov TI_addr_limit(%ebx),%ebx + sub $1,%ebx + cmp %ebx,%ecx jae bad_put_user 2: movw %ax,(%ecx) - xorl %eax,%eax + xor %eax,%eax EXIT ENDPROC(__put_user_2) ENTRY(__put_user_4) ENTER - movl TI_addr_limit(%ebx),%ebx - subl $3,%ebx - cmpl %ebx,%ecx + mov TI_addr_limit(%ebx),%ebx + sub $3,%ebx + cmp %ebx,%ecx jae bad_put_user 3: movl %eax,(%ecx) - xorl %eax,%eax + xor %eax,%eax EXIT ENDPROC(__put_user_4) ENTRY(__put_user_8) ENTER - movl TI_addr_limit(%ebx),%ebx - subl $7,%ebx - cmpl %ebx,%ecx + mov TI_addr_limit(%ebx),%ebx + sub $7,%ebx + cmp %ebx,%ecx jae bad_put_user 4: movl %eax,(%ecx) 5: movl %edx,4(%ecx) - xorl %eax,%eax + xor %eax,%eax EXIT ENDPROC(__put_user_8) diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S index 6d7513bf885..c18fc0f5256 100644 --- a/arch/x86/lib/putuser_64.S +++ b/arch/x86/lib/putuser_64.S @@ -39,10 +39,10 @@ .text ENTRY(__put_user_1) ENTER - cmpq TI_addr_limit(%rbx),%rcx + cmp TI_addr_limit(%rbx),%rcx jae bad_put_user 1: movb %al,(%rcx) - xorl %eax,%eax + xor %eax,%eax EXIT ENDPROC(__put_user_1) @@ -50,10 +50,10 @@ ENTRY(__put_user_2) ENTER mov TI_addr_limit(%rbx),%rbx sub $1, %rbx - cmpq %rbx ,%rcx + cmp %rbx ,%rcx jae bad_put_user 2: movw %ax,(%rcx) - xorl %eax,%eax + xor %eax,%eax EXIT ENDPROC(__put_user_2) @@ -64,7 +64,7 @@ ENTRY(__put_user_4) cmp %rbx, %rcx jae bad_put_user 3: movl %eax,(%rcx) - xorl %eax,%eax + xor %eax,%eax EXIT ENDPROC(__put_user_4) @@ -75,13 +75,13 @@ ENTRY(__put_user_8) cmp %rbx, %rcx jae bad_put_user 4: movq %rax,(%rcx) - xorl %eax,%eax + xor %eax,%eax EXIT ENDPROC(__put_user_8) bad_put_user: CFI_STARTPROC - movq $(-EFAULT),%rax + mov $(-EFAULT),%rax EXIT END(bad_put_user) -- cgit v1.2.3 From 2528de431ddb200653d1dc6ca90074bad9520f09 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 17:36:31 -0300 Subject: x86: use macros from asm.h. In putuser_32.S and putuser_64.S, replace things like .quad, .long, and explicit references to [r|e]ax for the apropriate macros in asm/asm.h. Signed-off-by: Glauber Costa Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/putuser_32.S | 43 ++++++++++++++++++++++--------------------- arch/x86/lib/putuser_64.S | 41 +++++++++++++++++++++-------------------- 2 files changed, 43 insertions(+), 41 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser_32.S index b67a37cab1b..e7eda34feb3 100644 --- a/arch/x86/lib/putuser_32.S +++ b/arch/x86/lib/putuser_32.S @@ -11,6 +11,7 @@ #include #include #include +#include /* @@ -26,50 +27,50 @@ */ #define ENTER CFI_STARTPROC ; \ - GET_THREAD_INFO(%ebx) + GET_THREAD_INFO(%_ASM_BX) #define EXIT ret ; \ CFI_ENDPROC .text ENTRY(__put_user_1) ENTER - cmp TI_addr_limit(%ebx),%ecx + cmp TI_addr_limit(%_ASM_BX),%_ASM_CX jae bad_put_user -1: movb %al,(%ecx) +1: movb %al,(%_ASM_CX) xor %eax,%eax EXIT ENDPROC(__put_user_1) ENTRY(__put_user_2) ENTER - mov TI_addr_limit(%ebx),%ebx - sub $1,%ebx - cmp %ebx,%ecx + mov TI_addr_limit(%_ASM_BX),%_ASM_BX + sub $1,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX jae bad_put_user -2: movw %ax,(%ecx) +2: movw %ax,(%_ASM_CX) xor %eax,%eax EXIT ENDPROC(__put_user_2) ENTRY(__put_user_4) ENTER - mov TI_addr_limit(%ebx),%ebx - sub $3,%ebx - cmp %ebx,%ecx + mov TI_addr_limit(%_ASM_BX),%_ASM_BX + sub $3,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX jae bad_put_user -3: movl %eax,(%ecx) +3: movl %eax,(%_ASM_CX) xor %eax,%eax EXIT ENDPROC(__put_user_4) ENTRY(__put_user_8) ENTER - mov TI_addr_limit(%ebx),%ebx - sub $7,%ebx - cmp %ebx,%ecx + mov TI_addr_limit(%_ASM_BX),%_ASM_BX + sub $7,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX jae bad_put_user -4: movl %eax,(%ecx) -5: movl %edx,4(%ecx) +4: movl %_ASM_AX,(%_ASM_CX) +5: movl %edx,4(%_ASM_CX) xor %eax,%eax EXIT ENDPROC(__put_user_8) @@ -81,9 +82,9 @@ bad_put_user: END(bad_put_user) .section __ex_table,"a" - .long 1b,bad_put_user - .long 2b,bad_put_user - .long 3b,bad_put_user - .long 4b,bad_put_user - .long 5b,bad_put_user + _ASM_PTR 1b,bad_put_user + _ASM_PTR 2b,bad_put_user + _ASM_PTR 3b,bad_put_user + _ASM_PTR 4b,bad_put_user + _ASM_PTR 5b,bad_put_user .previous diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S index c18fc0f5256..d496cc8e730 100644 --- a/arch/x86/lib/putuser_64.S +++ b/arch/x86/lib/putuser_64.S @@ -30,64 +30,65 @@ #include #include #include +#include #define ENTER CFI_STARTPROC ; \ - GET_THREAD_INFO(%rbx) + GET_THREAD_INFO(%_ASM_BX) #define EXIT ret ; \ CFI_ENDPROC .text ENTRY(__put_user_1) ENTER - cmp TI_addr_limit(%rbx),%rcx + cmp TI_addr_limit(%_ASM_BX),%_ASM_CX jae bad_put_user -1: movb %al,(%rcx) +1: movb %al,(%_ASM_CX) xor %eax,%eax EXIT ENDPROC(__put_user_1) ENTRY(__put_user_2) ENTER - mov TI_addr_limit(%rbx),%rbx - sub $1, %rbx - cmp %rbx ,%rcx + mov TI_addr_limit(%_ASM_BX),%_ASM_BX + sub $1, %_ASM_BX + cmp %_ASM_BX ,%_ASM_CX jae bad_put_user -2: movw %ax,(%rcx) +2: movw %ax,(%_ASM_CX) xor %eax,%eax EXIT ENDPROC(__put_user_2) ENTRY(__put_user_4) ENTER - mov TI_addr_limit(%rbx),%rbx - sub $3, %rbx - cmp %rbx, %rcx + mov TI_addr_limit(%_ASM_BX),%_ASM_BX + sub $3, %_ASM_BX + cmp %_ASM_BX, %_ASM_CX jae bad_put_user -3: movl %eax,(%rcx) +3: movl %eax,(%_ASM_CX) xor %eax,%eax EXIT ENDPROC(__put_user_4) ENTRY(__put_user_8) ENTER - mov TI_addr_limit(%rbx),%rbx - sub $7, %rbx - cmp %rbx, %rcx + mov TI_addr_limit(%_ASM_BX),%_ASM_BX + sub $7, %_ASM_BX + cmp %_ASM_BX, %_ASM_CX jae bad_put_user -4: movq %rax,(%rcx) +4: movq %_ASM_AX,(%_ASM_CX) xor %eax,%eax EXIT ENDPROC(__put_user_8) bad_put_user: CFI_STARTPROC - mov $(-EFAULT),%rax + mov $(-EFAULT),%eax EXIT END(bad_put_user) .section __ex_table,"a" - .quad 
1b,bad_put_user - .quad 2b,bad_put_user - .quad 3b,bad_put_user - .quad 4b,bad_put_user + _ASM_PTR 1b,bad_put_user + _ASM_PTR 2b,bad_put_user + _ASM_PTR 3b,bad_put_user + _ASM_PTR 4b,bad_put_user .previous -- cgit v1.2.3 From 5cbbc3b1eb37bdc72eefd2de03b39f5e784400c2 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 24 Jun 2008 17:40:14 -0300 Subject: x86: merge putuser asm functions. putuser_32.S and putuser_64.S are merged into putuser.S. Signed-off-by: Glauber Costa Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/lib/Makefile | 2 +- arch/x86/lib/putuser.S | 97 +++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/lib/putuser_32.S | 90 ------------------------------------------- arch/x86/lib/putuser_64.S | 94 --------------------------------------------- 4 files changed, 98 insertions(+), 185 deletions(-) create mode 100644 arch/x86/lib/putuser.S delete mode 100644 arch/x86/lib/putuser_32.S delete mode 100644 arch/x86/lib/putuser_64.S (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index e92948203a5..83226e0a7ce 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_SMP) := msr-on-cpu.o lib-y := delay.o -lib-y += usercopy_$(BITS).o getuser.o putuser_$(BITS).o +lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o ifeq ($(CONFIG_X86_32),y) diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S new file mode 100644 index 00000000000..36b0d15ae6e --- /dev/null +++ b/arch/x86/lib/putuser.S @@ -0,0 +1,97 @@ +/* + * __put_user functions. + * + * (C) Copyright 2005 Linus Torvalds + * (C) Copyright 2005 Andi Kleen + * (C) Copyright 2008 Glauber Costa + * + * These functions have a non-standard call interface + * to make them more efficient, especially as they + * return an error value in addition to the "real" + * return value. + */ +#include +#include +#include +#include +#include + + +/* + * __put_user_X + * + * Inputs: %eax[:%edx] contains the data + * %ecx contains the address + * + * Outputs: %eax is error code (0 or -EFAULT) + * + * These functions should not modify any other registers, + * as they get called from within inline assembly. 
+ */ + +#define ENTER CFI_STARTPROC ; \ + GET_THREAD_INFO(%_ASM_BX) +#define EXIT ret ; \ + CFI_ENDPROC + +.text +ENTRY(__put_user_1) + ENTER + cmp TI_addr_limit(%_ASM_BX),%_ASM_CX + jae bad_put_user +1: movb %al,(%_ASM_CX) + xor %eax,%eax + EXIT +ENDPROC(__put_user_1) + +ENTRY(__put_user_2) + ENTER + mov TI_addr_limit(%_ASM_BX),%_ASM_BX + sub $1,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jae bad_put_user +2: movw %ax,(%_ASM_CX) + xor %eax,%eax + EXIT +ENDPROC(__put_user_2) + +ENTRY(__put_user_4) + ENTER + mov TI_addr_limit(%_ASM_BX),%_ASM_BX + sub $3,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jae bad_put_user +3: movl %eax,(%_ASM_CX) + xor %eax,%eax + EXIT +ENDPROC(__put_user_4) + +ENTRY(__put_user_8) + ENTER + mov TI_addr_limit(%_ASM_BX),%_ASM_BX + sub $7,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jae bad_put_user +4: mov %_ASM_AX,(%_ASM_CX) +#ifdef CONFIG_X86_32 +5: movl %edx,4(%_ASM_CX) +#endif + xor %eax,%eax + EXIT +ENDPROC(__put_user_8) + +bad_put_user: + CFI_STARTPROC + movl $-EFAULT,%eax + EXIT +END(bad_put_user) + +.section __ex_table,"a" + _ASM_PTR 1b,bad_put_user + _ASM_PTR 2b,bad_put_user + _ASM_PTR 3b,bad_put_user + _ASM_PTR 4b,bad_put_user +#ifdef CONFIG_X86_32 + _ASM_PTR 5b,bad_put_user +#endif +.previous diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser_32.S deleted file mode 100644 index e7eda34feb3..00000000000 --- a/arch/x86/lib/putuser_32.S +++ /dev/null @@ -1,90 +0,0 @@ -/* - * __put_user functions. - * - * (C) Copyright 2005 Linus Torvalds - * - * These functions have a non-standard call interface - * to make them more efficient, especially as they - * return an error value in addition to the "real" - * return value. - */ -#include -#include -#include -#include - - -/* - * __put_user_X - * - * Inputs: %eax[:%edx] contains the data - * %ecx contains the address - * - * Outputs: %eax is error code (0 or -EFAULT) - * - * These functions should not modify any other registers, - * as they get called from within inline assembly. - */ - -#define ENTER CFI_STARTPROC ; \ - GET_THREAD_INFO(%_ASM_BX) -#define EXIT ret ; \ - CFI_ENDPROC - -.text -ENTRY(__put_user_1) - ENTER - cmp TI_addr_limit(%_ASM_BX),%_ASM_CX - jae bad_put_user -1: movb %al,(%_ASM_CX) - xor %eax,%eax - EXIT -ENDPROC(__put_user_1) - -ENTRY(__put_user_2) - ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX - sub $1,%_ASM_BX - cmp %_ASM_BX,%_ASM_CX - jae bad_put_user -2: movw %ax,(%_ASM_CX) - xor %eax,%eax - EXIT -ENDPROC(__put_user_2) - -ENTRY(__put_user_4) - ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX - sub $3,%_ASM_BX - cmp %_ASM_BX,%_ASM_CX - jae bad_put_user -3: movl %eax,(%_ASM_CX) - xor %eax,%eax - EXIT -ENDPROC(__put_user_4) - -ENTRY(__put_user_8) - ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX - sub $7,%_ASM_BX - cmp %_ASM_BX,%_ASM_CX - jae bad_put_user -4: movl %_ASM_AX,(%_ASM_CX) -5: movl %edx,4(%_ASM_CX) - xor %eax,%eax - EXIT -ENDPROC(__put_user_8) - -bad_put_user: - CFI_STARTPROC - movl $-14,%eax - EXIT -END(bad_put_user) - -.section __ex_table,"a" - _ASM_PTR 1b,bad_put_user - _ASM_PTR 2b,bad_put_user - _ASM_PTR 3b,bad_put_user - _ASM_PTR 4b,bad_put_user - _ASM_PTR 5b,bad_put_user -.previous diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S deleted file mode 100644 index d496cc8e730..00000000000 --- a/arch/x86/lib/putuser_64.S +++ /dev/null @@ -1,94 +0,0 @@ -/* - * __put_user functions. 
- * - * (C) Copyright 1998 Linus Torvalds - * (C) Copyright 2005 Andi Kleen - * - * These functions have a non-standard call interface - * to make them more efficient, especially as they - * return an error value in addition to the "real" - * return value. - */ - -/* - * __put_user_X - * - * Inputs: %rcx contains the address - * %rdx contains new value - * - * Outputs: %rax is error code (0 or -EFAULT) - * - * %rbx is destroyed. - * - * These functions should not modify any other registers, - * as they get called from within inline assembly. - */ - -#include -#include -#include -#include -#include -#include -#include - -#define ENTER CFI_STARTPROC ; \ - GET_THREAD_INFO(%_ASM_BX) -#define EXIT ret ; \ - CFI_ENDPROC - - .text -ENTRY(__put_user_1) - ENTER - cmp TI_addr_limit(%_ASM_BX),%_ASM_CX - jae bad_put_user -1: movb %al,(%_ASM_CX) - xor %eax,%eax - EXIT -ENDPROC(__put_user_1) - -ENTRY(__put_user_2) - ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX - sub $1, %_ASM_BX - cmp %_ASM_BX ,%_ASM_CX - jae bad_put_user -2: movw %ax,(%_ASM_CX) - xor %eax,%eax - EXIT -ENDPROC(__put_user_2) - -ENTRY(__put_user_4) - ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX - sub $3, %_ASM_BX - cmp %_ASM_BX, %_ASM_CX - jae bad_put_user -3: movl %eax,(%_ASM_CX) - xor %eax,%eax - EXIT -ENDPROC(__put_user_4) - -ENTRY(__put_user_8) - ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX - sub $7, %_ASM_BX - cmp %_ASM_BX, %_ASM_CX - jae bad_put_user -4: movq %_ASM_AX,(%_ASM_CX) - xor %eax,%eax - EXIT -ENDPROC(__put_user_8) - -bad_put_user: - CFI_STARTPROC - mov $(-EFAULT),%eax - EXIT -END(bad_put_user) - -.section __ex_table,"a" - _ASM_PTR 1b,bad_put_user - _ASM_PTR 2b,bad_put_user - _ASM_PTR 3b,bad_put_user - _ASM_PTR 4b,bad_put_user -.previous -- cgit v1.2.3 From 1129585a08baf58582c0da91e572cb29e3179acf Mon Sep 17 00:00:00 2001 From: Vitaly Mayatskikh Date: Wed, 2 Jul 2008 15:48:21 +0200 Subject: x86: introduce copy_user_handle_tail() routine Introduce generic C routine for handling necessary tail operations after protection fault in copy_*_user on x86. Signed-off-by: Vitaly Mayatskikh Acked-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy_64.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 0c89d1bb028..f4df6e7c718 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -158,3 +158,26 @@ unsigned long copy_in_user(void __user *to, const void __user *from, unsigned le } EXPORT_SYMBOL(copy_in_user); +/* + * Try to copy last bytes and clear the rest if needed. + * Since protection fault in copy_from/to_user is not a normal situation, + * it is not necessary to optimize tail handling. + */ +unsigned long +copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) +{ + char c; + unsigned zero_len; + + for (; len; --len) { + if (__get_user_nocheck(c, from++, sizeof(char))) + break; + if (__put_user_nocheck(c, to++, sizeof(char))) + break; + } + + for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) + if (__put_user_nocheck(c, to++, sizeof(char))) + break; + return len; +} -- cgit v1.2.3 From ad2fc2cd925300b8127cf682f5a1c7511ae9dd27 Mon Sep 17 00:00:00 2001 From: Vitaly Mayatskikh Date: Wed, 2 Jul 2008 15:53:13 +0200 Subject: x86: fix copy_user on x86 Switch copy_user_generic_string(), copy_user_generic_unrolled() and __copy_user_nocache() from custom tail handlers to generic copy_user_tail_handle(). 
Signed-off-by: Vitaly Mayatskikh Acked-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/lib/copy_user_64.S | 427 +++++++++++++++--------------------- arch/x86/lib/copy_user_nocache_64.S | 283 +++++++++--------------- 2 files changed, 275 insertions(+), 435 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 7eaaf0123b4..e5afb4ad3f1 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -1,8 +1,10 @@ -/* Copyright 2002 Andi Kleen, SuSE Labs. +/* + * Copyright 2008 Vitaly Mayatskikh + * Copyright 2002 Andi Kleen, SuSE Labs. * Subject to the GNU Public License v2. - * - * Functions to copy from and to user space. - */ + * + * Functions to copy from and to user space. + */ #include #include @@ -20,60 +22,88 @@ .long \orig-1f /* by default jump to orig */ 1: .section .altinstr_replacement,"ax" -2: .byte 0xe9 /* near jump with 32bit immediate */ +2: .byte 0xe9 /* near jump with 32bit immediate */ .long \alt-1b /* offset */ /* or alternatively to alt */ .previous .section .altinstructions,"a" .align 8 .quad 0b .quad 2b - .byte \feature /* when feature is set */ + .byte \feature /* when feature is set */ .byte 5 .byte 5 .previous .endm -/* Standard copy_to_user with segment limit checking */ + .macro ALIGN_DESTINATION +#ifdef FIX_ALIGNMENT + /* check for bad alignment of destination */ + movl %edi,%ecx + andl $7,%ecx + jz 102f /* already aligned */ + subl $8,%ecx + negl %ecx + subl %ecx,%edx +100: movb (%rsi),%al +101: movb %al,(%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz 100b +102: + .section .fixup,"ax" +103: addl %r8d,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous + + .section __ex_table,"a" + .align 8 + .quad 100b,103b + .quad 101b,103b + .previous +#endif + .endm + +/* Standard copy_to_user with segment limit checking */ ENTRY(copy_to_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rdi,%rcx addq %rdx,%rcx - jc bad_to_user + jc bad_to_user cmpq TI_addr_limit(%rax),%rcx jae bad_to_user - xorl %eax,%eax /* clear zero flag */ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENTRY(copy_user_generic) +/* Standard copy_from_user with segment limit checking */ +ENTRY(copy_from_user) CFI_STARTPROC - movl $1,%ecx /* set zero flag */ + GET_THREAD_INFO(%rax) + movq %rsi,%rcx + addq %rdx,%rcx + jc bad_from_user + cmpq TI_addr_limit(%rax),%rcx + jae bad_from_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC +ENDPROC(copy_from_user) -ENTRY(__copy_from_user_inatomic) +ENTRY(copy_user_generic) CFI_STARTPROC - xorl %ecx,%ecx /* clear zero flag */ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC +ENDPROC(copy_user_generic) -/* Standard copy_from_user with segment limit checking */ -ENTRY(copy_from_user) +ENTRY(__copy_from_user_inatomic) CFI_STARTPROC - GET_THREAD_INFO(%rax) - movq %rsi,%rcx - addq %rdx,%rcx - jc bad_from_user - cmpq TI_addr_limit(%rax),%rcx - jae bad_from_user - movl $1,%ecx /* set zero flag */ ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_from_user) - +ENDPROC(__copy_from_user_inatomic) + .section .fixup,"ax" /* must zero dest */ +ENTRY(bad_from_user) bad_from_user: CFI_STARTPROC movl %edx,%ecx @@ -81,271 +111,158 @@ bad_from_user: rep stosb bad_to_user: - movl %edx,%eax + movl %edx,%eax ret CFI_ENDPROC -END(bad_from_user) 
+ENDPROC(bad_from_user) .previous - - + /* * copy_user_generic_unrolled - memory copy with exception handling. - * This version is for CPUs like P4 that don't have efficient micro code for rep movsq - * - * Input: + * This version is for CPUs like P4 that don't have efficient micro + * code for rep movsq + * + * Input: * rdi destination * rsi source * rdx count - * ecx zero flag -- if true zero destination on error * - * Output: - * eax uncopied bytes or 0 if successful. + * Output: + * eax uncopied bytes or 0 if successfull. */ ENTRY(copy_user_generic_unrolled) CFI_STARTPROC - pushq %rbx - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rbx, 0 - pushq %rcx - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rcx, 0 - xorl %eax,%eax /*zero for the exception handler */ - -#ifdef FIX_ALIGNMENT - /* check for bad alignment of destination */ - movl %edi,%ecx - andl $7,%ecx - jnz .Lbad_alignment -.Lafter_bad_alignment: -#endif - - movq %rdx,%rcx - - movl $64,%ebx - shrq $6,%rdx - decq %rdx - js .Lhandle_tail - - .p2align 4 -.Lloop: -.Ls1: movq (%rsi),%r11 -.Ls2: movq 1*8(%rsi),%r8 -.Ls3: movq 2*8(%rsi),%r9 -.Ls4: movq 3*8(%rsi),%r10 -.Ld1: movq %r11,(%rdi) -.Ld2: movq %r8,1*8(%rdi) -.Ld3: movq %r9,2*8(%rdi) -.Ld4: movq %r10,3*8(%rdi) - -.Ls5: movq 4*8(%rsi),%r11 -.Ls6: movq 5*8(%rsi),%r8 -.Ls7: movq 6*8(%rsi),%r9 -.Ls8: movq 7*8(%rsi),%r10 -.Ld5: movq %r11,4*8(%rdi) -.Ld6: movq %r8,5*8(%rdi) -.Ld7: movq %r9,6*8(%rdi) -.Ld8: movq %r10,7*8(%rdi) - - decq %rdx - + cmpl $8,%edx + jb 20f /* less then 8 bytes, go to byte copy loop */ + ALIGN_DESTINATION + movl %edx,%ecx + andl $63,%edx + shrl $6,%ecx + jz 17f +1: movq (%rsi),%r8 +2: movq 1*8(%rsi),%r9 +3: movq 2*8(%rsi),%r10 +4: movq 3*8(%rsi),%r11 +5: movq %r8,(%rdi) +6: movq %r9,1*8(%rdi) +7: movq %r10,2*8(%rdi) +8: movq %r11,3*8(%rdi) +9: movq 4*8(%rsi),%r8 +10: movq 5*8(%rsi),%r9 +11: movq 6*8(%rsi),%r10 +12: movq 7*8(%rsi),%r11 +13: movq %r8,4*8(%rdi) +14: movq %r9,5*8(%rdi) +15: movq %r10,6*8(%rdi) +16: movq %r11,7*8(%rdi) leaq 64(%rsi),%rsi leaq 64(%rdi),%rdi - - jns .Lloop - - .p2align 4 -.Lhandle_tail: - movl %ecx,%edx - andl $63,%ecx - shrl $3,%ecx - jz .Lhandle_7 - movl $8,%ebx - .p2align 4 -.Lloop_8: -.Ls9: movq (%rsi),%r8 -.Ld9: movq %r8,(%rdi) decl %ecx - leaq 8(%rdi),%rdi + jnz 1b +17: movl %edx,%ecx + andl $7,%edx + shrl $3,%ecx + jz 20f +18: movq (%rsi),%r8 +19: movq %r8,(%rdi) leaq 8(%rsi),%rsi - jnz .Lloop_8 - -.Lhandle_7: + leaq 8(%rdi),%rdi + decl %ecx + jnz 18b +20: andl %edx,%edx + jz 23f movl %edx,%ecx - andl $7,%ecx - jz .Lende - .p2align 4 -.Lloop_1: -.Ls10: movb (%rsi),%bl -.Ld10: movb %bl,(%rdi) - incq %rdi +21: movb (%rsi),%al +22: movb %al,(%rdi) incq %rsi + incq %rdi decl %ecx - jnz .Lloop_1 - - CFI_REMEMBER_STATE -.Lende: - popq %rcx - CFI_ADJUST_CFA_OFFSET -8 - CFI_RESTORE rcx - popq %rbx - CFI_ADJUST_CFA_OFFSET -8 - CFI_RESTORE rbx + jnz 21b +23: xor %eax,%eax ret - CFI_RESTORE_STATE -#ifdef FIX_ALIGNMENT - /* align destination */ - .p2align 4 -.Lbad_alignment: - movl $8,%r9d - subl %ecx,%r9d - movl %r9d,%ecx - cmpq %r9,%rdx - jz .Lhandle_7 - js .Lhandle_7 -.Lalign_1: -.Ls11: movb (%rsi),%bl -.Ld11: movb %bl,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz .Lalign_1 - subq %r9,%rdx - jmp .Lafter_bad_alignment -#endif + .section .fixup,"ax" +30: shll $6,%ecx + addl %ecx,%edx + jmp 60f +40: leal (%edx,%ecx,8),%edx + jmp 60f +50: movl %ecx,%edx +60: jmp copy_user_handle_tail /* ecx is zerorest also */ + .previous - /* table sorted by exception address */ .section __ex_table,"a" .align 8 - .quad .Ls1,.Ls1e /* Ls1-Ls4 have copied zero 
bytes */ - .quad .Ls2,.Ls1e - .quad .Ls3,.Ls1e - .quad .Ls4,.Ls1e - .quad .Ld1,.Ls1e /* Ld1-Ld4 have copied 0-24 bytes */ - .quad .Ld2,.Ls2e - .quad .Ld3,.Ls3e - .quad .Ld4,.Ls4e - .quad .Ls5,.Ls5e /* Ls5-Ls8 have copied 32 bytes */ - .quad .Ls6,.Ls5e - .quad .Ls7,.Ls5e - .quad .Ls8,.Ls5e - .quad .Ld5,.Ls5e /* Ld5-Ld8 have copied 32-56 bytes */ - .quad .Ld6,.Ls6e - .quad .Ld7,.Ls7e - .quad .Ld8,.Ls8e - .quad .Ls9,.Le_quad - .quad .Ld9,.Le_quad - .quad .Ls10,.Le_byte - .quad .Ld10,.Le_byte -#ifdef FIX_ALIGNMENT - .quad .Ls11,.Lzero_rest - .quad .Ld11,.Lzero_rest -#endif - .quad .Le5,.Le_zero + .quad 1b,30b + .quad 2b,30b + .quad 3b,30b + .quad 4b,30b + .quad 5b,30b + .quad 6b,30b + .quad 7b,30b + .quad 8b,30b + .quad 9b,30b + .quad 10b,30b + .quad 11b,30b + .quad 12b,30b + .quad 13b,30b + .quad 14b,30b + .quad 15b,30b + .quad 16b,30b + .quad 18b,40b + .quad 19b,40b + .quad 21b,50b + .quad 22b,50b .previous - - /* eax: zero, ebx: 64 */ -.Ls1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */ -.Ls2e: addl $8,%eax -.Ls3e: addl $8,%eax -.Ls4e: addl $8,%eax -.Ls5e: addl $8,%eax -.Ls6e: addl $8,%eax -.Ls7e: addl $8,%eax -.Ls8e: addl $8,%eax - addq %rbx,%rdi /* +64 */ - subq %rax,%rdi /* correct destination with computed offset */ - - shlq $6,%rdx /* loop counter * 64 (stride length) */ - addq %rax,%rdx /* add offset to loopcnt */ - andl $63,%ecx /* remaining bytes */ - addq %rcx,%rdx /* add them */ - jmp .Lzero_rest - - /* exception on quad word loop in tail handling */ - /* ecx: loopcnt/8, %edx: length, rdi: correct */ -.Le_quad: - shll $3,%ecx - andl $7,%edx - addl %ecx,%edx - /* edx: bytes to zero, rdi: dest, eax:zero */ -.Lzero_rest: - cmpl $0,(%rsp) - jz .Le_zero - movq %rdx,%rcx -.Le_byte: - xorl %eax,%eax -.Le5: rep - stosb - /* when there is another exception while zeroing the rest just return */ -.Le_zero: - movq %rdx,%rax - jmp .Lende CFI_ENDPROC -ENDPROC(copy_user_generic) +ENDPROC(copy_user_generic_unrolled) - - /* Some CPUs run faster using the string copy instructions. - This is also a lot simpler. Use them when possible. - Patch in jmps to this code instead of copying it fully - to avoid unwanted aliasing in the exception tables. */ - - /* rdi destination - * rsi source - * rdx count - * ecx zero flag - * - * Output: - * eax uncopied bytes or 0 if successfull. - * - * Only 4GB of copy is supported. This shouldn't be a problem - * because the kernel normally only writes from/to page sized chunks - * even if user space passed a longer buffer. - * And more would be dangerous because both Intel and AMD have - * errata with rep movsq > 4GB. If someone feels the need to fix - * this please consider this. - */ +/* Some CPUs run faster using the string copy instructions. + * This is also a lot simpler. Use them when possible. + * + * Only 4GB of copy is supported. This shouldn't be a problem + * because the kernel normally only writes from/to page sized chunks + * even if user space passed a longer buffer. + * And more would be dangerous because both Intel and AMD have + * errata with rep movsq > 4GB. If someone feels the need to fix + * this please consider this. + * + * Input: + * rdi destination + * rsi source + * rdx count + * + * Output: + * eax uncopied bytes or 0 if successful. 
+ */ ENTRY(copy_user_generic_string) CFI_STARTPROC - movl %ecx,%r8d /* save zero flag */ + andl %edx,%edx + jz 4f + cmpl $8,%edx + jb 2f /* less than 8 bytes, go to byte copy loop */ + ALIGN_DESTINATION movl %edx,%ecx shrl $3,%ecx - andl $7,%edx - jz 10f -1: rep - movsq - movl %edx,%ecx -2: rep - movsb -9: movl %ecx,%eax - ret - - /* multiple of 8 byte */ -10: rep + andl $7,%edx +1: rep movsq - xor %eax,%eax +2: movl %edx,%ecx +3: rep + movsb +4: xorl %eax,%eax ret - /* exception handling */ -3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */ - jmp 6f -5: movl %ecx,%eax /* exception on byte loop */ - /* eax: left over bytes */ -6: testl %r8d,%r8d /* zero flag set? */ - jz 7f - movl %eax,%ecx /* initialize x86 loop counter */ - push %rax - xorl %eax,%eax -8: rep - stosb /* zero the rest */ -11: pop %rax -7: ret - CFI_ENDPROC -END(copy_user_generic_c) + .section .fixup,"ax" +11: leal (%edx,%ecx,8),%ecx +12: movl %ecx,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous .section __ex_table,"a" - .quad 1b,3b - .quad 2b,5b - .quad 8b,11b - .quad 10b,3b + .align 8 + .quad 1b,11b + .quad 3b,12b .previous + CFI_ENDPROC +ENDPROC(copy_user_generic_string) diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 9d3d1ab8376..93353d6a526 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -1,4 +1,6 @@ -/* Copyright 2002 Andi Kleen, SuSE Labs. +/* + * Copyright 2008 Vitaly Mayatskikh + * Copyright 2002 Andi Kleen, SuSE Labs. * Subject to the GNU Public License v2. * * Functions to copy from and to user space. @@ -12,204 +14,125 @@ #include #include #include -#include - -/* - * copy_user_nocache - Uncached memory copy with exception handling - * This will force destination/source out of cache for more performance. - * - * Input: - * rdi destination - * rsi source - * rdx count - * rcx zero flag when 1 zero on exception - * - * Output: - * eax uncopied bytes or 0 if successful. - */ -ENTRY(__copy_user_nocache) - CFI_STARTPROC - pushq %rbx - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rbx, 0 - pushq %rcx /* save zero flag */ - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rcx, 0 - - xorl %eax,%eax /* zero for the exception handler */ + .macro ALIGN_DESTINATION #ifdef FIX_ALIGNMENT /* check for bad alignment of destination */ movl %edi,%ecx andl $7,%ecx - jnz .Lbad_alignment -.Lafter_bad_alignment: -#endif - - movq %rdx,%rcx - - movl $64,%ebx - shrq $6,%rdx - decq %rdx - js .Lhandle_tail - - .p2align 4 -.Lloop: -.Ls1: movq (%rsi),%r11 -.Ls2: movq 1*8(%rsi),%r8 -.Ls3: movq 2*8(%rsi),%r9 -.Ls4: movq 3*8(%rsi),%r10 -.Ld1: movnti %r11,(%rdi) -.Ld2: movnti %r8,1*8(%rdi) -.Ld3: movnti %r9,2*8(%rdi) -.Ld4: movnti %r10,3*8(%rdi) - -.Ls5: movq 4*8(%rsi),%r11 -.Ls6: movq 5*8(%rsi),%r8 -.Ls7: movq 6*8(%rsi),%r9 -.Ls8: movq 7*8(%rsi),%r10 -.Ld5: movnti %r11,4*8(%rdi) -.Ld6: movnti %r8,5*8(%rdi) -.Ld7: movnti %r9,6*8(%rdi) -.Ld8: movnti %r10,7*8(%rdi) + jz 102f /* already aligned */ + subl $8,%ecx + negl %ecx + subl %ecx,%edx +100: movb (%rsi),%al +101: movb %al,(%rdi) + incq %rsi + incq %rdi + decl %ecx + jnz 100b +102: + .section .fixup,"ax" +103: addl %r8d,%edx /* ecx is zerorest also */ + jmp copy_user_handle_tail + .previous - dec %rdx + .section __ex_table,"a" + .align 8 + .quad 100b,103b + .quad 101b,103b + .previous +#endif + .endm +/* + * copy_user_nocache - Uncached memory copy with exception handling + * This will force destination/source out of cache for more performance. 
+ */ +ENTRY(__copy_user_nocache) + CFI_STARTPROC + cmpl $8,%edx + jb 20f /* less then 8 bytes, go to byte copy loop */ + ALIGN_DESTINATION + movl %edx,%ecx + andl $63,%edx + shrl $6,%ecx + jz 17f +1: movq (%rsi),%r8 +2: movq 1*8(%rsi),%r9 +3: movq 2*8(%rsi),%r10 +4: movq 3*8(%rsi),%r11 +5: movnti %r8,(%rdi) +6: movnti %r9,1*8(%rdi) +7: movnti %r10,2*8(%rdi) +8: movnti %r11,3*8(%rdi) +9: movq 4*8(%rsi),%r8 +10: movq 5*8(%rsi),%r9 +11: movq 6*8(%rsi),%r10 +12: movq 7*8(%rsi),%r11 +13: movnti %r8,4*8(%rdi) +14: movnti %r9,5*8(%rdi) +15: movnti %r10,6*8(%rdi) +16: movnti %r11,7*8(%rdi) leaq 64(%rsi),%rsi leaq 64(%rdi),%rdi - - jns .Lloop - - .p2align 4 -.Lhandle_tail: - movl %ecx,%edx - andl $63,%ecx - shrl $3,%ecx - jz .Lhandle_7 - movl $8,%ebx - .p2align 4 -.Lloop_8: -.Ls9: movq (%rsi),%r8 -.Ld9: movnti %r8,(%rdi) decl %ecx - leaq 8(%rdi),%rdi + jnz 1b +17: movl %edx,%ecx + andl $7,%edx + shrl $3,%ecx + jz 20f +18: movq (%rsi),%r8 +19: movnti %r8,(%rdi) leaq 8(%rsi),%rsi - jnz .Lloop_8 - -.Lhandle_7: + leaq 8(%rdi),%rdi + decl %ecx + jnz 18b +20: andl %edx,%edx + jz 23f movl %edx,%ecx - andl $7,%ecx - jz .Lende - .p2align 4 -.Lloop_1: -.Ls10: movb (%rsi),%bl -.Ld10: movb %bl,(%rdi) - incq %rdi +21: movb (%rsi),%al +22: movb %al,(%rdi) incq %rsi + incq %rdi decl %ecx - jnz .Lloop_1 - - CFI_REMEMBER_STATE -.Lende: - popq %rcx - CFI_ADJUST_CFA_OFFSET -8 - CFI_RESTORE %rcx - popq %rbx - CFI_ADJUST_CFA_OFFSET -8 - CFI_RESTORE rbx + jnz 21b +23: xorl %eax,%eax sfence ret - CFI_RESTORE_STATE -#ifdef FIX_ALIGNMENT - /* align destination */ - .p2align 4 -.Lbad_alignment: - movl $8,%r9d - subl %ecx,%r9d - movl %r9d,%ecx - cmpq %r9,%rdx - jz .Lhandle_7 - js .Lhandle_7 -.Lalign_1: -.Ls11: movb (%rsi),%bl -.Ld11: movb %bl,(%rdi) - incq %rsi - incq %rdi - decl %ecx - jnz .Lalign_1 - subq %r9,%rdx - jmp .Lafter_bad_alignment -#endif + .section .fixup,"ax" +30: shll $6,%ecx + addl %ecx,%edx + jmp 60f +40: leal (%edx,%ecx,8),%edx + jmp 60f +50: movl %ecx,%edx +60: sfence + movl %r8d,%ecx + jmp copy_user_handle_tail + .previous - /* table sorted by exception address */ .section __ex_table,"a" - .align 8 - .quad .Ls1,.Ls1e /* .Ls[1-4] - 0 bytes copied */ - .quad .Ls2,.Ls1e - .quad .Ls3,.Ls1e - .quad .Ls4,.Ls1e - .quad .Ld1,.Ls1e /* .Ld[1-4] - 0..24 bytes coped */ - .quad .Ld2,.Ls2e - .quad .Ld3,.Ls3e - .quad .Ld4,.Ls4e - .quad .Ls5,.Ls5e /* .Ls[5-8] - 32 bytes copied */ - .quad .Ls6,.Ls5e - .quad .Ls7,.Ls5e - .quad .Ls8,.Ls5e - .quad .Ld5,.Ls5e /* .Ld[5-8] - 32..56 bytes copied */ - .quad .Ld6,.Ls6e - .quad .Ld7,.Ls7e - .quad .Ld8,.Ls8e - .quad .Ls9,.Le_quad - .quad .Ld9,.Le_quad - .quad .Ls10,.Le_byte - .quad .Ld10,.Le_byte -#ifdef FIX_ALIGNMENT - .quad .Ls11,.Lzero_rest - .quad .Ld11,.Lzero_rest -#endif - .quad .Le5,.Le_zero + .quad 1b,30b + .quad 2b,30b + .quad 3b,30b + .quad 4b,30b + .quad 5b,30b + .quad 6b,30b + .quad 7b,30b + .quad 8b,30b + .quad 9b,30b + .quad 10b,30b + .quad 11b,30b + .quad 12b,30b + .quad 13b,30b + .quad 14b,30b + .quad 15b,30b + .quad 16b,30b + .quad 18b,40b + .quad 19b,40b + .quad 21b,50b + .quad 22b,50b .previous - - /* eax: zero, ebx: 64 */ -.Ls1e: addl $8,%eax /* eax: bytes left uncopied: Ls1e: 64 .. 
Ls8e: 8 */ -.Ls2e: addl $8,%eax -.Ls3e: addl $8,%eax -.Ls4e: addl $8,%eax -.Ls5e: addl $8,%eax -.Ls6e: addl $8,%eax -.Ls7e: addl $8,%eax -.Ls8e: addl $8,%eax - addq %rbx,%rdi /* +64 */ - subq %rax,%rdi /* correct destination with computed offset */ - - shlq $6,%rdx /* loop counter * 64 (stride length) */ - addq %rax,%rdx /* add offset to loopcnt */ - andl $63,%ecx /* remaining bytes */ - addq %rcx,%rdx /* add them */ - jmp .Lzero_rest - - /* exception on quad word loop in tail handling */ - /* ecx: loopcnt/8, %edx: length, rdi: correct */ -.Le_quad: - shll $3,%ecx - andl $7,%edx - addl %ecx,%edx - /* edx: bytes to zero, rdi: dest, eax:zero */ -.Lzero_rest: - cmpl $0,(%rsp) /* zero flag set? */ - jz .Le_zero - movq %rdx,%rcx -.Le_byte: - xorl %eax,%eax -.Le5: rep - stosb - /* when there is another exception while zeroing the rest just return */ -.Le_zero: - movq %rdx,%rax - jmp .Lende CFI_ENDPROC ENDPROC(__copy_user_nocache) - - -- cgit v1.2.3 From 27cb0a75ba252ea7294d67232c4bbbac3f2b2656 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 10 Jul 2008 12:52:52 -0700 Subject: x86: fix compile error in current tip.git Gas 2.15 complains about 32-bit registers being used in lea. AS arch/x86/lib/copy_user_64.o /local/scratch-2/jeremy/hg/xen/paravirt/linux/arch/x86/lib/copy_user_64.S: Assembler messages: /local/scratch-2/jeremy/hg/xen/paravirt/linux/arch/x86/lib/copy_user_64.S:188: Error: `(%edx,%ecx,8)' is not a valid 64 bit base/index expression /local/scratch-2/jeremy/hg/xen/paravirt/linux/arch/x86/lib/copy_user_64.S:257: Error: `(%edx,%ecx,8)' is not a valid 64 bit base/index expression AS arch/x86/lib/copy_user_nocache_64.o /local/scratch-2/jeremy/hg/xen/paravirt/linux/arch/x86/lib/copy_user_nocache_64.S: Assembler messages: /local/scratch-2/jeremy/hg/xen/paravirt/linux/arch/x86/lib/copy_user_nocache_64.S:107: Error: `(%edx,%ecx,8)' is not a valid 64 bit base/index expression Signed-off-by: Jeremy Fitzhardinge Cc: Vitaly Mayatskikh Signed-off-by: Ingo Molnar --- arch/x86/lib/copy_user_64.S | 4 ++-- arch/x86/lib/copy_user_nocache_64.S | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index e5afb4ad3f1..dfdf428975c 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -185,7 +185,7 @@ ENTRY(copy_user_generic_unrolled) 30: shll $6,%ecx addl %ecx,%edx jmp 60f -40: leal (%edx,%ecx,8),%edx +40: lea (%rdx,%rcx,8),%rdx jmp 60f 50: movl %ecx,%edx 60: jmp copy_user_handle_tail /* ecx is zerorest also */ @@ -254,7 +254,7 @@ ENTRY(copy_user_generic_string) ret .section .fixup,"ax" -11: leal (%edx,%ecx,8),%ecx +11: lea (%rdx,%rcx,8),%rcx 12: movl %ecx,%edx /* ecx is zerorest also */ jmp copy_user_handle_tail .previous diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 93353d6a526..40e0e309d27 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -104,7 +104,7 @@ ENTRY(__copy_user_nocache) 30: shll $6,%ecx addl %ecx,%edx jmp 60f -40: leal (%edx,%ecx,8),%edx +40: lea (%rdx,%rcx,8),%rdx jmp 60f 50: movl %ecx,%edx 60: sfence -- cgit v1.2.3
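
The merged putuser.S above documents a non-standard calling convention (data in %eax[:%edx], address in %ecx, error code returned in %eax). A minimal, hypothetical C caller — the function name and argument are illustrative only, not taken from any of the patches above — shows how that error code normally surfaces through the put_user() macro, which dispatches by operand size to the __put_user_1/2/4/8 stubs:

    /*
     * Hypothetical illustration only: how the __put_user_N stubs are
     * usually reached from C. put_user() selects the stub from
     * sizeof(*uptr) and returns the stub's %eax value, i.e. 0 on
     * success or -EFAULT if the user address fails the limit check.
     */
    #include <linux/uaccess.h>
    #include <linux/errno.h>

    static int example_store_status(int __user *uptr, int status)
    {
            if (put_user(status, uptr))
                    return -EFAULT;
            return 0;
    }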
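
In the same spirit, the reworked copy_user_64.S routines report the number of bytes left uncopied (0 on success), and a fault part way through now lands in the generic copy_user_handle_tail(), which for copy_from_user() also zero-fills the unread remainder of the kernel buffer. A short, hypothetical sketch of a caller — names and buffer size are made up for illustration — shows how that return value is typically consumed:

    /*
     * Hypothetical caller, for illustration only. copy_from_user()
     * returns the number of bytes it could NOT copy; 0 means the
     * whole range was transferred. On a partial fault the tail of
     * kbuf is zeroed by copy_user_handle_tail().
     */
    #include <linux/uaccess.h>
    #include <linux/errno.h>
    #include <linux/string.h>

    static long example_copy_cmd(const char __user *ubuf, size_t len)
    {
            char kbuf[64];

            if (len >= sizeof(kbuf))
                    return -EINVAL;
            if (copy_from_user(kbuf, ubuf, len))
                    return -EFAULT;
            kbuf[len] = '\0';
            return strlen(kbuf);    /* e.g. hand the string on */
    }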