diff options
Diffstat (limited to 'arch/x86/xen/xen-asm_32.S')
-rw-r--r-- | arch/x86/xen/xen-asm_32.S | 343 |
1 files changed, 133 insertions, 210 deletions
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 42786f59d9c..88e15deb8b8 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -1,117 +1,43 @@ /* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in pda) of the operations - here; the indirect forms are better handled in C, since they're - generally too large to inline anyway. + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in pda) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. */ -#include <linux/linkage.h> - -#include <asm/asm-offsets.h> #include <asm/thread_info.h> -#include <asm/percpu.h> #include <asm/processor-flags.h> #include <asm/segment.h> #include <xen/interface/xen.h> -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -/* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. - */ -ENTRY(xen_irq_enable_direct) - /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f - -2: call check_events -1: -ENDPATCH(xen_irq_enable_direct) - ret - ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) - - -/* - Disabling events is simply a matter of making the event mask - non-zero. - */ -ENTRY(xen_irq_disable_direct) - movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask -ENDPATCH(xen_irq_disable_direct) - ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) +#include "xen-asm.h" /* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). + * Force an event check by making a hypercall, but preserve regs + * before making the call. */ -ENTRY(xen_save_fl_direct) - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - setz %ah - addb %ah,%ah -ENDPATCH(xen_save_fl_direct) - ret - ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) - - -/* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. - */ -ENTRY(xen_restore_fl_direct) - testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f -2: call check_events -1: -ENDPATCH(xen_restore_fl_direct) +check_events: + push %eax + push %ecx + push %edx + call xen_force_evtchn_callback + pop %edx + pop %ecx + pop %eax ret - ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) /* - We can't use sysexit directly, because we're not running in ring0. - But we can easily fake it up using iret. Assuming xen_sysexit - is jumped to with a standard stack frame, we can just strip it - back to a standard iret frame and use iret. + * We can't use sysexit directly, because we're not running in ring0. + * But we can easily fake it up using iret. Assuming xen_sysexit is + * jumped to with a standard stack frame, we can just strip it back to + * a standard iret frame and use iret. */ ENTRY(xen_sysexit) movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ @@ -122,33 +48,31 @@ ENTRY(xen_sysexit) ENDPROC(xen_sysexit) /* - This is run where a normal iret would be run, with the same stack setup: - 8: eflags - 4: cs - esp-> 0: eip - - This attempts to make sure that any pending events are dealt - with on return to usermode, but there is a small window in - which an event can happen just before entering usermode. If - the nested interrupt ends up setting one of the TIF_WORK_MASK - pending work flags, they will not be tested again before - returning to usermode. This means that a process can end up - with pending work, which will be unprocessed until the process - enters and leaves the kernel again, which could be an - unbounded amount of time. This means that a pending signal or - reschedule event could be indefinitely delayed. - - The fix is to notice a nested interrupt in the critical - window, and if one occurs, then fold the nested interrupt into - the current interrupt stack frame, and re-process it - iteratively rather than recursively. This means that it will - exit via the normal path, and all pending work will be dealt - with appropriately. - - Because the nested interrupt handler needs to deal with the - current stack state in whatever form its in, we keep things - simple by only using a single register which is pushed/popped - on the stack. + * This is run where a normal iret would be run, with the same stack setup: + * 8: eflags + * 4: cs + * esp-> 0: eip + * + * This attempts to make sure that any pending events are dealt with + * on return to usermode, but there is a small window in which an + * event can happen just before entering usermode. If the nested + * interrupt ends up setting one of the TIF_WORK_MASK pending work + * flags, they will not be tested again before returning to + * usermode. This means that a process can end up with pending work, + * which will be unprocessed until the process enters and leaves the + * kernel again, which could be an unbounded amount of time. This + * means that a pending signal or reschedule event could be + * indefinitely delayed. + * + * The fix is to notice a nested interrupt in the critical window, and + * if one occurs, then fold the nested interrupt into the current + * interrupt stack frame, and re-process it iteratively rather than + * recursively. This means that it will exit via the normal path, and + * all pending work will be dealt with appropriately. + * + * Because the nested interrupt handler needs to deal with the current + * stack state in whatever form its in, we keep things simple by only + * using a single register which is pushed/popped on the stack. */ ENTRY(xen_iret) /* test eflags for special cases */ @@ -158,13 +82,15 @@ ENTRY(xen_iret) push %eax ESP_OFFSET=4 # bytes pushed onto stack - /* Store vcpu_info pointer for easy access. Do it this - way to avoid having to reload %fs */ + /* + * Store vcpu_info pointer for easy access. Do it this way to + * avoid having to reload %fs + */ #ifdef CONFIG_SMP GET_THREAD_INFO(%eax) - movl TI_cpu(%eax),%eax - movl __per_cpu_offset(,%eax,4),%eax - mov per_cpu__xen_vcpu(%eax),%eax + movl TI_cpu(%eax), %eax + movl __per_cpu_offset(,%eax,4), %eax + mov per_cpu__xen_vcpu(%eax), %eax #else movl per_cpu__xen_vcpu, %eax #endif @@ -172,37 +98,46 @@ ENTRY(xen_iret) /* check IF state we're restoring */ testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) - /* Maybe enable events. Once this happens we could get a - recursive event, so the critical region starts immediately - afterwards. However, if that happens we don't end up - resuming the code, so we don't have to be worried about - being preempted to another CPU. */ + /* + * Maybe enable events. Once this happens we could get a + * recursive event, so the critical region starts immediately + * afterwards. However, if that happens we don't end up + * resuming the code, so we don't have to be worried about + * being preempted to another CPU. + */ setz XEN_vcpu_info_mask(%eax) xen_iret_start_crit: /* check for unmasked and pending */ cmpw $0x0001, XEN_vcpu_info_pending(%eax) - /* If there's something pending, mask events again so we - can jump back into xen_hypervisor_callback */ + /* + * If there's something pending, mask events again so we can + * jump back into xen_hypervisor_callback + */ sete XEN_vcpu_info_mask(%eax) popl %eax - /* From this point on the registers are restored and the stack - updated, so we don't need to worry about it if we're preempted */ + /* + * From this point on the registers are restored and the stack + * updated, so we don't need to worry about it if we're + * preempted + */ iret_restore_end: - /* Jump to hypervisor_callback after fixing up the stack. - Events are masked, so jumping out of the critical - region is OK. */ + /* + * Jump to hypervisor_callback after fixing up the stack. + * Events are masked, so jumping out of the critical region is + * OK. + */ je xen_hypervisor_callback 1: iret xen_iret_end_crit: -.section __ex_table,"a" +.section __ex_table, "a" .align 4 - .long 1b,iret_exc + .long 1b, iret_exc .previous hyper_iret: @@ -212,55 +147,55 @@ hyper_iret: .globl xen_iret_start_crit, xen_iret_end_crit /* - This is called by xen_hypervisor_callback in entry.S when it sees - that the EIP at the time of interrupt was between xen_iret_start_crit - and xen_iret_end_crit. We're passed the EIP in %eax so we can do - a more refined determination of what to do. - - The stack format at this point is: - ---------------- - ss : (ss/esp may be present if we came from usermode) - esp : - eflags } outer exception info - cs } - eip } - ---------------- <- edi (copy dest) - eax : outer eax if it hasn't been restored - ---------------- - eflags } nested exception info - cs } (no ss/esp because we're nested - eip } from the same ring) - orig_eax }<- esi (copy src) - - - - - - - - - - fs } - es } - ds } SAVE_ALL state - eax } - : : - ebx }<- esp - ---------------- - - In order to deliver the nested exception properly, we need to shift - everything from the return addr up to the error code so it - sits just under the outer exception info. This means that when we - handle the exception, we do it in the context of the outer exception - rather than starting a new one. - - The only caveat is that if the outer eax hasn't been - restored yet (ie, it's still on stack), we need to insert - its value into the SAVE_ALL state before going on, since - it's usermode state which we eventually need to restore. + * This is called by xen_hypervisor_callback in entry.S when it sees + * that the EIP at the time of interrupt was between + * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in + * %eax so we can do a more refined determination of what to do. + * + * The stack format at this point is: + * ---------------- + * ss : (ss/esp may be present if we came from usermode) + * esp : + * eflags } outer exception info + * cs } + * eip } + * ---------------- <- edi (copy dest) + * eax : outer eax if it hasn't been restored + * ---------------- + * eflags } nested exception info + * cs } (no ss/esp because we're nested + * eip } from the same ring) + * orig_eax }<- esi (copy src) + * - - - - - - - - + * fs } + * es } + * ds } SAVE_ALL state + * eax } + * : : + * ebx }<- esp + * ---------------- + * + * In order to deliver the nested exception properly, we need to shift + * everything from the return addr up to the error code so it sits + * just under the outer exception info. This means that when we + * handle the exception, we do it in the context of the outer + * exception rather than starting a new one. + * + * The only caveat is that if the outer eax hasn't been restored yet + * (ie, it's still on stack), we need to insert its value into the + * SAVE_ALL state before going on, since it's usermode state which we + * eventually need to restore. */ ENTRY(xen_iret_crit_fixup) /* - Paranoia: Make sure we're really coming from kernel space. - One could imagine a case where userspace jumps into the - critical range address, but just before the CPU delivers a GP, - it decides to deliver an interrupt instead. Unlikely? - Definitely. Easy to avoid? Yes. The Intel documents - explicitly say that the reported EIP for a bad jump is the - jump instruction itself, not the destination, but some virtual - environments get this wrong. + * Paranoia: Make sure we're really coming from kernel space. + * One could imagine a case where userspace jumps into the + * critical range address, but just before the CPU delivers a + * GP, it decides to deliver an interrupt instead. Unlikely? + * Definitely. Easy to avoid? Yes. The Intel documents + * explicitly say that the reported EIP for a bad jump is the + * jump instruction itself, not the destination, but some + * virtual environments get this wrong. */ movl PT_CS(%esp), %ecx andl $SEGMENT_RPL_MASK, %ecx @@ -270,15 +205,17 @@ ENTRY(xen_iret_crit_fixup) lea PT_ORIG_EAX(%esp), %esi lea PT_EFLAGS(%esp), %edi - /* If eip is before iret_restore_end then stack - hasn't been restored yet. */ + /* + * If eip is before iret_restore_end then stack + * hasn't been restored yet. + */ cmp $iret_restore_end, %eax jae 1f - movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ + movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */ movl %eax, PT_EAX(%esp) - lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ + lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */ /* set up the copy */ 1: std @@ -286,20 +223,6 @@ ENTRY(xen_iret_crit_fixup) rep movsl cld - lea 4(%edi),%esp /* point esp to new frame */ + lea 4(%edi), %esp /* point esp to new frame */ 2: jmp xen_do_upcall - -/* - Force an event check by making a hypercall, - but preserve regs before making the call. - */ -check_events: - push %eax - push %ecx - push %edx - call xen_force_evtchn_callback - pop %edx - pop %ecx - pop %eax - ret |