From 93fa7636dfdc059b25df148f230c0991096afdef Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 8 Apr 2008 11:01:58 +0200 Subject: x86, ptrace: PEBS support Polish the ds.h interface and add support for PEBS. Ds.c is meant to be the resource allocator for per-thread and per-cpu BTS and PEBS recording. It is used by ptrace/utrace to provide execution tracing of debugged tasks. It will be used by profilers (e.g. perfmon2). It may be used by kernel debuggers to provide a kernel execution trace. Changes in detail: - guard DS and ptrace by CONFIG macros - separate DS and BTS more clearly - simplify field accesses - add functions to manage PEBS buffers - add simple protection/allocation mechanism - added support for Atom Opens: - buffer overflow handling Currently, only circular buffers are supported. This is all we need for debugging. Profilers would want an overflow notification. This is planned to be added when perfmon2 is made to use the ds.h interface. - utrace intermediate layer Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/ds.h | 258 +++++++++++++++++++++++++++++++++++-------- include/asm-x86/processor.h | 12 +- include/asm-x86/ptrace-abi.h | 14 ++- include/asm-x86/ptrace.h | 38 ++++++- 4 files changed, 265 insertions(+), 57 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h index 7881368142f..72c5a190bf4 100644 --- a/include/asm-x86/ds.h +++ b/include/asm-x86/ds.h @@ -2,71 +2,237 @@ * Debug Store (DS) support * * This provides a low-level interface to the hardware's Debug Store - * feature that is used for last branch recording (LBR) and + * feature that is used for branch trace store (BTS) and * precise-event based sampling (PEBS). * - * Different architectures use a different DS layout/pointer size. - * The below functions therefore work on a void*. + * It manages: + * - per-thread and per-cpu allocation of BTS and PEBS + * - buffer memory allocation (optional) + * - buffer overflow handling + * - buffer access * + * It assumes: + * - get_task_struct on all parameter tasks + * - current is allowed to trace parameter tasks * - * Since there is no user for PEBS, yet, only LBR (or branch - * trace store, BTS) is supported. * - * - * Copyright (C) 2007 Intel Corporation. - * Markus Metzger , Dec 2007 + * Copyright (C) 2007-2008 Intel Corporation. + * Markus Metzger , 2007-2008 */ #ifndef _ASM_X86_DS_H #define _ASM_X86_DS_H +#ifdef CONFIG_X86_DS + #include #include -struct cpuinfo_x86; +struct task_struct; -/* a branch trace record entry +/* + * Request BTS or PEBS + * + * Due to alignement constraints, the actual buffer may be slightly + * smaller than the requested or provided buffer. * - * In order to unify the interface between various processor versions, - * we use the below data structure for all processors. + * Returns 0 on success; -Eerrno otherwise + * + * task: the task to request recording for; + * NULL for per-cpu recording on the current cpu + * base: the base pointer for the (non-pageable) buffer; + * NULL if buffer allocation requested + * size: the size of the requested or provided buffer + * ovfl: pointer to a function to be called on buffer overflow; + * NULL if cyclic buffer requested */ -enum bts_qualifier { - BTS_INVALID = 0, - BTS_BRANCH, - BTS_TASK_ARRIVES, - BTS_TASK_DEPARTS -}; +typedef void (*ds_ovfl_callback_t)(struct task_struct *); +extern int ds_request_bts(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl); +extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl); + +/* + * Release BTS or PEBS resources + * + * Frees buffers allocated on ds_request. + * + * Returns 0 on success; -Eerrno otherwise + * + * task: the task to release resources for; + * NULL to release resources for the current cpu + */ +extern int ds_release_bts(struct task_struct *task); +extern int ds_release_pebs(struct task_struct *task); + +/* + * Return the (array) index of the write pointer. + * (assuming an array of BTS/PEBS records) + * + * Returns -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * pos (out): if not NULL, will hold the result + */ +extern int ds_get_bts_index(struct task_struct *task, size_t *pos); +extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); + +/* + * Return the (array) index one record beyond the end of the array. + * (assuming an array of BTS/PEBS records) + * + * Returns -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * pos (out): if not NULL, will hold the result + */ +extern int ds_get_bts_end(struct task_struct *task, size_t *pos); +extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); + +/* + * Provide a pointer to the BTS/PEBS record at parameter index. + * (assuming an array of BTS/PEBS records) + * + * The pointer points directly into the buffer. The user is + * responsible for copying the record. + * + * Returns the size of a single record on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * index: the index of the requested record + * record (out): pointer to the requested record + */ +extern int ds_access_bts(struct task_struct *task, + size_t index, const void **record); +extern int ds_access_pebs(struct task_struct *task, + size_t index, const void **record); + +/* + * Write one or more BTS/PEBS records at the write pointer index and + * advance the write pointer. + * + * If size is not a multiple of the record size, trailing bytes are + * zeroed out. + * + * May result in one or more overflow notifications. + * + * If called during overflow handling, that is, with index >= + * interrupt threshold, the write will wrap around. + * + * An overflow notification is given if and when the interrupt + * threshold is reached during or after the write. + * + * Returns the number of bytes written or -Eerrno. + * + * task: the task to access; + * NULL to access the current cpu + * buffer: the buffer to write + * size: the size of the buffer + */ +extern int ds_write_bts(struct task_struct *task, + const void *buffer, size_t size); +extern int ds_write_pebs(struct task_struct *task, + const void *buffer, size_t size); + +/* + * Same as ds_write_bts/pebs, but omit ownership checks. + * + * This is needed to have some other task than the owner of the + * BTS/PEBS buffer or the parameter task itself write into the + * respective buffer. + */ +extern int ds_unchecked_write_bts(struct task_struct *task, + const void *buffer, size_t size); +extern int ds_unchecked_write_pebs(struct task_struct *task, + const void *buffer, size_t size); + +/* + * Reset the write pointer of the BTS/PEBS buffer. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + */ +extern int ds_reset_bts(struct task_struct *task); +extern int ds_reset_pebs(struct task_struct *task); + +/* + * Clear the BTS/PEBS buffer and reset the write pointer. + * The entire buffer will be zeroed out. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + */ +extern int ds_clear_bts(struct task_struct *task); +extern int ds_clear_pebs(struct task_struct *task); + +/* + * Provide the PEBS counter reset value. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * value (out): the counter reset value + */ +extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); + +/* + * Set the PEBS counter reset value. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * value: the new counter reset value + */ +extern int ds_set_pebs_reset(struct task_struct *task, u64 value); + +/* + * Initialization + */ +struct cpuinfo_x86; +extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); + + -struct bts_struct { - u64 qualifier; - union { - /* BTS_BRANCH */ - struct { - u64 from_ip; - u64 to_ip; - } lbr; - /* BTS_TASK_ARRIVES or - BTS_TASK_DEPARTS */ - u64 jiffies; - } variant; +/* + * The DS context - part of struct thread_struct. + */ +struct ds_context { + /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ + unsigned char *ds; + /* the owner of the BTS and PEBS configuration, respectively */ + struct task_struct *owner[2]; + /* buffer overflow notification function for BTS and PEBS */ + ds_ovfl_callback_t callback[2]; + /* the original buffer address */ + void *buffer[2]; + /* the number of allocated pages for on-request allocated buffers */ + unsigned int pages[2]; + /* use count */ + unsigned long count; + /* a pointer to the context location inside the thread_struct + * or the per_cpu context array */ + struct ds_context **this; + /* a pointer to the task owning this context, or NULL, if the + * context is owned by a cpu */ + struct task_struct *task; }; -/* Overflow handling mechanisms */ -#define DS_O_SIGNAL 1 /* send overflow signal */ -#define DS_O_WRAP 2 /* wrap around */ - -extern int ds_allocate(void **, size_t); -extern int ds_free(void **); -extern int ds_get_bts_size(void *); -extern int ds_get_bts_end(void *); -extern int ds_get_bts_index(void *); -extern int ds_set_overflow(void *, int); -extern int ds_get_overflow(void *); -extern int ds_clear(void *); -extern int ds_read_bts(void *, int, struct bts_struct *); -extern int ds_write_bts(void *, const struct bts_struct *); -extern unsigned long ds_debugctl_mask(void); -extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c); +/* called by exit_thread() to free leftover contexts */ +extern void ds_free(struct ds_context *context); + +#else /* CONFIG_X86_DS */ + +#define ds_init_intel(config) do {} while (0) +#endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 559105220a4..beaccb71628 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -20,6 +20,7 @@ struct mm_struct; #include #include #include +#include #include #include @@ -415,9 +416,14 @@ struct thread_struct { unsigned io_bitmap_max; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ unsigned long debugctlmsr; -/* Debug Store - if not 0 points to a DS Save Area configuration; - * goes into MSR_IA32_DS_AREA */ - unsigned long ds_area_msr; +#ifdef CONFIG_X86_DS +/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ + struct ds_context *ds_ctx; +#endif /* CONFIG_X86_DS */ +#ifdef CONFIG_X86_PTRACE_BTS +/* the signal to send on a bts buffer overflow */ + unsigned int bts_ovfl_signal; +#endif /* CONFIG_X86_PTRACE_BTS */ }; static inline unsigned long native_get_debugreg(int regno) diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h index f224eb3c315..9bcaa75cbca 100644 --- a/include/asm-x86/ptrace-abi.h +++ b/include/asm-x86/ptrace-abi.h @@ -80,8 +80,9 @@ #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ -#ifndef __ASSEMBLY__ +#ifdef CONFIG_X86_PTRACE_BTS +#ifndef __ASSEMBLY__ #include /* configuration/status structure used in PTRACE_BTS_CONFIG and @@ -97,20 +98,20 @@ struct ptrace_bts_config { /* actual size of bts_struct in bytes */ __u32 bts_size; }; -#endif +#endif /* __ASSEMBLY__ */ #define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ #define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ #define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG on buffer overflow instead of wrapping around */ -#define PTRACE_BTS_O_CUT_SIZE 0x8 /* cut requested size to max available - instead of failing */ +#define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */ #define PTRACE_BTS_CONFIG 40 /* Configure branch trace recording. ADDR points to a struct ptrace_bts_config. DATA gives the size of that buffer. - A new buffer is allocated, iff the size changes. + A new buffer is allocated, if requested in the flags. + An overflow signal may only be requested for new buffers. Returns the number of bytes read. */ #define PTRACE_BTS_STATUS 41 @@ -119,7 +120,7 @@ struct ptrace_bts_config { Returns the number of bytes written. */ #define PTRACE_BTS_SIZE 42 -/* Return the number of available BTS records. +/* Return the number of available BTS records for draining. DATA and ADDR are ignored. */ #define PTRACE_BTS_GET 43 @@ -139,5 +140,6 @@ struct ptrace_bts_config { BTS records are read from oldest to newest. Returns number of BTS records drained. */ +#endif /* CONFIG_X86_PTRACE_BTS */ #endif diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 9f922b0b95d..6303701d18e 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -125,14 +125,48 @@ struct pt_regs { #endif /* __KERNEL__ */ #endif /* !__i386__ */ + +#ifdef CONFIG_X86_PTRACE_BTS +/* a branch trace record entry + * + * In order to unify the interface between various processor versions, + * we use the below data structure for all processors. + */ +enum bts_qualifier { + BTS_INVALID = 0, + BTS_BRANCH, + BTS_TASK_ARRIVES, + BTS_TASK_DEPARTS +}; + +struct bts_struct { + __u64 qualifier; + union { + /* BTS_BRANCH */ + struct { + __u64 from_ip; + __u64 to_ip; + } lbr; + /* BTS_TASK_ARRIVES or + BTS_TASK_DEPARTS */ + __u64 jiffies; + } variant; +}; +#endif /* CONFIG_X86_PTRACE_BTS */ + #ifdef __KERNEL__ -/* the DS BTS struct is used for ptrace as well */ -#include +#include +struct cpuinfo_x86; struct task_struct; +#ifdef CONFIG_X86_PTRACE_BTS +extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *); extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); +#else +#define ptrace_bts_init_intel(config) do {} while (0) +#endif /* CONFIG_X86_PTRACE_BTS */ extern unsigned long profile_pc(struct pt_regs *regs); -- cgit v1.2.3 From 32f71aff77b6470d272f80ac28f43f9601c4d140 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 21 Jul 2008 00:52:49 +0100 Subject: x86: PIC, L-APIC and I/O APIC debug information Dump all the PIC, local APIC and I/O APIC information at the fs_initcall() level, which is after ACPI (if used) has initialised PCI information, making the point of invocation consistent across MP-table and ACPI platforms. Remove explicit calls to print_IO_APIC() from elsewhere. Make the interface of all the functions involved consistent between 32-bit and 64-bit versions and make them all static by default by the means of a New-and-Improved(TM) __apicdebuginit() macro. Note that like print_IO_APIC() all these only output anything if "apic=debug" has been passed to the kernel through the command line. Signed-off-by: Maciej W. Rozycki Cc: Chuck Ebbert Signed-off-by: Ingo Molnar --- include/asm-x86/hw_irq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 77ba51df566..83e0048dca7 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -64,7 +64,6 @@ extern unsigned long io_apic_irqs; extern void init_VISWS_APIC_irqs(void); extern void setup_IO_APIC(void); extern void disable_IO_APIC(void); -extern void print_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); extern void setup_ioapic_dest(void); -- cgit v1.2.3 From 2ae111cdd8d83ebf9de72e36e68a8c84b6ebbeea Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 11 Aug 2008 18:34:08 +0400 Subject: x86: apic interrupts - move assignments to irqinit_32.c, v2 64bit mode APIC interrupt handlers are set within irqinit_64.c. Lets do tha same for 32bit mode which would help in furter code merging. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- include/asm-x86/arch_hooks.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h index 72adc3a109c..de4596b24c2 100644 --- a/include/asm-x86/arch_hooks.h +++ b/include/asm-x86/arch_hooks.h @@ -12,8 +12,6 @@ /* these aren't arch hooks, they are generic routines * that can be used by the hooks */ extern void init_ISA_irqs(void); -extern void apic_intr_init(void); -extern void smp_intr_init(void); extern irqreturn_t timer_interrupt(int irq, void *dev_id); /* these are the defined hooks */ -- cgit v1.2.3 From 1ac2f7d55b7ee1613c90631e87fea22ec06781e5 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 4 Aug 2008 14:51:24 +0800 Subject: introduce two APIs for page attribute Introduce two APIs for page attribute. flushing tlb/cache in every page attribute is expensive. AGP gart usually will do a lot of operations to change a page to uc, new APIs can reduce flush. Signed-off-by: Shaohua Li Cc: airlied@linux.ie Cc: Andrew Morton Cc: Arjan van de Ven Signed-off-by: Ingo Molnar --- include/asm-x86/cacheflush.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index f4c0ab50d2c..57bac7b68c4 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -57,6 +57,8 @@ int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wb(unsigned long addr, int numpages); int set_memory_uc(unsigned long addr, int numpages); +int set_memory_uc_noflush(unsigned long addr, int numpages); +void set_memory_flush_all(void); int set_memory_wc(unsigned long addr, int numpages); int set_memory_wb(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); @@ -87,6 +89,7 @@ int set_memory_4k(unsigned long addr, int numpages); */ int set_pages_uc(struct page *page, int numpages); +int set_pages_uc_noflush(struct page *page, int numpages); int set_pages_wb(struct page *page, int numpages); int set_pages_x(struct page *page, int numpages); int set_pages_nx(struct page *page, int numpages); -- cgit v1.2.3 From 466ae837424dcc538b1af2a0eaf53be32edcdbe7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 4 Aug 2008 14:51:30 +0800 Subject: reduce tlb/cache flush times of agpgart memory allocation To reduce tlb/cache flush, makes agp memory allocation do one flush after all pages in a region are changed to uc. All agp drivers except agp-sgi uses agp_generic_alloc_page() for .agp_alloc_page, so the patch should work for them. agp-sgi is only for ia64, so not a problem too. Signed-off-by: Shaohua Li Cc: airlied@linux.ie Cc: Andrew Morton Cc: Arjan van de Ven Signed-off-by: Ingo Molnar --- include/asm-x86/agp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h index e4004a9f6a9..181b9e984b3 100644 --- a/include/asm-x86/agp.h +++ b/include/asm-x86/agp.h @@ -15,6 +15,9 @@ #define map_page_into_agp(page) set_pages_uc(page, 1) #define unmap_page_from_agp(page) set_pages_wb(page, 1) +#define map_page_into_agp_noflush(page) set_pages_uc_noflush(page, 1) +#define map_page_into_agp_global_flush() set_memory_flush_all() + /* * Could use CLFLUSH here if the cpu supports it. But then it would * need to be called for each cacheline of the whole page so it may -- cgit v1.2.3 From 1f49a2c2aeb22d5abc6d4ea574ff63d37ca55fbe Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 15 Aug 2008 12:45:09 -0400 Subject: x86: revert replace LOCK_PREFIX in futex.h Since we now use DS prefixes instead of NOP to remove LOCK prefixes, there are no longer any issues with instruction boundaries moving around. Depends on : x86 alternatives : fix LOCK_PREFIX race with preemptible kernel and CPU hotplug On Thu, 14 Aug 2008, Mathieu Desnoyers wrote: > > Changing the 0x90 (single-byte nop) currently used into a 0x3E DS segment > override prefix should fix this issue. Since the default of the atomic > instructions is to use the DS segment anyway, it should not affect the > behavior. Ok, so I think this is an _excellent_ patch, but I'd like to also then use LOCK_PREFIX in include/asm-x86/futex.h. See commit 9d55b9923a1b7ea8193b8875c57ec940dc2ff027. Linus Applies to 2.6.27-rc2 (and -rc3 unless hell broke loose in futex.h between rc2 and rc3). Signed-off-by: Mathieu Desnoyers CC: Linus Torvalds CC: H. Peter Anvin CC: Jeremy Fitzhardinge CC: Roland McGrath CC: Ingo Molnar Cc: Steven Rostedt CC: Steven Rostedt CC: Thomas Gleixner CC: Peter Zijlstra CC: Andrew Morton CC: David Miller CC: Ulrich Drepper CC: Rusty Russell CC: Gregory Haskins CC: Arnaldo Carvalho de Melo CC: "Luis Claudio R. Goncalves" CC: Clark Williams CC: Christoph Lameter CC: Andi Kleen CC: Harvey Harrison Signed-off-by: H. Peter Anvin --- include/asm-x86/futex.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h index e7a76b37b33..d1b988ce080 100644 --- a/include/asm-x86/futex.h +++ b/include/asm-x86/futex.h @@ -25,7 +25,7 @@ asm volatile("1:\tmovl %2, %0\n" \ "\tmovl\t%0, %3\n" \ "\t" insn "\n" \ - "2:\tlock; cmpxchgl %3, %2\n" \ + "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ "\tjnz\t1b\n" \ "3:\t.section .fixup,\"ax\"\n" \ "4:\tmov\t%5, %1\n" \ @@ -64,7 +64,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op1("lock; xaddl %0, %2", ret, oldval, + __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, uaddr, oparg); break; case FUTEX_OP_OR: @@ -122,7 +122,7 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - asm volatile("1:\tlock; cmpxchgl %3, %1\n" + asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" "2:\t.section .fixup, \"ax\"\n" "3:\tmov %2, %0\n" "\tjmp 2b\n" -- cgit v1.2.3 From 5bbd4c3724008c93cf3efdfc38a3402e245ab506 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 15 Aug 2008 12:56:59 -0400 Subject: x86: spinlock use LOCK_PREFIX Since we are now using DS prefixes instead of NOP to remove LOCK prefixes, there is no longer any problems with instruction boundaries moving around. * Linus Torvalds (torvalds@linux-foundation.org) wrote: > > > On Thu, 14 Aug 2008, Mathieu Desnoyers wrote: > > > > Changing the 0x90 (single-byte nop) currently used into a 0x3E DS segment > > override prefix should fix this issue. Since the default of the atomic > > instructions is to use the DS segment anyway, it should not affect the > > behavior. > > Ok, so I think this is an _excellent_ patch, but I'd like to also then use > LOCK_PREFIX in include/asm-x86/futex.h. > > See commit 9d55b9923a1b7ea8193b8875c57ec940dc2ff027. > > Linus Unless there a rationale for this, I think these be changed to LOCK_PREFIX too. grep "lock ;" include/asm-x86/spinlock.h "lock ; cmpxchgw %w1,%2\n\t" asm volatile("lock ; xaddl %0, %1\n" "lock ; cmpxchgl %1,%2\n\t" Applies to 2.6.27-rc2. Signed-off-by: Mathieu Desnoyers Acked-by: Linus Torvalds CC: Linus Torvalds CC: H. Peter Anvin CC: Jeremy Fitzhardinge CC: Roland McGrath CC: Ingo Molnar Cc: Steven Rostedt CC: Steven Rostedt CC: Thomas Gleixner CC: Peter Zijlstra CC: Andrew Morton CC: David Miller CC: Ulrich Drepper CC: Rusty Russell CC: Gregory Haskins CC: Arnaldo Carvalho de Melo CC: "Luis Claudio R. Goncalves" CC: Clark Williams CC: Christoph Lameter CC: Andi Kleen CC: Harvey Harrison Signed-off-by: H. Peter Anvin --- include/asm-x86/spinlock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h index 4f9a9861799..0b4d59a93d2 100644 --- a/include/asm-x86/spinlock.h +++ b/include/asm-x86/spinlock.h @@ -97,7 +97,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) "jne 1f\n\t" "movw %w0,%w1\n\t" "incb %h1\n\t" - "lock ; cmpxchgw %w1,%2\n\t" + LOCK_PREFIX "cmpxchgw %w1,%2\n\t" "1:" "sete %b1\n\t" "movzbl %b1,%0\n\t" @@ -135,7 +135,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) int inc = 0x00010000; int tmp; - asm volatile("lock ; xaddl %0, %1\n" + asm volatile(LOCK_PREFIX "xaddl %0, %1\n" "movzwl %w0, %2\n\t" "shrl $16, %0\n\t" "1:\t" @@ -162,7 +162,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) "cmpl %0,%1\n\t" "jne 1f\n\t" "addl $0x00010000, %1\n\t" - "lock ; cmpxchgl %1,%2\n\t" + LOCK_PREFIX "cmpxchgl %1,%2\n\t" "1:" "sete %b1\n\t" "movzbl %b1,%0\n\t" -- cgit v1.2.3 From e621bd18958ef5dbace3129ebe17a0a475e127d9 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Wed, 20 Aug 2008 16:43:03 -0700 Subject: i386: vmalloc size fix Booting kernel with vmalloc=[any size<=16m] will oops on my pc (i386/1G memory). BUG_ON in arch/x86/mm/init_32.c triggered: BUG_ON((unsigned long)high_memory > VMALLOC_START); It's due to the vm area hole. In include/asm-x86/pgtable_32.h: #define VMALLOC_OFFSET (8 * 1024 * 1024) #define VMALLOC_START (((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) \ & ~(VMALLOC_OFFSET - 1)) There's several related point: 1. MAXMEM : (-__PAGE_OFFSET - __VMALLOC_RESERVE). The space after VMALLOC_END is included as well, I set it to (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) 2. VMALLOC_OFFSET is not considered in __VMALLOC_RESERVE fixed by adding VMALLOC_OFFSET to it. 3. VMALLOC_START : (((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) & ~(VMALLOC_OFFSET - 1)) So it's not always 8M, bigger than 8M possible. I set it to ((unsigned long)high_memory + VMALLOC_OFFSET) 4. the VMALLOC_RESERVE is an unused macro, so remove it here. Signed-off-by: Dave Young Cc: akpm@linux-foundation.org Cc: hidave.darkstar@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton --- include/asm-x86/page_32.h | 3 --- include/asm-x86/pgtable_32.h | 5 +++-- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index ab8528793f0..632feb156cc 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -89,9 +89,6 @@ extern int nx_enabled; extern unsigned int __VMALLOC_RESERVE; extern int sysctl_legacy_va_layout; -#define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) -#define MAXMEM (-__PAGE_OFFSET - __VMALLOC_RESERVE) - extern void find_low_pfn_range(void); extern unsigned long init_memory_mapping(unsigned long start, unsigned long end); diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index 5c3b26567a9..9bb5269475c 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -56,8 +56,7 @@ void paging_init(void); * area for the same reason. ;) */ #define VMALLOC_OFFSET (8 * 1024 * 1024) -#define VMALLOC_START (((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) \ - & ~(VMALLOC_OFFSET - 1)) +#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) #ifdef CONFIG_X86_PAE #define LAST_PKMAP 512 #else @@ -73,6 +72,8 @@ void paging_init(void); # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) #endif +#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + /* * Define this if things work differently on an i386 and an i486: * it will (on an i486) warn about kernel memory accesses that are -- cgit v1.2.3 From 11494547b1754c4f3bd7f707ab869e2adf54d52f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 21 Aug 2008 01:01:19 -0700 Subject: x86: fix apic version warning after following patch, commit 1b313f4a6d7bee7b2c034b3f1e203bc360a71cca Author: Cyrill Gorcunov Date: Mon Aug 18 20:45:57 2008 +0400 x86: apic - generic_processor_info - use physid_set instead of phys_cpu and physids_or - set phys_cpu_present_map bit AFTER check for allowed number of processors - add checking for APIC valid version in 64bit mode (mostly not needed but added for merging purpose) - add apic_version definition for 64bit mode which is used now we are getting warning for acpi path on 64 bit system. make the 64-bit side fill in apic_version[] as well. [ mingo@elte.hu: build fix ] Signed-off-by: Ingo Molnar --- include/asm-x86/mpspec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h index 118da365e37..be2241a818f 100644 --- a/include/asm-x86/mpspec.h +++ b/include/asm-x86/mpspec.h @@ -5,11 +5,12 @@ #include +extern int apic_version[MAX_APICS]; + #ifdef CONFIG_X86_32 #include extern unsigned int def_to_bigsmp; -extern int apic_version[MAX_APICS]; extern u8 apicid_2_node[]; extern int pic_mode; -- cgit v1.2.3 From 671eef85a3e885dff4ce210d8774ad50a91d5967 Mon Sep 17 00:00:00 2001 From: "Cihula, Joseph" Date: Wed, 20 Aug 2008 16:43:07 -0700 Subject: x86, e820: add support for AddressRangeUnusuable ACPI memory type Add support for the E820_UNUSABLE memory type, which is defined in Revision 3.0b (Oct. 10, 2006) of the ACPI Specification on p. 394 Table 14-1: AddressRangeUnusuable This range of address contains memory in which errors have been detected. This range must not be used by the OSPM. Signed-off-by: Joseph Cihula Signed-off-by: Shane Wang Signed-off-by: Gang Wei Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/asm-x86/e820.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index f52daf176bc..ca433c36af7 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -43,6 +43,7 @@ #define E820_RESERVED 2 #define E820_ACPI 3 #define E820_NVS 4 +#define E820_UNUSABLE 5 /* reserved RAM used by kernel itself */ #define E820_RESERVED_KERN 128 -- cgit v1.2.3 From 9326d61bf64c4293f834e86c11f52db5be9798d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Aug 2008 13:46:25 +0200 Subject: Revert "reduce tlb/cache flush times of agpgart memory allocation" This reverts commit 466ae837424dcc538b1af2a0eaf53be32edcdbe7. --- include/asm-x86/agp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h index 181b9e984b3..e4004a9f6a9 100644 --- a/include/asm-x86/agp.h +++ b/include/asm-x86/agp.h @@ -15,9 +15,6 @@ #define map_page_into_agp(page) set_pages_uc(page, 1) #define unmap_page_from_agp(page) set_pages_wb(page, 1) -#define map_page_into_agp_noflush(page) set_pages_uc_noflush(page, 1) -#define map_page_into_agp_global_flush() set_memory_flush_all() - /* * Could use CLFLUSH here if the cpu supports it. But then it would * need to be called for each cacheline of the whole page so it may -- cgit v1.2.3 From cacf890694a36124ceddce44ff4c7b02d372ce7c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Aug 2008 13:46:33 +0200 Subject: Revert "introduce two APIs for page attribute" This reverts commit 1ac2f7d55b7ee1613c90631e87fea22ec06781e5. --- include/asm-x86/cacheflush.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index 57bac7b68c4..f4c0ab50d2c 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -57,8 +57,6 @@ int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wb(unsigned long addr, int numpages); int set_memory_uc(unsigned long addr, int numpages); -int set_memory_uc_noflush(unsigned long addr, int numpages); -void set_memory_flush_all(void); int set_memory_wc(unsigned long addr, int numpages); int set_memory_wb(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); @@ -89,7 +87,6 @@ int set_memory_4k(unsigned long addr, int numpages); */ int set_pages_uc(struct page *page, int numpages); -int set_pages_uc_noflush(struct page *page, int numpages); int set_pages_wb(struct page *page, int numpages); int set_pages_x(struct page *page, int numpages); int set_pages_nx(struct page *page, int numpages); -- cgit v1.2.3 From d75586ad01e6c5a30e7337fb87d61e03556a1ecb Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 21 Aug 2008 10:46:06 +0800 Subject: x86, pageattr: introduce APIs to change pageattr of a page array Add array interface APIs of pageattr. page based cache flush is quite slow for a lot of pages. If pages are more than 1024 (4M), the patch will use a wbinvd(). We have a simple test here (run a 3d game - open arena), nearly all agp memory allocation are small (< 1M), so suppose this will not impact runtime performance. Signed-off-by: Dave Airlie Signed-off-by: Shaohua Li Signed-off-by: Ingo Molnar --- include/asm-x86/cacheflush.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index f4c0ab50d2c..0a5f71817b3 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -66,6 +66,9 @@ int set_memory_rw(unsigned long addr, int numpages); int set_memory_np(unsigned long addr, int numpages); int set_memory_4k(unsigned long addr, int numpages); +int set_memory_array_uc(unsigned long *addr, int addrinarray); +int set_memory_array_wb(unsigned long *addr, int addrinarray); + /* * For legacy compatibility with the old APIs, a few functions * are provided that work on a "struct page". -- cgit v1.2.3 From f86399396ce7a4f4069828b7dceac5aa5113dfb5 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 30 Jul 2008 18:32:27 -0300 Subject: x86, paravirt_ops: use unsigned long instead of u32 for alloc_p*() pfn args This patch changes the pfn args from 'u32' to 'unsigned long' on alloc_p*() functions on paravirt_ops, and the corresponding implementations for Xen and VMI. The prototypes for CONFIG_PARAVIRT=n are already using unsigned long, so paravirt.h now matches the prototypes on asm-x86/pgalloc.h. It shouldn't result in any changes on generated code on 32-bit, with or without CONFIG_PARAVIRT. On both cases, 'codiff -f' didn't show any change after applying this patch. On 64-bit, there are (expected) binary changes only when CONFIG_PARAVIRT is enabled, as the patch is really supposed to change the size of the pfn args. [ v2: KVM_GUEST: use the right parameter type on kvm_release_pt() ] Signed-off-by: Eduardo Habkost Acked-by: Jeremy Fitzhardinge Acked-by: Zachary Amsden Signed-off-by: Ingo Molnar --- include/asm-x86/paravirt.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index fbbde93f12d..497aea0f41a 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -257,13 +257,13 @@ struct pv_mmu_ops { * Hooks for allocating/releasing pagetable pages when they're * attached to a pagetable */ - void (*alloc_pte)(struct mm_struct *mm, u32 pfn); - void (*alloc_pmd)(struct mm_struct *mm, u32 pfn); - void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); - void (*alloc_pud)(struct mm_struct *mm, u32 pfn); - void (*release_pte)(u32 pfn); - void (*release_pmd)(u32 pfn); - void (*release_pud)(u32 pfn); + void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); + void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); + void (*release_pte)(unsigned long pfn); + void (*release_pmd)(unsigned long pfn); + void (*release_pud)(unsigned long pfn); /* Pagetable manipulation functions */ void (*set_pte)(pte_t *ptep, pte_t pteval); @@ -993,35 +993,35 @@ static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd); } -static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn) +static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); } -static inline void paravirt_release_pte(unsigned pfn) +static inline void paravirt_release_pte(unsigned long pfn) { PVOP_VCALL1(pv_mmu_ops.release_pte, pfn); } -static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn) +static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); } -static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn, - unsigned start, unsigned count) +static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, + unsigned long start, unsigned long count) { PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count); } -static inline void paravirt_release_pmd(unsigned pfn) +static inline void paravirt_release_pmd(unsigned long pfn) { PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); } -static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn) +static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn); } -static inline void paravirt_release_pud(unsigned pfn) +static inline void paravirt_release_pud(unsigned long pfn) { PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); } -- cgit v1.2.3 From 6c505ce3930c6a6b455cda53fab3e88ae44f8221 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:45 +0200 Subject: x86: move dma_*_coherent functions to include file All the x86 DMA-API functions are defined in asm/dma-mapping.h. This patch moves the dma_*_coherent functions also to this header file because they are now small enough to do so. This is done as a separate patch because it also includes some renaming and restructuring of the dma-mapping.h file. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/dma-mapping.h | 47 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 10 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index ad9cd6d49bf..8e16095d1fa 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -9,10 +9,11 @@ #include #include #include +#include extern dma_addr_t bad_dma_address; extern int iommu_merge; -extern struct device fallback_dev; +extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; extern int force_iommu; @@ -87,13 +88,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle); - +#define dma_is_consistent(d, h) (1) extern int dma_supported(struct device *hwdev, u64 mask); extern int dma_set_mask(struct device *dev, u64 mask); @@ -247,7 +242,39 @@ static inline int dma_get_cache_alignment(void) return boot_cpu_data.x86_clflush_size; } -#define dma_is_consistent(d, h) (1) +static inline void * +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + void *memory; + + if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) + return memory; + + if (!dev) { + dev = &x86_dma_fallback_dev; + gfp |= GFP_DMA; + } + + if (ops->alloc_coherent) + return ops->alloc_coherent(dev, size, + dma_handle, gfp); + return NULL; +} + +static inline void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + + WARN_ON(irqs_disabled()); /* for portability */ + + if (dma_release_from_coherent(dev, get_order(size), vaddr)) + return; + + if (ops->free_coherent) + ops->free_coherent(dev, size, vaddr, bus); +} -#include #endif -- cgit v1.2.3 From 766af9fa812f49feb4a3e62cf92f3d37f33c7fb6 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 22 Aug 2008 00:12:09 +0900 Subject: dma-mapping.h, x86: remove last user of dma_mapping_ops->map_simple pci-dma.c doesn't use map_simple hook any more so we can remove it from struct dma_mapping_ops now. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- include/asm-x86/dma-mapping.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index 8e16095d1fa..3a9a6f5e681 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -26,9 +26,6 @@ struct dma_mapping_ops { void *vaddr, dma_addr_t dma_handle); dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr, size_t size, int direction); - /* like map_single, but doesn't check the device mask */ - dma_addr_t (*map_simple)(struct device *hwdev, phys_addr_t ptr, - size_t size, int direction); void (*unmap_single)(struct device *dev, dma_addr_t addr, size_t size, int direction); void (*sync_single_for_cpu)(struct device *hwdev, -- cgit v1.2.3 From b05f78f5c713eda2c34e495d92495ee4f1c3b5e1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 22 Aug 2008 01:32:50 -0700 Subject: x86_64: printout msr -v2 commandline show_msr=1 for bsp, show_msr=32 for all 32 cpus. [ mingo@elte.hu: added documentation ] Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/asm-x86/msr.h | 23 +++++++++++++++++++++++ include/asm-x86/paravirt.h | 12 ++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h index ca110ee73f0..a30e586df93 100644 --- a/include/asm-x86/msr.h +++ b/include/asm-x86/msr.h @@ -63,6 +63,22 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, return EAX_EDX_VAL(val, low, high); } +static inline unsigned long long native_read_msr_amd_safe(unsigned int msr, + int *err) +{ + DECLARE_ARGS(val, low, high); + + asm volatile("2: rdmsr ; xor %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: mov %3,%0 ; jmp 1b\n\t" + ".previous\n\t" + _ASM_EXTABLE(2b, 3b) + : "=r" (*err), EAX_EDX_RET(val, low, high) + : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT)); + return EAX_EDX_VAL(val, low, high); +} + static inline void native_write_msr(unsigned int msr, unsigned low, unsigned high) { @@ -158,6 +174,13 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) *p = native_read_msr_safe(msr, &err); return err; } +static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) +{ + int err; + + *p = native_read_msr_amd_safe(msr, &err); + return err; +} #define rdtscl(low) \ ((low) = (u32)native_read_tsc()) diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index fbbde93f12d..d5cfc5e3eb5 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -137,6 +137,7 @@ struct pv_cpu_ops { /* MSR, PMC and TSR operations. err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ + u64 (*read_msr_amd)(unsigned int msr, int *err); u64 (*read_msr)(unsigned int msr, int *err); int (*write_msr)(unsigned int msr, unsigned low, unsigned high); @@ -726,6 +727,10 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err) { return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); } +static inline u64 paravirt_read_msr_amd(unsigned msr, int *err) +{ + return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err); +} static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) { return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); @@ -771,6 +776,13 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) *p = paravirt_read_msr(msr, &err); return err; } +static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) +{ + int err; + + *p = paravirt_read_msr_amd(msr, &err); + return err; +} static inline u64 paravirt_read_tsc(void) { -- cgit v1.2.3 From bbb65d2d365efe9951290e61678dcf81ec60add4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:10 +0200 Subject: x86: use cpuid vector 0xb when available for detecting cpu topology cpuid leaf 0xb provides extended topology enumeration. This interface provides the 32-bit x2APIC id of the logical processor and it also provides a new mechanism to detect SMT and core siblings (which provides increased addressability). Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- include/asm-x86/cpufeature.h | 1 + include/asm-x86/processor.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 5fc4d55906d..8d842af4cf7 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -81,6 +81,7 @@ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ #define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */ #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_XTOPOLOGY (3*32+21) /* cpu topology enum extensions */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 5f58da401b4..79338fe965d 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -161,6 +161,7 @@ extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; +extern void detect_extended_topology(struct cpuinfo_x86 *c); #if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64) extern void detect_ht(struct cpuinfo_x86 *c); #else -- cgit v1.2.3 From 58f7c98850a226d3fb05b1095af9f7c4ea3507ba Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Aug 2008 13:52:25 -0700 Subject: x86: split e820 reserved entries record to late v2 so could let BAR res register at first, or even pnp. v2: insert e820 reserve resources before pnp_system_init Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/asm-x86/e820.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index ca433c36af7..5abbdec06bd 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -122,6 +122,7 @@ extern void e820_register_active_regions(int nid, unsigned long start_pfn, extern u64 e820_hole_size(u64 start, u64 end); extern void finish_e820_parsing(void); extern void e820_reserve_resources(void); +extern void e820_reserve_resources_late(void); extern void setup_memory_map(void); extern char *default_machine_specific_memory_setup(void); extern char *machine_specific_memory_setup(void); -- cgit v1.2.3 From 3da99c97763703b23cbf2bd6e96252256d4e4617 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:44 +0200 Subject: x86: make (early)_identify_cpu more the same between 32bit and 64 bit 1. add extended_cpuid_level for 32bit 2. add generic_identify for 64bit 3. add early_identify_cpu for 32bit 4. early_identify_cpu not be called by identify_cpu 5. remove early in get_cpu_vendor for 32bit 6. add get_cpu_cap 7. add cpu_detect for 64bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/asm-x86/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 4df3e2f6fb5..8208cc1debc 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -77,9 +77,9 @@ struct cpuinfo_x86 { __u8 x86_phys_bits; /* CPUID returned core id bits: */ __u8 x86_coreid_bits; +#endif /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; -#endif /* Maximum supported CPUID level, -1=no CPUID: */ int cpuid_level; __u32 x86_capability[NCAPINTS]; -- cgit v1.2.3 From 97e4db7c8719f67c52793eeca5ec4df4c3407f2a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:08:59 -0700 Subject: x86: make detect_ht depend on CONFIG_X86_HT 64-bit has X86_HT set too, so use that instead of SMP. This also removes a include/asm-x86/processor.h ifdef. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/asm-x86/processor.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index bbbbe1fc5ce..fc5e961c5b6 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -166,11 +166,7 @@ extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; extern void detect_extended_topology(struct cpuinfo_x86 *c); -#if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64) extern void detect_ht(struct cpuinfo_x86 *c); -#else -static inline void detect_ht(struct cpuinfo_x86 *c) {} -#endif static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) -- cgit v1.2.3 From 110e0358e7dfd9cc56d47077068f3680dae10b56 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 28 Aug 2008 13:58:39 -0700 Subject: x86: make sure the CPA test code's use of _PAGE_UNUSED1 is obvious The CPA test code uses _PAGE_UNUSED1, so make sure its obvious. Signed-off-by: Jeremy Fitzhardinge Cc: Nick Piggin Signed-off-by: Ingo Molnar --- include/asm-x86/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 04caa2f544d..eccd52406ab 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -19,6 +19,7 @@ #define _PAGE_BIT_UNUSED3 11 #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 +#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) @@ -36,6 +37,7 @@ #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) +#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) #define __HAVE_ARCH_PTE_SPECIAL #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) -- cgit v1.2.3 From e51af6630848406fc97adbd71443818cdcda297b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 4 Sep 2008 09:54:37 +0100 Subject: x86: blacklist DMAR on Intel G31/G33 chipsets Some BIOSes (the Intel DG33BU, for example) wrongly claim to have DMAR when they don't. Avoid the resulting crashes when it doesn't work as expected. I'd still be grateful if someone could test it on a DG33BU with the old BIOS though, since I've killed mine. I tested the DMI version, but not this one. Signed-off-by: David Woodhouse Signed-off-by: Ingo Molnar --- include/asm-x86/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index 5f888cc5be4..621a1af94c4 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -6,6 +6,7 @@ extern void no_iommu_init(void); extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; +extern int dmar_disabled; extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); -- cgit v1.2.3 From afe73824f52d6767c77e9456f573a76075108279 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 29 Aug 2008 13:15:28 +0100 Subject: x86-64: eliminate dead code Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- include/asm-x86/mmu.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/mmu.h b/include/asm-x86/mmu.h index 00e88679e11..80a1dee5bea 100644 --- a/include/asm-x86/mmu.h +++ b/include/asm-x86/mmu.h @@ -7,14 +7,9 @@ /* * The x86 doesn't have a mmu context, but * we put the segment information here. - * - * cpu_vm_mask is used to optimize ldt flushing. */ typedef struct { void *ldt; -#ifdef CONFIG_X86_64 - rwlock_t ldtlock; -#endif int size; struct mutex lock; void *vdso; -- cgit v1.2.3 From 11fdd252bb5b461289fc5c21dc8fc87dc66f3284 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:50 -0700 Subject: x86: cpu make amd.c more like amd_64.c v2 1. make 32bit have early_init_amd_mc and amd_detect_cmp 2. seperate init_amd_k5/k6/k7 ... v2: fix compiling for !CONFIG_SMP Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/asm-x86/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index fc5e961c5b6..62531ecbde1 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -75,9 +75,9 @@ struct cpuinfo_x86 { int x86_tlbsize; __u8 x86_virt_bits; __u8 x86_phys_bits; +#endif /* CPUID returned core id bits: */ __u8 x86_coreid_bits; -#endif /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ -- cgit v1.2.3 From de9f521fb72dd091aa4989fe2e004ecf4785a850 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 8 Sep 2008 18:10:11 +0900 Subject: x86: move pci-nommu's dma_mask check to common code The check to see if dev->dma_mask is NULL in pci-nommu is more appropriate for dma_alloc_coherent(). Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/dma-mapping.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index 3a9a6f5e681..088c56814aa 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -254,6 +254,9 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp |= GFP_DMA; } + if (!dev->dma_mask) + return NULL; + if (ops->alloc_coherent) return ops->alloc_coherent(dev, size, dma_handle, gfp); -- cgit v1.2.3 From 8a53ad675f86ee003482b557da944e070d3c4859 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 8 Sep 2008 18:10:12 +0900 Subject: x86: fix nommu_alloc_coherent allocation with NULL device argument We need to use __GFP_DMA for NULL device argument (fallback_dev) with pci-nommu. It's a hack for ISA (and some old code) so we need to use GFP_DMA. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/dma-mapping.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index 088c56814aa..ad8b49032d1 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -246,6 +246,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, struct dma_mapping_ops *ops = get_dma_ops(dev); void *memory; + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) return memory; -- cgit v1.2.3 From 823e7e8c6ef12cd1943dc42fe7595ca74e8cc3d7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 8 Sep 2008 18:10:13 +0900 Subject: x86: dma_alloc_coherent sets gfp flags properly Non real IOMMU implemenations (which doesn't do virtual mappings, e.g. swiotlb, pci-nommu, etc) need to use proper gfp flags and dma_mask to allocate pages in their own dma_alloc_coherent() (allocated page need to be suitable for device's coherent_dma_mask). This patch makes dma_alloc_coherent do this job so that IOMMUs don't need to take care of it any more. Real IOMMU implemenataions can simply ignore the gfp flags. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/dma-mapping.h | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index ad8b49032d1..0cc022b9a4a 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -239,6 +239,29 @@ static inline int dma_get_cache_alignment(void) return boot_cpu_data.x86_clflush_size; } +static inline unsigned long dma_alloc_coherent_mask(struct device *dev, + gfp_t gfp) +{ + unsigned long dma_mask = 0; + + dma_mask = dev->coherent_dma_mask; + if (!dma_mask) + dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; + + return dma_mask; +} + +static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) +{ + unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp); + +#ifdef CONFIG_X86_64 + if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) + gfp |= GFP_DMA32; +#endif + return gfp; +} + static inline void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) @@ -259,10 +282,11 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (!dev->dma_mask) return NULL; - if (ops->alloc_coherent) - return ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; + if (!ops->alloc_coherent) + return NULL; + + return ops->alloc_coherent(dev, size, dma_handle, + dma_alloc_coherent_gfp_flags(dev, gfp)); } static inline void dma_free_coherent(struct device *dev, size_t size, -- cgit v1.2.3 From 9fcaff0e660d886e9a766460adbe558dd25de31b Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Mon, 8 Sep 2008 16:19:11 -0700 Subject: x86: unused variable in dma_alloc_coherent_gfp_flags() Fixed a warning caused by a badly placed ifdef. Signed-off-by: Steven Noonan Signed-off-by: Ingo Molnar --- include/asm-x86/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index 0cc022b9a4a..56075320b81 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -253,9 +253,9 @@ static inline unsigned long dma_alloc_coherent_mask(struct device *dev, static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) { +#ifdef CONFIG_X86_64 unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp); -#ifdef CONFIG_X86_64 if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) gfp |= GFP_DMA32; #endif -- cgit v1.2.3 From 879d792b66d633bbe466974f61d1acc9aa8c78eb Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:37 -0700 Subject: x86: let intel 64-bit use intel.c now that arch/x86/kernel/cpu/intel_64.c and arch/x86/kernel/cpu/intel.c are equal, drop arch/x86/kernel/cpu/intel_64.c and fix up the glue. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/asm-x86/bugs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h index ae514c76a96..dc604985f2a 100644 --- a/include/asm-x86/bugs.h +++ b/include/asm-x86/bugs.h @@ -3,7 +3,7 @@ extern void check_bugs(void); -#ifdef CONFIG_CPU_SUP_INTEL_32 +#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) int ppro_with_ram_bug(void); #else static inline int ppro_with_ram_bug(void) { return 0; } -- cgit v1.2.3 From 91030ca1e739696812242c807b112ee3981a14be Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 9 Sep 2008 16:42:45 +0100 Subject: x86: unsigned long pte_pfn pte_pfn() has always been of type unsigned long, even on 32-bit PAE; but in the current tip/next/mm tree it works out to be unsigned long long on 64-bit, which gives an irritating warning if you try to printk a pfn with the usual %lx. Now use the same pte_pfn() function, moved from pgtable-3level.h to pgtable.h, for all models: as suggested by Jeremy Fitzhardinge. And pte_page() can well move along with it (remaining a macro to avoid dependence on mm_types.h). Signed-off-by: Hugh Dickins Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/asm-x86/pgtable-2level.h | 2 -- include/asm-x86/pgtable-3level.h | 7 ------- include/asm-x86/pgtable.h | 7 +++++++ include/asm-x86/pgtable_64.h | 2 -- 4 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h index 46bc52c0eae..d8c55071ceb 100644 --- a/include/asm-x86/pgtable-2level.h +++ b/include/asm-x86/pgtable-2level.h @@ -53,9 +53,7 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #endif -#define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) -#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) /* * Bits 0, 6 and 7 are taken, split up the 29 bits of offset diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h index 105057f3403..975da0dc142 100644 --- a/include/asm-x86/pgtable-3level.h +++ b/include/asm-x86/pgtable-3level.h @@ -151,18 +151,11 @@ static inline int pte_same(pte_t a, pte_t b) return a.pte_low == b.pte_low && a.pte_high == b.pte_high; } -#define pte_page(x) pfn_to_page(pte_pfn(x)) - static inline int pte_none(pte_t pte) { return !pte.pte_low && !pte.pte_high; } -static inline unsigned long pte_pfn(pte_t pte) -{ - return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; -} - /* * Bits 0, 6 and 7 are taken in the low part of the pte, * put the 32 bits of offset into the high part. diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 04caa2f544d..efc329b8a71 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -186,6 +186,13 @@ static inline int pte_special(pte_t pte) return pte_val(pte) & _PAGE_SPECIAL; } +static inline unsigned long pte_pfn(pte_t pte) +{ + return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) + static inline int pmd_large(pmd_t pte) { return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index 549144d03d9..e454e4ec016 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -175,8 +175,6 @@ static inline int pmd_bad(pmd_t pmd) #define pte_present(x) (pte_val((x)) & (_PAGE_PRESENT | _PAGE_PROTNONE)) #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ -#define pte_page(x) pfn_to_page(pte_pfn((x))) -#define pte_pfn(x) ((pte_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT) /* * Macro to mark a page protection value as "uncacheable". -- cgit v1.2.3 From 8a493d37f049b631e19584d1cb84cec88cbcf8fc Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 00:49:47 +0900 Subject: x86: remove duplicated extern force_iommu Both iommu.h and dma-mapping.h have extern force_iommu. The latter doesn't need to do. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- include/asm-x86/dma-mapping.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index 56075320b81..f43420b8c57 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -15,7 +15,6 @@ extern dma_addr_t bad_dma_address; extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; -extern int force_iommu; struct dma_mapping_ops { int (*mapping_error)(struct device *dev, -- cgit v1.2.3 From 982162602b31041b426edec6480d327743abcbcc Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 00:49:48 +0900 Subject: x86: convert dma_alloc_coherent to use is_device_dma_capable Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- include/asm-x86/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index f43420b8c57..f408e6dd177 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -278,7 +278,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp |= GFP_DMA; } - if (!dev->dma_mask) + if (!is_device_dma_capable(dev)) return NULL; if (!ops->alloc_coherent) -- cgit v1.2.3 From 315a6558f30a264c88274fa70626615d1c7851c7 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 9 Sep 2008 14:54:53 +0800 Subject: x86: move VMX MSRs to msr-index.h They are hardware specific MSRs, and we would use them in virtualization feature detection later. Signed-off-by: Sheng Yang Signed-off-by: Ingo Molnar --- include/asm-x86/msr-index.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h index 3052f058ab0..0bb43301a20 100644 --- a/include/asm-x86/msr-index.h +++ b/include/asm-x86/msr-index.h @@ -176,6 +176,7 @@ #define MSR_IA32_TSC 0x00000010 #define MSR_IA32_PLATFORM_ID 0x00000017 #define MSR_IA32_EBL_CR_POWERON 0x0000002a +#define MSR_IA32_FEATURE_CONTROL 0x0000003a #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) @@ -310,4 +311,19 @@ /* Geode defined MSRs */ #define MSR_GEODE_BUSCONT_CONF0 0x00001900 +/* Intel VT MSRs */ +#define MSR_IA32_VMX_BASIC 0x00000480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 +#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 +#define MSR_IA32_VMX_MISC 0x00000485 +#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 +#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 +#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 +#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 +#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c + #endif /* ASM_X86__MSR_INDEX_H */ -- cgit v1.2.3 From e38e05a85828dac23540cd007df5f20985388afc Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 10 Sep 2008 18:53:34 +0800 Subject: x86: extended "flags" to show virtualization HW feature in /proc/cpuinfo The hardware virtualization technology evolves very fast. But currently it's hard to tell if your CPU support a certain kind of HW technology without digging into the source code. The patch add a new catagory in "flags" under /proc/cpuinfo. Now "flags" can indicate the (important) HW virtulization features the CPU supported as well. Current implementation just cover Intel VMX side. Signed-off-by: Sheng Yang Signed-off-by: Ingo Molnar --- include/asm-x86/cpufeature.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 7ac4d93d20e..8d45690bef5 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -6,7 +6,7 @@ #include -#define NCAPINTS 8 /* N 32-bit words worth of info */ +#define NCAPINTS 9 /* N 32-bit words worth of info */ /* * Note: If the comment begins with a quoted string, that string is used @@ -150,6 +150,13 @@ */ #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ +/* Virtualization flags: Linux defined */ +#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ +#define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */ +#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */ +#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */ + #if defined(__KERNEL__) && !defined(__ASSEMBLY__) #include -- cgit v1.2.3 From fbdbf709938d155c719c76b9894d28342632c797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 15 Sep 2008 22:02:43 +0200 Subject: x86, debug: gpio_free might sleep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the documentation gpio_free should only be called from task context only. To make this more explicit add a might sleep to all implementations. This patch changes the gpio_free implementations for the x86 architecture. Signed-off-by: Uwe Kleine-König Signed-off-by: Ingo Molnar --- include/asm-x86/mach-rdc321x/gpio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/mach-rdc321x/gpio.h b/include/asm-x86/mach-rdc321x/gpio.h index acce0b7d397..3639ece6485 100644 --- a/include/asm-x86/mach-rdc321x/gpio.h +++ b/include/asm-x86/mach-rdc321x/gpio.h @@ -1,6 +1,8 @@ #ifndef _RDC321X_GPIO_H #define _RDC321X_GPIO_H +#include + extern int rdc_gpio_get_value(unsigned gpio); extern void rdc_gpio_set_value(unsigned gpio, int value); extern int rdc_gpio_direction_input(unsigned gpio); @@ -18,6 +20,7 @@ static inline int gpio_request(unsigned gpio, const char *label) static inline void gpio_free(unsigned gpio) { + might_sleep(); rdc_gpio_free(gpio); } -- cgit v1.2.3 From 2842e5bf3115193f05dc9dac20f940e7abf44c1a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 18 Sep 2008 15:23:43 +0200 Subject: x86: move GART TLB flushing options to generic code The GART currently implements the iommu=[no]fullflush command line parameters which influence its IO/TLB flushing strategy. This patch makes these parameters generic so that they can be used by the AMD IOMMU too. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index 621a1af94c4..67b2fd56c6d 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -7,6 +7,7 @@ extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int dmar_disabled; +extern int iommu_fullflush; extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); -- cgit v1.2.3 From 1c65577398589bb44ab0980f9b9d30804b48a5db Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 18:40:05 +0200 Subject: AMD IOMMU: implement lazy IO/TLB flushing The IO/TLB flushing on every unmaping operation is the most expensive part in AMD IOMMU code and not strictly necessary. It is sufficient to do the flush before any entries are reused. This is patch implements lazy IO/TLB flushing which does exactly this. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/amd_iommu_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index dcc81206739..dcc472445ff 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -196,6 +196,9 @@ struct dma_ops_domain { * just calculate its address in constant time. */ u64 **pte_pages; + + /* This will be set to true when TLB needs to be flushed */ + bool need_flush; }; /* -- cgit v1.2.3 From 335503e57b6b8de04cec5d27eb2c3d09ff98905b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2008 14:29:07 +0200 Subject: AMD IOMMU: add event buffer allocation This patch adds the allocation of a event buffer for each AMD IOMMU in the system. The hardware will log events like device page faults or other errors to this buffer once this is enabled. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/amd_iommu_types.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index dcc472445ff..8b8cd0c60b3 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -116,6 +116,10 @@ #define MMIO_CMD_SIZE_SHIFT 56 #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) +/* constants for event buffer handling */ +#define EVT_BUFFER_SIZE 8192 /* 512 entries */ +#define EVT_LEN_MASK (0x9ULL << 56) + #define PAGE_MODE_1_LEVEL 0x01 #define PAGE_MODE_2_LEVEL 0x02 #define PAGE_MODE_3_LEVEL 0x03 @@ -243,6 +247,11 @@ struct amd_iommu { /* size of command buffer */ u32 cmd_buf_size; + /* event buffer virtual address */ + u8 *evt_buf; + /* size of event buffer */ + u32 evt_buf_size; + /* if one, we need to send a completion wait command */ int need_sync; -- cgit v1.2.3 From ee893c24edb8ebab9a3fb66566855572579ad616 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Sep 2008 14:48:04 +0200 Subject: AMD IOMMU: save pci segment from ACPI tables This patch adds the pci_seg field to the amd_iommu structure and fills it with the corresponding value from the ACPI table. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/amd_iommu_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index 8b8cd0c60b3..20814b85bbc 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -232,6 +232,9 @@ struct amd_iommu { /* capabilities of that IOMMU read from ACPI */ u32 cap; + /* pci domain of this IOMMU */ + u16 pci_seg; + /* first device this IOMMU handles. read from PCI */ u16 first_device; /* last device this IOMMU handles. read from PCI */ -- cgit v1.2.3 From 3eaf28a1cd2686aaa185b54d5a5e18e91b41f7f2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Sep 2008 15:55:10 +0200 Subject: AMD IOMMU: save pci_dev instead of devid We need the pci_dev later anyways to enable MSI for the IOMMU hardware. So remove the devid pointing to the BDF and replace it with the pci_dev structure where the IOMMU is implemented. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/amd_iommu_types.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index 20814b85bbc..a5629a21557 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -215,8 +215,9 @@ struct amd_iommu { /* locks the accesses to the hardware */ spinlock_t lock; - /* device id of this IOMMU */ - u16 devid; + /* Pointer to PCI device of this IOMMU */ + struct pci_dev *dev; + /* * Capability pointer. There could be more than one IOMMU per PCI * device function if there are more than one AMD IOMMU capability -- cgit v1.2.3 From a80dc3e0e0dc8393158de317d66ae0f345dc58f9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 16:51:41 +0200 Subject: AMD IOMMU: add MSI interrupt support The AMD IOMMU can generate interrupts for various reasons. This patch adds the basic interrupt enabling infrastructure to the driver. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/amd_iommu.h | 3 +++ include/asm-x86/amd_iommu_types.h | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/amd_iommu.h b/include/asm-x86/amd_iommu.h index 30a12049353..2fd97cb250c 100644 --- a/include/asm-x86/amd_iommu.h +++ b/include/asm-x86/amd_iommu.h @@ -20,10 +20,13 @@ #ifndef _ASM_X86_AMD_IOMMU_H #define _ASM_X86_AMD_IOMMU_H +#include + #ifdef CONFIG_AMD_IOMMU extern int amd_iommu_init(void); extern int amd_iommu_init_dma_ops(void); extern void amd_iommu_detect(void); +extern irqreturn_t amd_iommu_int_handler(int irq, void *data); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index a5629a21557..8533f09b34b 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -37,6 +37,7 @@ /* Capability offsets used by the driver */ #define MMIO_CAP_HDR_OFFSET 0x00 #define MMIO_RANGE_OFFSET 0x0c +#define MMIO_MISC_OFFSET 0x10 /* Masks, shifts and macros to parse the device range capability */ #define MMIO_RANGE_LD_MASK 0xff000000 @@ -48,6 +49,7 @@ #define MMIO_GET_LD(x) (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT) #define MMIO_GET_FD(x) (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT) #define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT) +#define MMIO_MSI_NUM(x) ((x) & 0x1f) /* Flag masks for the AMD IOMMU exclusion range */ #define MMIO_EXCL_ENABLE_MASK 0x01ULL @@ -255,10 +257,15 @@ struct amd_iommu { u8 *evt_buf; /* size of event buffer */ u32 evt_buf_size; + /* MSI number for event interrupt */ + u16 evt_msi_num; /* if one, we need to send a completion wait command */ int need_sync; + /* true if interrupts for this IOMMU are already enabled */ + bool int_enabled; + /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; }; -- cgit v1.2.3 From 90008ee4b811c944455752dcb72b291a5ba81b53 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 16:41:05 +0200 Subject: AMD IOMMU: add event handling code This patch adds code for polling and printing out events generated by the AMD IOMMU. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/amd_iommu_types.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index 8533f09b34b..d8c5a6c6995 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -71,6 +71,25 @@ /* MMIO status bits */ #define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 +/* event logging constants */ +#define EVENT_ENTRY_SIZE 0x10 +#define EVENT_TYPE_SHIFT 28 +#define EVENT_TYPE_MASK 0xf +#define EVENT_TYPE_ILL_DEV 0x1 +#define EVENT_TYPE_IO_FAULT 0x2 +#define EVENT_TYPE_DEV_TAB_ERR 0x3 +#define EVENT_TYPE_PAGE_TAB_ERR 0x4 +#define EVENT_TYPE_ILL_CMD 0x5 +#define EVENT_TYPE_CMD_HARD_ERR 0x6 +#define EVENT_TYPE_IOTLB_INV_TO 0x7 +#define EVENT_TYPE_INV_DEV_REQ 0x8 +#define EVENT_DEVID_MASK 0xffff +#define EVENT_DEVID_SHIFT 0 +#define EVENT_DOMID_MASK 0xffff +#define EVENT_DOMID_SHIFT 0 +#define EVENT_FLAGS_MASK 0xfff +#define EVENT_FLAGS_SHIFT 0x10 + /* feature control bits */ #define CONTROL_IOMMU_EN 0x00ULL #define CONTROL_HT_TUN_EN 0x01ULL @@ -165,6 +184,9 @@ #define MAX_DOMAIN_ID 65536 +/* FIXME: move this macro to */ +#define PCI_BUS(x) (((x) >> 8) & 0xff) + /* * This structure contains generic data for IOMMU protection domains * independent of their use. -- cgit v1.2.3 From bd60b735c658e6e8c656e89771d281bcfcf51279 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 10:24:48 +0200 Subject: AMD IOMMU: don't assign preallocated protection domains to devices In isolation mode the protection domains for the devices are preallocated and preassigned. This is bad if a device should be passed to a virtualization guest because the IOMMU code does not know if it is in use by a driver. This patch changes the code to assign the device to the preallocated domain only if there are dma mapping requests for it. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/amd_iommu_types.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index d8c5a6c6995..9aa22ead22f 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -227,6 +227,12 @@ struct dma_ops_domain { /* This will be set to true when TLB needs to be flushed */ bool need_flush; + + /* + * if this is a preallocated domain, keep the device for which it was + * preallocated in this variable + */ + u16 target_dev; }; /* -- cgit v1.2.3 From 38ddf41b198e21d3ecbe5752e875857b7ce7589e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 10:38:32 +0200 Subject: AMD IOMMU: some set_device_domain cleanups Remove some magic numbers and split the pte_root using standard functions. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/amd_iommu_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index 9aa22ead22f..f953309a636 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -130,6 +130,8 @@ #define DEV_ENTRY_NMI_PASS 0xba #define DEV_ENTRY_LINT0_PASS 0xbe #define DEV_ENTRY_LINT1_PASS 0xbf +#define DEV_ENTRY_MODE_MASK 0x07 +#define DEV_ENTRY_MODE_SHIFT 0x09 /* constants to configure the command buffer */ #define CMD_BUFFER_SIZE 8192 @@ -159,6 +161,7 @@ #define IOMMU_MAP_SIZE_L3 (1ULL << 39) #define IOMMU_PTE_P (1ULL << 0) +#define IOMMU_PTE_TV (1ULL << 1) #define IOMMU_PTE_U (1ULL << 59) #define IOMMU_PTE_FC (1ULL << 60) #define IOMMU_PTE_IR (1ULL << 61) -- cgit v1.2.3 From b3e15bdef689641e7f1bb03efbe56112c3ee82e2 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 22 Sep 2008 13:13:59 -0400 Subject: x86, NMI watchdog: setup before enabling NMI watchdog There's a small window when NMI watchdog is being set up that if any NMIs are triggered, the NMI code will make make use of not initalized wd_ops elements: void setup_apic_nmi_watchdog(void *unused) { if (__get_cpu_var(wd_enabled)) return; /* cheap hack to support suspend/resume */ /* if cpu0 is not active neither should the other cpus */ if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) return; switch (nmi_watchdog) { case NMI_LOCAL_APIC: /* enable it before to avoid race with handler */ --> __get_cpu_var(wd_enabled) = 1; --> if (lapic_watchdog_init(nmi_hz) < 0) { (...) asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) { (...) if (nmi_watchdog_tick(regs, reason)) return; (...) notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { (...) if (!__get_cpu_var(wd_enabled)) return rc; switch (nmi_watchdog) { case NMI_LOCAL_APIC: rc |= lapic_wd_event(nmi_hz); (...) int lapic_wd_event(unsigned nmi_hz) { struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 ctr; --> rdmsrl(wd->perfctr_msr, ctr); and wd->*_msr will be initialized on each processor type specific setup, after enabling NMIs for PMIs. Since the counter was just set, the chances of an performance counter generated NMI is minimal, but any other unknown NMI would trigger the problem. This patch fixes the problem by setting everything up before enabling performance counter generated NMIs and will set wd_enabled using a callback function. Signed-off-by: Aristeu Rozanski Acked-by: Don Zickus Acked-by: Prarit Bhargava Acked-by: Vivek Goyal Signed-off-by: Ingo Molnar --- include/asm-x86/nmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 21f8d0202a8..02bfc81cbd6 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -34,6 +34,7 @@ extern void stop_apic_nmi_watchdog(void *); extern void disable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void); extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); +extern void cpu_nmi_set_wd_enabled(void); extern atomic_t nmi_active; extern unsigned int nmi_watchdog; -- cgit v1.2.3 From ed6dc4981368aa8ac89b0ea61535cfa2b03533cb Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 23 Sep 2008 01:15:10 +0900 Subject: x86: remove set_bit_string() "export iommu_area_reserve helper funciton" patch converted all the users of set_bit_string, GART, Calgary and AMD IOMMU drivers, to use iommu_area_reserve helper function. Now we can remove unused set_bit_string function. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- include/asm-x86/bitops.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index cfb2b64f76e..28ea8742b6e 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -424,16 +424,6 @@ static inline int fls(int x) #undef ADDR -static inline void set_bit_string(unsigned long *bitmap, - unsigned long i, int len) -{ - unsigned long end = i + len; - while (i < end) { - __set_bit(i, bitmap); - i++; - } -} - #ifdef __KERNEL__ #include -- cgit v1.2.3 From afa9fdc2f5f8e4d98f3e77bfa204412cbc181346 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 20 Sep 2008 01:23:30 +0900 Subject: iommu: remove fullflush and nofullflush in IOMMU generic option This patch against tip/x86/iommu virtually reverts 2842e5bf3115193f05dc9dac20f940e7abf44c1a. But just reverting the commit breaks AMD IOMMU so this patch also includes some fixes. The above commit adds new two options to x86 IOMMU generic kernel boot options, fullflush and nofullflush. But such change that affects all the IOMMUs needs more discussion (all IOMMU parties need the chance to discuss it): http://lkml.org/lkml/2008/9/19/106 Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/amd_iommu_types.h | 6 ++++++ include/asm-x86/iommu.h | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index f953309a636..4ff892f3b0a 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -376,6 +376,12 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap; /* will be 1 if device isolation is enabled */ extern int amd_iommu_isolate; +/* + * If true, the addresses will be flushed on unmap time, not when + * they are reused + */ +extern bool amd_iommu_unmap_flush; + /* takes a PCI device id and prints it out in a readable form */ static inline void print_devid(u16 devid, int nl) { diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index 67b2fd56c6d..621a1af94c4 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -7,7 +7,6 @@ extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int dmar_disabled; -extern int iommu_fullflush; extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); -- cgit v1.2.3 From 4faac97d44ac27bdbb010a9c3597401a8f89341f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Sep 2008 18:54:29 +0200 Subject: x86: prevent stale state of c1e_mask across CPU offline/online Impact: hang which happens across CPU offline/online on AMD C1E systems. When a CPU goes offline then the corresponding bit in the broadcast mask is cleared. For AMD C1E enabled CPUs we do not reenable the broadcast when the CPU comes online again as we do not clear the corresponding bit in the c1e_mask, which keeps track which CPUs have been switched to broadcast already. So on those !$@#& machines we never switch back to broadcasting after a CPU offline/online cycle. Clear the bit when the CPU plays dead. Signed-off-by: Thomas Gleixner --- include/asm-x86/idle.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/idle.h b/include/asm-x86/idle.h index d240e5b30a4..cbb64912361 100644 --- a/include/asm-x86/idle.h +++ b/include/asm-x86/idle.h @@ -10,4 +10,6 @@ void idle_notifier_register(struct notifier_block *n); void enter_idle(void); void exit_idle(void); +void c1e_remove_cpu(int cpu); + #endif -- cgit v1.2.3 From a8d6829044901a67732904be5f1eacdf8539604f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Sep 2008 19:02:25 +0200 Subject: x86: prevent C-states hang on AMD C1E enabled machines Impact: System hang when AMD C1E machines switch into C2/C3 AMD C1E enabled systems do not work with normal ACPI C-states even if the BIOS is advertising them. Limit the C-states to C1 for the ACPI processor idle code. Signed-off-by: Thomas Gleixner --- include/asm-x86/acpi.h | 2 ++ include/asm-x86/cpufeature.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h index 635d764dc13..35d1743b57a 100644 --- a/include/asm-x86/acpi.h +++ b/include/asm-x86/acpi.h @@ -140,6 +140,8 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) boot_cpu_data.x86_model <= 0x05 && boot_cpu_data.x86_mask < 0x0A) return 1; + else if (boot_cpu_has(X86_FEATURE_AMDC1E)) + return 1; else return max_cstate; } diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 9489283a4bc..cfcfb0a806b 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -81,6 +81,7 @@ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ #define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */ #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ -- cgit v1.2.3 From 9f6ac57729724b58df81ca5dc005326759a806fe Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:35 +0900 Subject: x86: export pci-nommu's alloc_coherent This patch exports nommu_alloc_coherent (renamed dma_generic_alloc_coherent). GART needs this function. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- include/asm-x86/dma-mapping.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index f408e6dd177..3b808e9bb72 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -89,6 +89,9 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) extern int dma_supported(struct device *hwdev, u64 mask); extern int dma_set_mask(struct device *dev, u64 mask); +extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag); + static inline dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size, int direction) -- cgit v1.2.3 From 95dbf1dbe39ed336a3e72116c95cfa98dd3457e6 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 26 Sep 2008 10:36:42 -0500 Subject: kgdb, x86_64: gdb serial has BX and DX reversed The BX and DX registers in the gdb serial register packet need to be flipped for gdb to receive the correct data. Signed-off-by: Jason Wessel --- include/asm-x86/kgdb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/kgdb.h b/include/asm-x86/kgdb.h index 484c47554f3..e86b3060bdc 100644 --- a/include/asm-x86/kgdb.h +++ b/include/asm-x86/kgdb.h @@ -42,9 +42,9 @@ enum regnames { #else /* ! CONFIG_X86_32 */ enum regnames { GDB_AX, /* 0 */ - GDB_DX, /* 1 */ + GDB_BX, /* 1 */ GDB_CX, /* 2 */ - GDB_BX, /* 3 */ + GDB_DX, /* 3 */ GDB_SI, /* 4 */ GDB_DI, /* 5 */ GDB_BP, /* 6 */ -- cgit v1.2.3 From 703a1edcd1534468fc18f733c03bd91a65c8c6f0 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 26 Sep 2008 10:36:42 -0500 Subject: kgdb, x86_64: fix PS CS SS registers in gdb serial On x86_64 the gdb serial register structure defines the PS (also known as eflags), CS and SS registers as 4 bytes entities. This patch splits the x86_64 regnames enum into a 32 and 64 version to account for the 32 bit entities in the gdb serial packets. Also the program counter is properly filled in for the sleeping threads. Signed-off-by: Jason Wessel --- include/asm-x86/kgdb.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/kgdb.h b/include/asm-x86/kgdb.h index e86b3060bdc..94d63db1036 100644 --- a/include/asm-x86/kgdb.h +++ b/include/asm-x86/kgdb.h @@ -39,8 +39,9 @@ enum regnames { GDB_FS, /* 14 */ GDB_GS, /* 15 */ }; +#define NUMREGBYTES ((GDB_GS+1)*4) #else /* ! CONFIG_X86_32 */ -enum regnames { +enum regnames64 { GDB_AX, /* 0 */ GDB_BX, /* 1 */ GDB_CX, /* 2 */ @@ -58,18 +59,15 @@ enum regnames { GDB_R14, /* 14 */ GDB_R15, /* 15 */ GDB_PC, /* 16 */ - GDB_PS, /* 17 */ }; -#endif /* CONFIG_X86_32 */ -/* - * Number of bytes of registers: - */ -#ifdef CONFIG_X86_32 -# define NUMREGBYTES 64 -#else -# define NUMREGBYTES ((GDB_PS+1)*8) -#endif +enum regnames32 { + GDB_PS = 34, + GDB_CS, + GDB_SS, +}; +#define NUMREGBYTES ((GDB_SS+1)*4) +#endif /* CONFIG_X86_32 */ static inline void arch_kgdb_breakpoint(void) { -- cgit v1.2.3 From 237a62247c2879331986a300d6ab36ad21264c68 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:13:53 +0200 Subject: x86/iommu: make GART driver checkpatch clean Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- include/asm-x86/gart.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h index 3f62a83887f..cdf4f78e081 100644 --- a/include/asm-x86/gart.h +++ b/include/asm-x86/gart.h @@ -29,6 +29,8 @@ extern int fix_aperture; #define AMD64_GARTCACHECTL 0x9c #define AMD64_GARTEN (1<<0) +extern int agp_amd64_init(void); + static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) { u32 tmp, ctl; -- cgit v1.2.3 From 9b1568458a3ef006361710dc12848aec891883b5 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Mon, 29 Sep 2008 14:52:03 -0400 Subject: x86, debug printouts: IOMMU setup failures should not be KERN_ERR The number of BIOSes that have an option to enable the IOMMU, or fix anything about its configuration, is vanishingly small. There's no good reason to punish quiet boot for this. Signed-off-by: Adam Jackson Signed-off-by: Ingo Molnar --- include/asm-x86/gart.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h index 3f62a83887f..583031cf45f 100644 --- a/include/asm-x86/gart.h +++ b/include/asm-x86/gart.h @@ -52,15 +52,15 @@ static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) return 0; if (aper_base + aper_size > 0x100000000ULL) { - printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n"); + printk(KERN_INFO "Aperture beyond 4GB. Ignoring.\n"); return 0; } if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { - printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n"); + printk(KERN_INFO "Aperture pointing to e820 RAM. Ignoring.\n"); return 0; } if (aper_size < min_size) { - printk(KERN_ERR "Aperture too small (%d MB) than (%d MB)\n", + printk(KERN_INFO "Aperture too small (%d MB) than (%d MB)\n", aper_size>>20, min_size>>20); return 0; } -- cgit v1.2.3 From 16dbc6c9616363fe53811abcbd935336dc0a0f01 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 2 Oct 2008 14:50:12 -0700 Subject: inotify: fix lock ordering wrt do_page_fault's mmap_sem Fix inotify lock order reversal with mmap_sem due to holding locks over copy_to_user. Signed-off-by: Nick Piggin Reported-by: "Daniel J Blueman" Tested-by: "Daniel J Blueman" Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-x86/uaccess_64.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h index 515d4dce96b..45806d60bcb 100644 --- a/include/asm-x86/uaccess_64.h +++ b/include/asm-x86/uaccess_64.h @@ -7,6 +7,7 @@ #include #include #include +#include #include /* -- cgit v1.2.3 From b2bc27314664c4d1a2f02e6f4cd0c32e4681d61e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 23 Sep 2008 14:00:36 -0700 Subject: x86, cpa: rename PTE attribute macros for kernel direct mapping in early boot Signed-off-by: Suresh Siddha Cc: Suresh Siddha Cc: arjan@linux.intel.com Cc: venkatesh.pallipadi@intel.com Cc: jeremy@goop.org Signed-off-by: Ingo Molnar --- include/asm-x86/pgtable.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index eccd52406ab..0ff73e7737a 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -132,6 +132,17 @@ #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC +/* + * early identity mapping pte attrib macros. + */ +#ifdef CONFIG_X86_64 +#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC +#else +#define PTE_IDENT_ATTR 0x007 /* PRESENT+RW+USER */ +#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ +#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ +#endif + #ifndef __ASSEMBLY__ /* -- cgit v1.2.3 From 3a85e770aa77e4f1a4096275c97b64c10cd7323e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 23 Sep 2008 14:00:37 -0700 Subject: x86, cpa: remove USER permission from the very early identity mapping attribute remove USER from the PTE/PDE attributes for the very early identity mapping. We overwrite these mappings with KERNEL attribute later in the boot. Just being paranoid here as there is no need for USER bit to be set. If this breaks something(don't know the history), then we can simply drop this change. Signed-off-by: Suresh Siddha Cc: Suresh Siddha Cc: arjan@linux.intel.com Cc: venkatesh.pallipadi@intel.com Cc: jeremy@goop.org Signed-off-by: Ingo Molnar --- include/asm-x86/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 0ff73e7737a..bbf0f591d1b 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -138,8 +138,8 @@ #ifdef CONFIG_X86_64 #define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC #else -#define PTE_IDENT_ATTR 0x007 /* PRESENT+RW+USER */ -#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ +#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ +#define PDE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */ #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ #endif -- cgit v1.2.3 From 8311eb84bf842d345f543f4c62ca2b6ea26f638c Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 23 Sep 2008 14:00:41 -0700 Subject: x86, cpa: remove cpa pool code Interrupt context no longer splits large page in cpa(). So we can do away with cpa memory pool code. Signed-off-by: Suresh Siddha Cc: Suresh Siddha Cc: arjan@linux.intel.com Cc: venkatesh.pallipadi@intel.com Cc: jeremy@goop.org Signed-off-by: Ingo Molnar --- include/asm-x86/cacheflush.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/asm-x86') diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index 0a5f71817b3..8e205a13125 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -99,8 +99,6 @@ int set_pages_rw(struct page *page, int numpages); void clflush_cache_range(void *addr, unsigned int size); -void cpa_init(void); - #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void); extern const int rodata_test_data; -- cgit v1.2.3 From 9542ada803198e6eba29d3289abb39ea82047b92 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 24 Sep 2008 08:53:33 -0700 Subject: x86: track memtype for RAM in page struct Track the memtype for RAM pages in page struct instead of using the memtype list. This avoids the explosion in the number of entries in memtype list (of the order of 20,000 with AGP) and makes the PAT tracking simpler. We are using PG_arch_1 bit in page->flags. We still use the memtype list for non RAM pages. Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- include/asm-x86/cacheflush.h | 2 ++ include/asm-x86/page.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/asm-x86') diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index 8e205a13125..092b9b4eb00 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -24,6 +24,8 @@ #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ memcpy((dst), (src), (len)) +#define PG_non_WB PG_arch_1 +PAGEFLAG(NonWB, non_WB) /* * The set_memory_* API can be used to change various attributes of a virtual diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h index 49982110e4d..3407ac12ba3 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -57,6 +57,7 @@ typedef struct { pgdval_t pgd; } pgd_t; typedef struct { pgprotval_t pgprot; } pgprot_t; extern int page_is_ram(unsigned long pagenr); +extern int pagerange_is_ram(unsigned long start, unsigned long end); extern int devmem_is_allowed(unsigned long pagenr); extern void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot); -- cgit v1.2.3