From 93fa7636dfdc059b25df148f230c0991096afdef Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 8 Apr 2008 11:01:58 +0200 Subject: x86, ptrace: PEBS support Polish the ds.h interface and add support for PEBS. Ds.c is meant to be the resource allocator for per-thread and per-cpu BTS and PEBS recording. It is used by ptrace/utrace to provide execution tracing of debugged tasks. It will be used by profilers (e.g. perfmon2). It may be used by kernel debuggers to provide a kernel execution trace. Changes in detail: - guard DS and ptrace by CONFIG macros - separate DS and BTS more clearly - simplify field accesses - add functions to manage PEBS buffers - add simple protection/allocation mechanism - added support for Atom Opens: - buffer overflow handling Currently, only circular buffers are supported. This is all we need for debugging. Profilers would want an overflow notification. This is planned to be added when perfmon2 is made to use the ds.h interface. - utrace intermediate layer Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel.c | 3 +- arch/x86/kernel/ds.c | 953 ++++++++++++++++++++++++++++++------------- arch/x86/kernel/process_32.c | 25 +- arch/x86/kernel/process_64.c | 25 +- arch/x86/kernel/ptrace.c | 444 ++++++++++++-------- arch/x86/kernel/setup_64.c | 3 +- 6 files changed, 991 insertions(+), 462 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fe9224c51d3..cbffa2a25a1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); } if (cpu_has_bts) - ds_init_intel(c); + ptrace_bts_init_intel(c); } static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 11c11b8ec48..5b32b6d062b 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -2,26 +2,48 @@ * Debug Store support * * This provides a low-level interface to the hardware's Debug Store - * feature that is used for last branch recording (LBR) and + * feature that is used for branch trace store (BTS) and * precise-event based sampling (PEBS). * - * Different architectures use a different DS layout/pointer size. - * The below functions therefore work on a void*. + * It manages: + * - per-thread and per-cpu allocation of BTS and PEBS + * - buffer memory allocation (optional) + * - buffer overflow handling + * - buffer access * + * It assumes: + * - get_task_struct on all parameter tasks + * - current is allowed to trace parameter tasks * - * Since there is no user for PEBS, yet, only LBR (or branch - * trace store, BTS) is supported. * - * - * Copyright (C) 2007 Intel Corporation. - * Markus Metzger , Dec 2007 + * Copyright (C) 2007-2008 Intel Corporation. + * Markus Metzger , 2007-2008 */ + +#ifdef CONFIG_X86_DS + #include #include #include #include +#include + + +/* + * The configuration for a particular DS hardware implementation. + */ +struct ds_configuration { + /* the size of the DS structure in bytes */ + unsigned char sizeof_ds; + /* the size of one pointer-typed field in the DS structure in bytes; + this covers the first 8 fields related to buffer management. */ + unsigned char sizeof_field; + /* the size of a BTS/PEBS record in bytes */ + unsigned char sizeof_rec[2]; +}; +static struct ds_configuration ds_cfg; /* @@ -44,378 +66,747 @@ * (interrupt occurs when write pointer passes interrupt pointer) * - value to which counter is reset following counter overflow * - * On later architectures, the last branch recording hardware uses - * 64bit pointers even in 32bit mode. - * - * - * Branch Trace Store (BTS) records store information about control - * flow changes. They at least provide the following information: - * - source linear address - * - destination linear address + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. * - * Netburst supported a predicated bit that had been dropped in later - * architectures. We do not suppor it. * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * - an offset giving the start of the respective region * - * In order to abstract from the actual DS and BTS layout, we describe - * the access to the relevant fields. - * Thanks to Andi Kleen for proposing this design. + * This offset is further used to index various arrays holding + * information for BTS and PEBS at the respective index. * - * The implementation, however, is not as general as it might seem. In - * order to stay somewhat simple and efficient, we assume an - * underlying unsigned type (mostly a pointer type) and we expect the - * field to be at least as big as that type. + * On later 32bit processors, we only access the lower 32bit of the + * 64bit pointer fields. The upper halves will be zeroed out. */ -/* - * A special from_ip address to indicate that the BTS record is an - * info record that needs to be interpreted or skipped. - */ -#define BTS_ESCAPE_ADDRESS (-1) +enum ds_field { + ds_buffer_base = 0, + ds_index, + ds_absolute_maximum, + ds_interrupt_threshold, +}; -/* - * A field access descriptor - */ -struct access_desc { - unsigned char offset; - unsigned char size; +enum ds_qualifier { + ds_bts = 0, + ds_pebs }; +static inline unsigned long ds_get(const unsigned char *base, + enum ds_qualifier qual, enum ds_field field) +{ + base += (ds_cfg.sizeof_field * (field + (4 * qual))); + return *(unsigned long *)base; +} + +static inline void ds_set(unsigned char *base, enum ds_qualifier qual, + enum ds_field field, unsigned long value) +{ + base += (ds_cfg.sizeof_field * (field + (4 * qual))); + (*(unsigned long *)base) = value; +} + + /* - * The configuration for a particular DS/BTS hardware implementation. + * Locking is done only for allocating BTS or PEBS resources and for + * guarding context and buffer memory allocation. + * + * Most functions require the current task to own the ds context part + * they are going to access. All the locking is done when validating + * access to the context. */ -struct ds_configuration { - /* the DS configuration */ - unsigned char sizeof_ds; - struct access_desc bts_buffer_base; - struct access_desc bts_index; - struct access_desc bts_absolute_maximum; - struct access_desc bts_interrupt_threshold; - /* the BTS configuration */ - unsigned char sizeof_bts; - struct access_desc from_ip; - struct access_desc to_ip; - /* BTS variants used to store additional information like - timestamps */ - struct access_desc info_type; - struct access_desc info_data; - unsigned long debugctl_mask; -}; +static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); /* - * The global configuration used by the below accessor functions + * Validate that the current task is allowed to access the BTS/PEBS + * buffer of the parameter task. + * + * Returns 0, if access is granted; -Eerrno, otherwise. */ -static struct ds_configuration ds_cfg; +static inline int ds_validate_access(struct ds_context *context, + enum ds_qualifier qual) +{ + if (!context) + return -EPERM; + + if (context->owner[qual] == current) + return 0; + + return -EPERM; +} + /* - * Accessor functions for some DS and BTS fields using the above - * global ptrace_bts_cfg. + * We either support (system-wide) per-cpu or per-thread allocation. + * We distinguish the two based on the task_struct pointer, where a + * NULL pointer indicates per-cpu allocation for the current cpu. + * + * Allocations are use-counted. As soon as resources are allocated, + * further allocations must be of the same type (per-cpu or + * per-thread). We model this by counting allocations (i.e. the number + * of tracers of a certain type) for one type negatively: + * =0 no tracers + * >0 number of per-thread tracers + * <0 number of per-cpu tracers + * + * The below functions to get and put tracers and to check the + * allocation type require the ds_lock to be held by the caller. + * + * Tracers essentially gives the number of ds contexts for a certain + * type of allocation. */ -static inline unsigned long get_bts_buffer_base(char *base) +static long tracers; + +static inline void get_tracer(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); + tracers += (task ? 1 : -1); } -static inline void set_bts_buffer_base(char *base, unsigned long value) + +static inline void put_tracer(struct task_struct *task) { - (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; + tracers -= (task ? 1 : -1); } -static inline unsigned long get_bts_index(char *base) + +static inline int check_tracer(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_index.offset); + return (task ? (tracers >= 0) : (tracers <= 0)); } -static inline void set_bts_index(char *base, unsigned long value) + + +/* + * The DS context is either attached to a thread or to a cpu: + * - in the former case, the thread_struct contains a pointer to the + * attached context. + * - in the latter case, we use a static array of per-cpu context + * pointers. + * + * Contexts are use-counted. They are allocated on first access and + * deallocated when the last user puts the context. + * + * We distinguish between an allocating and a non-allocating get of a + * context: + * - the allocating get is used for requesting BTS/PEBS resources. It + * requires the caller to hold the global ds_lock. + * - the non-allocating get is used for all other cases. A + * non-existing context indicates an error. It acquires and releases + * the ds_lock itself for obtaining the context. + * + * A context and its DS configuration are allocated and deallocated + * together. A context always has a DS configuration of the + * appropriate size. + */ +static DEFINE_PER_CPU(struct ds_context *, system_context); + +#define this_system_context per_cpu(system_context, smp_processor_id()) + +/* + * Returns the pointer to the parameter task's context or to the + * system-wide context, if task is NULL. + * + * Increases the use count of the returned context, if not NULL. + */ +static inline struct ds_context *ds_get_context(struct task_struct *task) { - (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; + struct ds_context *context; + + spin_lock(&ds_lock); + + context = (task ? task->thread.ds_ctx : this_system_context); + if (context) + context->count++; + + spin_unlock(&ds_lock); + + return context; } -static inline unsigned long get_bts_absolute_maximum(char *base) + +/* + * Same as ds_get_context, but allocates the context and it's DS + * structure, if necessary; returns NULL; if out of memory. + * + * pre: requires ds_lock to be held + */ +static inline struct ds_context *ds_alloc_context(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); + struct ds_context **p_context = + (task ? &task->thread.ds_ctx : &this_system_context); + struct ds_context *context = *p_context; + + if (!context) { + context = kzalloc(sizeof(*context), GFP_KERNEL); + + if (!context) + return 0; + + context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + if (!context->ds) { + kfree(context); + return 0; + } + + *p_context = context; + + context->this = p_context; + context->task = task; + + if (task) + set_tsk_thread_flag(task, TIF_DS_AREA_MSR); + + if (!task || (task == current)) + wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); + + get_tracer(task); + } + + context->count++; + + return context; } -static inline void set_bts_absolute_maximum(char *base, unsigned long value) + +/* + * Decreases the use count of the parameter context, if not NULL. + * Deallocates the context, if the use count reaches zero. + */ +static inline void ds_put_context(struct ds_context *context) { - (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; + if (!context) + return; + + spin_lock(&ds_lock); + + if (--context->count) + goto out; + + *(context->this) = 0; + + if (context->task) + clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); + + if (!context->task || (context->task == current)) + wrmsrl(MSR_IA32_DS_AREA, 0); + + put_tracer(context->task); + + /* free any leftover buffers from tracers that did not + * deallocate them properly. */ + kfree(context->buffer[ds_bts]); + kfree(context->buffer[ds_pebs]); + kfree(context->ds); + kfree(context); + out: + spin_unlock(&ds_lock); } -static inline unsigned long get_bts_interrupt_threshold(char *base) + + +/* + * Handle a buffer overflow + * + * task: the task whose buffers are overflowing; + * NULL for a buffer overflow on the current cpu + * context: the ds context + * qual: the buffer type + */ +static void ds_overflow(struct task_struct *task, struct ds_context *context, + enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); + if (!context) + return; + + if (context->callback[qual]) + (*context->callback[qual])(task); + + /* todo: do some more overflow handling */ } -static inline void set_bts_interrupt_threshold(char *base, unsigned long value) + + +/* + * Allocate a non-pageable buffer of the parameter size. + * Checks the memory and the locked memory rlimit. + * + * Returns the buffer, if successful; + * NULL, if out of memory or rlimit exceeded. + * + * size: the requested buffer size in bytes + * pages (out): if not NULL, contains the number of pages reserved + */ +static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) { - (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; + unsigned long rlim, vm, pgsz; + void *buffer; + + pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = current->mm->total_vm + pgsz; + if (rlim < vm) + return 0; + + rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = current->mm->locked_vm + pgsz; + if (rlim < vm) + return 0; + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + return 0; + + current->mm->total_vm += pgsz; + current->mm->locked_vm += pgsz; + + if (pages) + *pages = pgsz; + + return buffer; } -static inline unsigned long get_from_ip(char *base) + +static int ds_request(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl, enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.from_ip.offset); + struct ds_context *context; + unsigned long buffer, adj; + const unsigned long alignment = (1 << 3); + int error = 0; + + if (!ds_cfg.sizeof_ds) + return -EOPNOTSUPP; + + /* we require some space to do alignment adjustments below */ + if (size < (alignment + ds_cfg.sizeof_rec[qual])) + return -EINVAL; + + /* buffer overflow notification is not yet implemented */ + if (ovfl) + return -EOPNOTSUPP; + + + spin_lock(&ds_lock); + + if (!check_tracer(task)) + return -EPERM; + + error = -ENOMEM; + context = ds_alloc_context(task); + if (!context) + goto out_unlock; + + error = -EALREADY; + if (context->owner[qual] == current) + goto out_unlock; + error = -EPERM; + if (context->owner[qual] != 0) + goto out_unlock; + context->owner[qual] = current; + + spin_unlock(&ds_lock); + + + error = -ENOMEM; + if (!base) { + base = ds_allocate_buffer(size, &context->pages[qual]); + if (!base) + goto out_release; + + context->buffer[qual] = base; + } + error = 0; + + context->callback[qual] = ovfl; + + /* adjust the buffer address and size to meet alignment + * constraints: + * - buffer is double-word aligned + * - size is multiple of record size + * + * We checked the size at the very beginning; we have enough + * space to do the adjustment. + */ + buffer = (unsigned long)base; + + adj = ALIGN(buffer, alignment) - buffer; + buffer += adj; + size -= adj; + + size /= ds_cfg.sizeof_rec[qual]; + size *= ds_cfg.sizeof_rec[qual]; + + ds_set(context->ds, qual, ds_buffer_base, buffer); + ds_set(context->ds, qual, ds_index, buffer); + ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); + + if (ovfl) { + /* todo: select a suitable interrupt threshold */ + } else + ds_set(context->ds, qual, + ds_interrupt_threshold, buffer + size + 1); + + /* we keep the context until ds_release */ + return error; + + out_release: + context->owner[qual] = 0; + ds_put_context(context); + return error; + + out_unlock: + spin_unlock(&ds_lock); + ds_put_context(context); + return error; } -static inline void set_from_ip(char *base, unsigned long value) + +int ds_request_bts(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl) { - (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; + return ds_request(task, base, size, ovfl, ds_bts); } -static inline unsigned long get_to_ip(char *base) + +int ds_request_pebs(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl) { - return *(unsigned long *)(base + ds_cfg.to_ip.offset); + return ds_request(task, base, size, ovfl, ds_pebs); } -static inline void set_to_ip(char *base, unsigned long value) + +static int ds_release(struct task_struct *task, enum ds_qualifier qual) { - (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; + struct ds_context *context; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + kfree(context->buffer[qual]); + context->buffer[qual] = 0; + + current->mm->total_vm -= context->pages[qual]; + current->mm->locked_vm -= context->pages[qual]; + context->pages[qual] = 0; + context->owner[qual] = 0; + + /* + * we put the context twice: + * once for the ds_get_context + * once for the corresponding ds_request + */ + ds_put_context(context); + out: + ds_put_context(context); + return error; } -static inline unsigned char get_info_type(char *base) + +int ds_release_bts(struct task_struct *task) { - return *(unsigned char *)(base + ds_cfg.info_type.offset); + return ds_release(task, ds_bts); } -static inline void set_info_type(char *base, unsigned char value) + +int ds_release_pebs(struct task_struct *task) { - (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; + return ds_release(task, ds_pebs); } -static inline unsigned long get_info_data(char *base) + +static int ds_get_index(struct task_struct *task, size_t *pos, + enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.info_data.offset); + struct ds_context *context; + unsigned long base, index; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + + error = ((index - base) / ds_cfg.sizeof_rec[qual]); + if (pos) + *pos = error; + out: + ds_put_context(context); + return error; } -static inline void set_info_data(char *base, unsigned long value) + +int ds_get_bts_index(struct task_struct *task, size_t *pos) { - (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; + return ds_get_index(task, pos, ds_bts); } +int ds_get_pebs_index(struct task_struct *task, size_t *pos) +{ + return ds_get_index(task, pos, ds_pebs); +} -int ds_allocate(void **dsp, size_t bts_size_in_bytes) +static int ds_get_end(struct task_struct *task, size_t *pos, + enum ds_qualifier qual) { - size_t bts_size_in_records; - unsigned long bts; - void *ds; + struct ds_context *context; + unsigned long base, end; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + base = ds_get(context->ds, qual, ds_buffer_base); + end = ds_get(context->ds, qual, ds_absolute_maximum); + + error = ((end - base) / ds_cfg.sizeof_rec[qual]); + if (pos) + *pos = error; + out: + ds_put_context(context); + return error; +} - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; +int ds_get_bts_end(struct task_struct *task, size_t *pos) +{ + return ds_get_end(task, pos, ds_bts); +} - if (bts_size_in_bytes < 0) - return -EINVAL; +int ds_get_pebs_end(struct task_struct *task, size_t *pos) +{ + return ds_get_end(task, pos, ds_pebs); +} - bts_size_in_records = - bts_size_in_bytes / ds_cfg.sizeof_bts; - bts_size_in_bytes = - bts_size_in_records * ds_cfg.sizeof_bts; +static int ds_access(struct task_struct *task, size_t index, + const void **record, enum ds_qualifier qual) +{ + struct ds_context *context; + unsigned long base, idx; + int error; - if (bts_size_in_bytes <= 0) + if (!record) return -EINVAL; - bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; - if (!bts) - return -ENOMEM; + base = ds_get(context->ds, qual, ds_buffer_base); + idx = base + (index * ds_cfg.sizeof_rec[qual]); - ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + error = -EINVAL; + if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) + goto out; - if (!ds) { - kfree((void *)bts); - return -ENOMEM; - } - - set_bts_buffer_base(ds, bts); - set_bts_index(ds, bts); - set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); - set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); - - *dsp = ds; - return 0; + *record = (const void *)idx; + error = ds_cfg.sizeof_rec[qual]; + out: + ds_put_context(context); + return error; } -int ds_free(void **dsp) +int ds_access_bts(struct task_struct *task, size_t index, const void **record) { - if (*dsp) { - kfree((void *)get_bts_buffer_base(*dsp)); - kfree(*dsp); - *dsp = NULL; - } - return 0; + return ds_access(task, index, record, ds_bts); } -int ds_get_bts_size(void *ds) +int ds_access_pebs(struct task_struct *task, size_t index, const void **record) { - int size_in_bytes; - - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; - - if (!ds) - return 0; - - size_in_bytes = - get_bts_absolute_maximum(ds) - - get_bts_buffer_base(ds); - return size_in_bytes; + return ds_access(task, index, record, ds_pebs); } -int ds_get_bts_end(void *ds) +static int ds_write(struct task_struct *task, const void *record, size_t size, + enum ds_qualifier qual, int force) { - int size_in_bytes = ds_get_bts_size(ds); - - if (size_in_bytes <= 0) - return size_in_bytes; + struct ds_context *context; + int error; - return size_in_bytes / ds_cfg.sizeof_bts; -} + if (!record) + return -EINVAL; -int ds_get_bts_index(void *ds) -{ - int index_offset_in_bytes; + error = -EPERM; + context = ds_get_context(task); + if (!context) + goto out; - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; + if (!force) { + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + } - index_offset_in_bytes = - get_bts_index(ds) - - get_bts_buffer_base(ds); + error = 0; + while (size) { + unsigned long base, index, end, write_end, int_th; + unsigned long write_size, adj_write_size; + + /* + * write as much as possible without producing an + * overflow interrupt. + * + * interrupt_threshold must either be + * - bigger than absolute_maximum or + * - point to a record between buffer_base and absolute_maximum + * + * index points to a valid record. + */ + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + end = ds_get(context->ds, qual, ds_absolute_maximum); + int_th = ds_get(context->ds, qual, ds_interrupt_threshold); + + write_end = min(end, int_th); + + /* if we are already beyond the interrupt threshold, + * we fill the entire buffer */ + if (write_end <= index) + write_end = end; + + if (write_end <= index) + goto out; + + write_size = min((unsigned long) size, write_end - index); + memcpy((void *)index, record, write_size); + + record = (const char *)record + write_size; + size -= write_size; + error += write_size; + + adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; + adj_write_size *= ds_cfg.sizeof_rec[qual]; + + /* zero out trailing bytes */ + memset((char *)index + write_size, 0, + adj_write_size - write_size); + index += adj_write_size; + + if (index >= end) + index = base; + ds_set(context->ds, qual, ds_index, index); + + if (index >= int_th) + ds_overflow(task, context, qual); + } - return index_offset_in_bytes / ds_cfg.sizeof_bts; + out: + ds_put_context(context); + return error; } -int ds_set_overflow(void *ds, int method) +int ds_write_bts(struct task_struct *task, const void *record, size_t size) { - switch (method) { - case DS_O_SIGNAL: - return -EOPNOTSUPP; - case DS_O_WRAP: - return 0; - default: - return -EINVAL; - } + return ds_write(task, record, size, ds_bts, /* force = */ 0); } -int ds_get_overflow(void *ds) +int ds_write_pebs(struct task_struct *task, const void *record, size_t size) { - return DS_O_WRAP; + return ds_write(task, record, size, ds_pebs, /* force = */ 0); } -int ds_clear(void *ds) +int ds_unchecked_write_bts(struct task_struct *task, + const void *record, size_t size) { - int bts_size = ds_get_bts_size(ds); - unsigned long bts_base; - - if (bts_size <= 0) - return bts_size; - - bts_base = get_bts_buffer_base(ds); - memset((void *)bts_base, 0, bts_size); - - set_bts_index(ds, bts_base); - return 0; + return ds_write(task, record, size, ds_bts, /* force = */ 1); } -int ds_read_bts(void *ds, int index, struct bts_struct *out) +int ds_unchecked_write_pebs(struct task_struct *task, + const void *record, size_t size) { - void *bts; + return ds_write(task, record, size, ds_pebs, /* force = */ 1); +} - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; +static int ds_reset_or_clear(struct task_struct *task, + enum ds_qualifier qual, int clear) +{ + struct ds_context *context; + unsigned long base, end; + int error; - if (index < 0) - return -EINVAL; + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; - if (index >= ds_get_bts_size(ds)) - return -EINVAL; + base = ds_get(context->ds, qual, ds_buffer_base); + end = ds_get(context->ds, qual, ds_absolute_maximum); - bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); + if (clear) + memset((void *)base, 0, end - base); - memset(out, 0, sizeof(*out)); - if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { - out->qualifier = get_info_type(bts); - out->variant.jiffies = get_info_data(bts); - } else { - out->qualifier = BTS_BRANCH; - out->variant.lbr.from_ip = get_from_ip(bts); - out->variant.lbr.to_ip = get_to_ip(bts); - } + ds_set(context->ds, qual, ds_index, base); - return sizeof(*out);; + error = 0; + out: + ds_put_context(context); + return error; } -int ds_write_bts(void *ds, const struct bts_struct *in) +int ds_reset_bts(struct task_struct *task) { - unsigned long bts; - - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; - - if (ds_get_bts_size(ds) <= 0) - return -ENXIO; + return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); +} - bts = get_bts_index(ds); +int ds_reset_pebs(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); +} - memset((void *)bts, 0, ds_cfg.sizeof_bts); - switch (in->qualifier) { - case BTS_INVALID: - break; +int ds_clear_bts(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); +} - case BTS_BRANCH: - set_from_ip((void *)bts, in->variant.lbr.from_ip); - set_to_ip((void *)bts, in->variant.lbr.to_ip); - break; +int ds_clear_pebs(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); +} - case BTS_TASK_ARRIVES: - case BTS_TASK_DEPARTS: - set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); - set_info_type((void *)bts, in->qualifier); - set_info_data((void *)bts, in->variant.jiffies); - break; +int ds_get_pebs_reset(struct task_struct *task, u64 *value) +{ + struct ds_context *context; + int error; - default: + if (!value) return -EINVAL; - } - bts = bts + ds_cfg.sizeof_bts; - if (bts >= get_bts_absolute_maximum(ds)) - bts = get_bts_buffer_base(ds); - set_bts_index(ds, bts); + context = ds_get_context(task); + error = ds_validate_access(context, ds_pebs); + if (error < 0) + goto out; - return ds_cfg.sizeof_bts; + *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); + + error = 0; + out: + ds_put_context(context); + return error; } -unsigned long ds_debugctl_mask(void) +int ds_set_pebs_reset(struct task_struct *task, u64 value) { - return ds_cfg.debugctl_mask; -} + struct ds_context *context; + int error; -#ifdef __i386__ -static const struct ds_configuration ds_cfg_netburst = { - .sizeof_ds = 9 * 4, - .bts_buffer_base = { 0, 4 }, - .bts_index = { 4, 4 }, - .bts_absolute_maximum = { 8, 4 }, - .bts_interrupt_threshold = { 12, 4 }, - .sizeof_bts = 3 * 4, - .from_ip = { 0, 4 }, - .to_ip = { 4, 4 }, - .info_type = { 4, 1 }, - .info_data = { 8, 4 }, - .debugctl_mask = (1<<2)|(1<<3) -}; + context = ds_get_context(task); + error = ds_validate_access(context, ds_pebs); + if (error < 0) + goto out; -static const struct ds_configuration ds_cfg_pentium_m = { - .sizeof_ds = 9 * 4, - .bts_buffer_base = { 0, 4 }, - .bts_index = { 4, 4 }, - .bts_absolute_maximum = { 8, 4 }, - .bts_interrupt_threshold = { 12, 4 }, - .sizeof_bts = 3 * 4, - .from_ip = { 0, 4 }, - .to_ip = { 4, 4 }, - .info_type = { 4, 1 }, - .info_data = { 8, 4 }, - .debugctl_mask = (1<<6)|(1<<7) + *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; + + error = 0; + out: + ds_put_context(context); + return error; +} + +static const struct ds_configuration ds_cfg_var = { + .sizeof_ds = sizeof(long) * 12, + .sizeof_field = sizeof(long), + .sizeof_rec[ds_bts] = sizeof(long) * 3, + .sizeof_rec[ds_pebs] = sizeof(long) * 10 }; -#endif /* _i386_ */ - -static const struct ds_configuration ds_cfg_core2 = { - .sizeof_ds = 9 * 8, - .bts_buffer_base = { 0, 8 }, - .bts_index = { 8, 8 }, - .bts_absolute_maximum = { 16, 8 }, - .bts_interrupt_threshold = { 24, 8 }, - .sizeof_bts = 3 * 8, - .from_ip = { 0, 8 }, - .to_ip = { 8, 8 }, - .info_type = { 8, 1 }, - .info_data = { 16, 8 }, - .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +static const struct ds_configuration ds_cfg_64 = { + .sizeof_ds = 8 * 12, + .sizeof_field = 8, + .sizeof_rec[ds_bts] = 8 * 3, + .sizeof_rec[ds_pebs] = 8 * 10 }; static inline void @@ -429,14 +820,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { -#ifdef __i386__ case 0xD: case 0xE: /* Pentium M */ - ds_configure(&ds_cfg_pentium_m); + ds_configure(&ds_cfg_var); break; -#endif /* _i386_ */ case 0xF: /* Core2 */ - ds_configure(&ds_cfg_core2); + case 0x1C: /* Atom */ + ds_configure(&ds_cfg_64); break; default: /* sorry, don't know about them */ @@ -445,13 +835,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) break; case 0xF: switch (c->x86_model) { -#ifdef __i386__ case 0x0: case 0x1: case 0x2: /* Netburst */ - ds_configure(&ds_cfg_netburst); + ds_configure(&ds_cfg_var); break; -#endif /* _i386_ */ default: /* sorry, don't know about them */ break; @@ -462,3 +850,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) break; } } + +void ds_free(struct ds_context *context) +{ + /* This is called when the task owning the parameter context + * is dying. There should not be any user of that context left + * to disturb us, anymore. */ + unsigned long leftovers = context->count; + while (leftovers--) + ds_put_context(context); +} +#endif /* CONFIG_X86_DS */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8476dfbb60..5cec8a75a4e 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -316,6 +316,14 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } +#ifdef CONFIG_X86_DS + /* Free any DS contexts that have not been properly released. */ + if (unlikely(current->thread.ds_ctx)) { + /* we clear debugctl to make sure DS is not used. */ + update_debugctlmsr(0); + ds_free(current->thread.ds_ctx); + } +#endif /* CONFIG_X86_DS */ } void flush_thread(void) @@ -482,18 +490,27 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, { struct thread_struct *prev, *next; unsigned long debugctl; + unsigned long ds_prev = 0, ds_next = 0; prev = &prev_p->thread; next = &next_p->thread; debugctl = prev->debugctlmsr; - if (next->ds_area_msr != prev->ds_area_msr) { + +#ifdef CONFIG_X86_DS + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { /* we clear debugctl to make sure DS * is not in use when we change it */ debugctl = 0; update_debugctlmsr(0); - wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); + wrmsr(MSR_IA32_DS_AREA, ds_next, 0); } +#endif /* CONFIG_X86_DS */ if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); @@ -517,13 +534,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, hard_enable_TSC(); } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988..ad213494a22 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -267,6 +267,14 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } +#ifdef CONFIG_X86_DS + /* Free any DS contexts that have not been properly released. */ + if (unlikely(t->ds_ctx)) { + /* we clear debugctl to make sure DS is not used. */ + update_debugctlmsr(0); + ds_free(t->ds_ctx); + } +#endif /* CONFIG_X86_DS */ } void flush_thread(void) @@ -492,18 +500,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, { struct thread_struct *prev, *next; unsigned long debugctl; + unsigned long ds_prev = 0, ds_next = 0; prev = &prev_p->thread, next = &next_p->thread; debugctl = prev->debugctlmsr; - if (next->ds_area_msr != prev->ds_area_msr) { + +#ifdef CONFIG_X86_DS + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { /* we clear debugctl to make sure DS * is not in use when we change it */ debugctl = 0; update_debugctlmsr(0); - wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); + wrmsrl(MSR_IA32_DS_AREA, ds_next); } +#endif /* CONFIG_X86_DS */ if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); @@ -541,13 +558,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ } /* diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index fb03ef380f0..b7ff783dc5f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -554,45 +554,115 @@ static int ptrace_set_debugreg(struct task_struct *child, return 0; } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS +/* + * The configuration for a particular BTS hardware implementation. + */ +struct bts_configuration { + /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ + unsigned char sizeof_bts; + /* the size of a field in the BTS record in bytes */ + unsigned char sizeof_field; + /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ + unsigned long debugctl_mask; +}; +static struct bts_configuration bts_cfg; + +#define BTS_MAX_RECORD_SIZE (8 * 3) + + +/* + * Branch Trace Store (BTS) uses the following format. Different + * architectures vary in the size of those fields. + * - source linear address + * - destination linear address + * - flags + * + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. + * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * + * In order to store additional information in the BTS buffer, we use + * a special source address to indicate that the record requires + * special interpretation. + * + * Netburst indicated via a bit in the flags field whether the branch + * was predicted; this is ignored. + */ -static int ptrace_bts_get_size(struct task_struct *child) +enum bts_field { + bts_from = 0, + bts_to, + bts_flags, + + bts_escape = (unsigned long)-1, + bts_qual = bts_to, + bts_jiffies = bts_flags +}; + +static inline unsigned long bts_get(const char *base, enum bts_field field) { - if (!child->thread.ds_area_msr) - return -ENXIO; + base += (bts_cfg.sizeof_field * field); + return *(unsigned long *)base; +} - return ds_get_bts_index((void *)child->thread.ds_area_msr); +static inline void bts_set(char *base, enum bts_field field, unsigned long val) +{ + base += (bts_cfg.sizeof_field * field);; + (*(unsigned long *)base) = val; } -static int ptrace_bts_read_record(struct task_struct *child, - long index, +/* + * Translate a BTS record from the raw format into the bts_struct format + * + * out (out): bts_struct interpretation + * raw: raw BTS record + */ +static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) +{ + memset(out, 0, sizeof(*out)); + if (bts_get(raw, bts_from) == bts_escape) { + out->qualifier = bts_get(raw, bts_qual); + out->variant.jiffies = bts_get(raw, bts_jiffies); + } else { + out->qualifier = BTS_BRANCH; + out->variant.lbr.from_ip = bts_get(raw, bts_from); + out->variant.lbr.to_ip = bts_get(raw, bts_to); + } +} + +static int ptrace_bts_read_record(struct task_struct *child, size_t index, struct bts_struct __user *out) { struct bts_struct ret; - int retval; - int bts_end; - int bts_index; - - if (!child->thread.ds_area_msr) - return -ENXIO; + const void *bts_record; + size_t bts_index, bts_end; + int error; - if (index < 0) - return -EINVAL; + error = ds_get_bts_end(child, &bts_end); + if (error < 0) + return error; - bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr); if (bts_end <= index) return -EINVAL; + error = ds_get_bts_index(child, &bts_index); + if (error < 0) + return error; + /* translate the ptrace bts index into the ds bts index */ - bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); - bts_index -= (index + 1); - if (bts_index < 0) - bts_index += bts_end; + bts_index += bts_end - (index + 1); + if (bts_end <= bts_index) + bts_index -= bts_end; - retval = ds_read_bts((void *)child->thread.ds_area_msr, - bts_index, &ret); - if (retval < 0) - return retval; + error = ds_access_bts(child, bts_index, &bts_record); + if (error < 0) + return error; + + ptrace_bts_translate_record(&ret, bts_record); if (copy_to_user(out, &ret, sizeof(ret))) return -EFAULT; @@ -600,101 +670,106 @@ static int ptrace_bts_read_record(struct task_struct *child, return sizeof(ret); } -static int ptrace_bts_clear(struct task_struct *child) -{ - if (!child->thread.ds_area_msr) - return -ENXIO; - - return ds_clear((void *)child->thread.ds_area_msr); -} - static int ptrace_bts_drain(struct task_struct *child, long size, struct bts_struct __user *out) { - int end, i; - void *ds = (void *)child->thread.ds_area_msr; - - if (!ds) - return -ENXIO; + struct bts_struct ret; + const unsigned char *raw; + size_t end, i; + int error; - end = ds_get_bts_index(ds); - if (end <= 0) - return end; + error = ds_get_bts_index(child, &end); + if (error < 0) + return error; if (size < (end * sizeof(struct bts_struct))) return -EIO; - for (i = 0; i < end; i++, out++) { - struct bts_struct ret; - int retval; + error = ds_access_bts(child, 0, (const void **)&raw); + if (error < 0) + return error; - retval = ds_read_bts(ds, i, &ret); - if (retval < 0) - return retval; + for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { + ptrace_bts_translate_record(&ret, raw); if (copy_to_user(out, &ret, sizeof(ret))) return -EFAULT; } - ds_clear(ds); + error = ds_clear_bts(child); + if (error < 0) + return error; return end; } +static void ptrace_bts_ovfl(struct task_struct *child) +{ + send_sig(child->thread.bts_ovfl_signal, child, 0); +} + static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) { struct ptrace_bts_config cfg; - int bts_size, ret = 0; - void *ds; + int error = 0; + + error = -EOPNOTSUPP; + if (!bts_cfg.sizeof_bts) + goto errout; + error = -EIO; if (cfg_size < sizeof(cfg)) - return -EIO; + goto errout; + error = -EFAULT; if (copy_from_user(&cfg, ucfg, sizeof(cfg))) - return -EFAULT; + goto errout; - if ((int)cfg.size < 0) - return -EINVAL; + error = -EINVAL; + if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && + !(cfg.flags & PTRACE_BTS_O_ALLOC)) + goto errout; - bts_size = 0; - ds = (void *)child->thread.ds_area_msr; - if (ds) { - bts_size = ds_get_bts_size(ds); - if (bts_size < 0) - return bts_size; - } - cfg.size = PAGE_ALIGN(cfg.size); + if (cfg.flags & PTRACE_BTS_O_ALLOC) { + ds_ovfl_callback_t ovfl = 0; + unsigned int sig = 0; - if (bts_size != cfg.size) { - ret = ptrace_bts_realloc(child, cfg.size, - cfg.flags & PTRACE_BTS_O_CUT_SIZE); - if (ret < 0) + /* we ignore the error in case we were not tracing child */ + (void)ds_release_bts(child); + + if (cfg.flags & PTRACE_BTS_O_SIGNAL) { + if (!cfg.signal) + goto errout; + + sig = cfg.signal; + ovfl = ptrace_bts_ovfl; + } + + error = ds_request_bts(child, /* base = */ 0, cfg.size, ovfl); + if (error < 0) goto errout; - ds = (void *)child->thread.ds_area_msr; + child->thread.bts_ovfl_signal = sig; } - if (cfg.flags & PTRACE_BTS_O_SIGNAL) - ret = ds_set_overflow(ds, DS_O_SIGNAL); - else - ret = ds_set_overflow(ds, DS_O_WRAP); - if (ret < 0) + error = -EINVAL; + if (!child->thread.ds_ctx && cfg.flags) goto errout; if (cfg.flags & PTRACE_BTS_O_TRACE) - child->thread.debugctlmsr |= ds_debugctl_mask(); + child->thread.debugctlmsr |= bts_cfg.debugctl_mask; else - child->thread.debugctlmsr &= ~ds_debugctl_mask(); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; if (cfg.flags & PTRACE_BTS_O_SCHED) set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); else clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - ret = sizeof(cfg); + error = sizeof(cfg); out: if (child->thread.debugctlmsr) @@ -702,10 +777,10 @@ out: else clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - return ret; + return error; errout: - child->thread.debugctlmsr &= ~ds_debugctl_mask(); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); goto out; } @@ -714,29 +789,40 @@ static int ptrace_bts_status(struct task_struct *child, long cfg_size, struct ptrace_bts_config __user *ucfg) { - void *ds = (void *)child->thread.ds_area_msr; struct ptrace_bts_config cfg; + size_t end; + const void *base, *max; + int error; if (cfg_size < sizeof(cfg)) return -EIO; - memset(&cfg, 0, sizeof(cfg)); + error = ds_get_bts_end(child, &end); + if (error < 0) + return error; - if (ds) { - cfg.size = ds_get_bts_size(ds); + error = ds_access_bts(child, /* index = */ 0, &base); + if (error < 0) + return error; - if (ds_get_overflow(ds) == DS_O_SIGNAL) - cfg.flags |= PTRACE_BTS_O_SIGNAL; + error = ds_access_bts(child, /* index = */ end, &max); + if (error < 0) + return error; - if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && - child->thread.debugctlmsr & ds_debugctl_mask()) - cfg.flags |= PTRACE_BTS_O_TRACE; + memset(&cfg, 0, sizeof(cfg)); + cfg.size = (max - base); + cfg.signal = child->thread.bts_ovfl_signal; + cfg.bts_size = sizeof(struct bts_struct); - if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) - cfg.flags |= PTRACE_BTS_O_SCHED; - } + if (cfg.signal) + cfg.flags |= PTRACE_BTS_O_SIGNAL; - cfg.bts_size = sizeof(struct bts_struct); + if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && + child->thread.debugctlmsr & bts_cfg.debugctl_mask) + cfg.flags |= PTRACE_BTS_O_TRACE; + + if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) + cfg.flags |= PTRACE_BTS_O_SCHED; if (copy_to_user(ucfg, &cfg, sizeof(cfg))) return -EFAULT; @@ -744,89 +830,38 @@ static int ptrace_bts_status(struct task_struct *child, return sizeof(cfg); } - static int ptrace_bts_write_record(struct task_struct *child, const struct bts_struct *in) { - int retval; + unsigned char bts_record[BTS_MAX_RECORD_SIZE]; - if (!child->thread.ds_area_msr) - return -ENXIO; + BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); - retval = ds_write_bts((void *)child->thread.ds_area_msr, in); - if (retval) - return retval; + memset(bts_record, 0, bts_cfg.sizeof_bts); + switch (in->qualifier) { + case BTS_INVALID: + break; - return sizeof(*in); -} + case BTS_BRANCH: + bts_set(bts_record, bts_from, in->variant.lbr.from_ip); + bts_set(bts_record, bts_to, in->variant.lbr.to_ip); + break; -static int ptrace_bts_realloc(struct task_struct *child, - int size, int reduce_size) -{ - unsigned long rlim, vm; - int ret, old_size; + case BTS_TASK_ARRIVES: + case BTS_TASK_DEPARTS: + bts_set(bts_record, bts_from, bts_escape); + bts_set(bts_record, bts_qual, in->qualifier); + bts_set(bts_record, bts_jiffies, in->variant.jiffies); + break; - if (size < 0) + default: return -EINVAL; - - old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); - if (old_size < 0) - return old_size; - - ret = ds_free((void **)&child->thread.ds_area_msr); - if (ret < 0) - goto out; - - size >>= PAGE_SHIFT; - old_size >>= PAGE_SHIFT; - - current->mm->total_vm -= old_size; - current->mm->locked_vm -= old_size; - - if (size == 0) - goto out; - - rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; - vm = current->mm->total_vm + size; - if (rlim < vm) { - ret = -ENOMEM; - - if (!reduce_size) - goto out; - - size = rlim - current->mm->total_vm; - if (size <= 0) - goto out; } - rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - vm = current->mm->locked_vm + size; - if (rlim < vm) { - ret = -ENOMEM; - - if (!reduce_size) - goto out; - - size = rlim - current->mm->locked_vm; - if (size <= 0) - goto out; - } - - ret = ds_allocate((void **)&child->thread.ds_area_msr, - size << PAGE_SHIFT); - if (ret < 0) - goto out; - - current->mm->total_vm += size; - current->mm->locked_vm += size; - -out: - if (child->thread.ds_area_msr) - set_tsk_thread_flag(child, TIF_DS_AREA_MSR); - else - clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); - - return ret; + /* The writing task will be the switched-to task on a context + * switch. It needs to write into the switched-from task's BTS + * buffer. */ + return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); } void ptrace_bts_take_timestamp(struct task_struct *tsk, @@ -839,7 +874,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk, ptrace_bts_write_record(tsk, &rec); } -#endif /* X86_BTS */ + +static const struct bts_configuration bts_cfg_netburst = { + .sizeof_bts = sizeof(long) * 3, + .sizeof_field = sizeof(long), + .debugctl_mask = (1<<2)|(1<<3)|(1<<5) +}; + +static const struct bts_configuration bts_cfg_pentium_m = { + .sizeof_bts = sizeof(long) * 3, + .sizeof_field = sizeof(long), + .debugctl_mask = (1<<6)|(1<<7) +}; + +static const struct bts_configuration bts_cfg_core2 = { + .sizeof_bts = 8 * 3, + .sizeof_field = 8, + .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +}; + +static inline void bts_configure(const struct bts_configuration *cfg) +{ + bts_cfg = *cfg; +} + +void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 0x6: + switch (c->x86_model) { + case 0xD: + case 0xE: /* Pentium M */ + bts_configure(&bts_cfg_pentium_m); + break; + case 0xF: /* Core2 */ + case 0x1C: /* Atom */ + bts_configure(&bts_cfg_core2); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + case 0xF: + switch (c->x86_model) { + case 0x0: + case 0x1: + case 0x2: /* Netburst */ + bts_configure(&bts_cfg_netburst); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + default: + /* sorry, don't know about them */ + break; + } +} +#endif /* CONFIG_X86_PTRACE_BTS */ /* * Called by kernel/ptrace.c when detaching.. @@ -852,15 +946,15 @@ void ptrace_disable(struct task_struct *child) #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif - if (child->thread.ds_area_msr) { -#ifdef X86_BTS - ptrace_bts_realloc(child, 0, 0); -#endif - child->thread.debugctlmsr &= ~ds_debugctl_mask(); - if (!child->thread.debugctlmsr) - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - } +#ifdef CONFIG_X86_PTRACE_BTS + (void)ds_release_bts(child); + + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + + clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); +#endif /* CONFIG_X86_PTRACE_BTS */ } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION @@ -980,7 +1074,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) /* * These bits need more cooking - not enabled yet: */ -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS case PTRACE_BTS_CONFIG: ret = ptrace_bts_config (child, data, (struct ptrace_bts_config __user *)addr); @@ -992,7 +1086,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_SIZE: - ret = ptrace_bts_get_size(child); + ret = ds_get_bts_index(child, /* pos = */ 0); break; case PTRACE_BTS_GET: @@ -1001,14 +1095,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_CLEAR: - ret = ptrace_bts_clear(child); + ret = ds_clear_bts(child); break; case PTRACE_BTS_DRAIN: ret = ptrace_bts_drain (child, data, (struct bts_struct __user *) addr); break; -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ default: ret = ptrace_request(child, request, addr, data); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index f2fc8feb727..f7aebe13c99 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -920,11 +920,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); } if (cpu_has_bts) - ds_init_intel(c); + ptrace_bts_init_intel(c); n = c->extended_cpuid_level; if (n >= 0x80000008) { -- cgit v1.2.3 From 970e725098a6da5a9c1f8128102c812e31a0444c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 16 Apr 2008 16:40:17 -0700 Subject: x86, ptrace: PEBS support, warning fix arch/x86/kernel/process_32.c:566: warning: unused variable 'ds_next' arch/x86/kernel/process_32.c:566: warning: unused variable 'ds_prev' Cc: Markus Metzger Cc: Andi Kleen Cc: "H. Peter Anvin" Cc: "Siddha, Suresh B" Cc: Roland McGrath Cc: Michael Kerrisk Cc: Cc: stephane eranian Cc: Jason Wessel Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5cec8a75a4e..496ea3110aa 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -484,20 +484,13 @@ int set_tsc_mode(unsigned int val) return 0; } -static noinline void -__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +#ifdef CONFIG_X86_DS +static int update_debugctl(struct thread_struct *prev, + struct thread_struct *next, unsigned long debugctl) { - struct thread_struct *prev, *next; - unsigned long debugctl; - unsigned long ds_prev = 0, ds_next = 0; + unsigned long ds_prev = 0; + unsigned long ds_next = 0; - prev = &prev_p->thread; - next = &next_p->thread; - - debugctl = prev->debugctlmsr; - -#ifdef CONFIG_X86_DS if (prev->ds_ctx) ds_prev = (unsigned long)prev->ds_ctx->ds; if (next->ds_ctx) @@ -510,8 +503,28 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, update_debugctlmsr(0); wrmsr(MSR_IA32_DS_AREA, ds_next, 0); } + return debugctl; +} +#else +static int update_debugctl(struct thread_struct *prev, + struct thread_struct *next, unsigned long debugctl) +{ + return debugctl; +} #endif /* CONFIG_X86_DS */ +static noinline void +__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss) +{ + struct thread_struct *prev, *next; + unsigned long debugctl; + + prev = &prev_p->thread; + next = &next_p->thread; + + debugctl = update_debugctl(prev, next, prev->debugctlmsr); + if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); -- cgit v1.2.3 From 573da4224e8c3800e613d715e909c3179a7e3cb2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 28 Apr 2008 23:15:04 +0400 Subject: x86: DS cleanup - dont treat 0 as NULL Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ds.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 5b32b6d062b..24a323c9599 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -238,12 +238,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) - return 0; + return NULL; context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); if (!context->ds) { kfree(context); - return 0; + return NULL; } *p_context = context; @@ -279,7 +279,7 @@ static inline void ds_put_context(struct ds_context *context) if (--context->count) goto out; - *(context->this) = 0; + *(context->this) = NULL; if (context->task) clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); @@ -341,16 +341,16 @@ static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; vm = current->mm->total_vm + pgsz; if (rlim < vm) - return 0; + return NULL; rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; vm = current->mm->locked_vm + pgsz; if (rlim < vm) - return 0; + return NULL; buffer = kzalloc(size, GFP_KERNEL); if (!buffer) - return 0; + return NULL; current->mm->total_vm += pgsz; current->mm->locked_vm += pgsz; @@ -395,7 +395,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, if (context->owner[qual] == current) goto out_unlock; error = -EPERM; - if (context->owner[qual] != 0) + if (context->owner[qual] != NULL) goto out_unlock; context->owner[qual] = current; @@ -445,7 +445,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, return error; out_release: - context->owner[qual] = 0; + context->owner[qual] = NULL; ds_put_context(context); return error; @@ -825,7 +825,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) ds_configure(&ds_cfg_var); break; case 0xF: /* Core2 */ - case 0x1C: /* Atom */ + case 0x1C: /* Atom */ ds_configure(&ds_cfg_64); break; default: -- cgit v1.2.3 From 34b2cd5b688b012975fcfc3b3970fc3508fa82c4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 17 May 2008 08:30:07 +0200 Subject: x86: PEBS cleanup Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad213494a22..4a93c98a60a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -500,7 +500,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, { struct thread_struct *prev, *next; unsigned long debugctl; - unsigned long ds_prev = 0, ds_next = 0; prev = &prev_p->thread, next = &next_p->thread; @@ -508,17 +507,23 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, debugctl = prev->debugctlmsr; #ifdef CONFIG_X86_DS - if (prev->ds_ctx) - ds_prev = (unsigned long)prev->ds_ctx->ds; - if (next->ds_ctx) - ds_next = (unsigned long)next->ds_ctx->ds; - - if (ds_next != ds_prev) { - /* we clear debugctl to make sure DS - * is not in use when we change it */ - debugctl = 0; - update_debugctlmsr(0); - wrmsrl(MSR_IA32_DS_AREA, ds_next); + { + unsigned long ds_prev = 0, ds_next = 0; + + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { + /* + * We clear debugctl to make sure DS + * is not in use when we change it: + */ + debugctl = 0; + update_debugctlmsr(0); + wrmsrl(MSR_IA32_DS_AREA, ds_next); + } } #endif /* CONFIG_X86_DS */ -- cgit v1.2.3 From 63cc8c75156462d4b42cbdd76c293b7eee7ddbfe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 May 2008 15:44:40 +0200 Subject: percpu: introduce DEFINE_PER_CPU_PAGE_ALIGNED() macro While examining holes in percpu section I found this : c05f5000 D per_cpu__current_task c05f5000 D __per_cpu_start c05f5004 D per_cpu__cpu_number c05f5008 D per_cpu__irq_regs c05f500c d per_cpu__cpu_devices c05f5040 D per_cpu__cyc2ns c05f6000 d per_cpu__cpuid4_info c05f6004 d per_cpu__cache_kobject c05f6008 d per_cpu__index_kobject c05f7000 D per_cpu__gdt_page This is because gdt_page is a percpu variable, defined with a page alignement, and linker is doing its job, two times because of .o nesting in the build process. I introduced a new macro DEFINE_PER_CPU_PAGE_ALIGNED() to avoid wasting this space. All page aligned variables (only one at this time) are put in a separate subsection .data.percpu.page_aligned, at the very begining of percpu zone. Before patch , on a x86_32 machine : .data.percpu 30232 3227471872 .data.percpu 22168 3227471872 Thats 8064 bytes saved for each CPU. Signed-off-by: Eric Dumazet Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/vmlinux_32.lds.S | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d0463a94624..b2f54fafb8b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -21,7 +21,7 @@ #include "cpu.h" -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index ce5ed083a1e..0f7c29a8c4e 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -189,6 +189,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; -- cgit v1.2.3 From 05d3ed0a1fe3ea05ab9f3b8d32576a0bc2e19660 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 21 Jul 2008 10:15:22 -0400 Subject: x86, pci: iommu fix potential overflow in alloc_iommu() It is possible that alloc_iommu()'s boundary_size overflows as dma_get_seg_boundary can return 0xffffffff. In that case, further usage of boundary_size triggers a BUG_ON() in the iommu code. Signed-off-by: Prarit Bhargava Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index df5f142657d..1062dc1e639 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -93,7 +93,7 @@ static unsigned long alloc_iommu(struct device *dev, int size) base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; spin_lock_irqsave(&iommu_bitmap_lock, flags); -- cgit v1.2.3 From e0a5a5d9b006fd441e61685a051fa85d52fb172c Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Tue, 22 Jul 2008 18:14:16 +0200 Subject: x86, 64-bit, dwarf2: push pushes 8 bytes and popf pops 8 The CFI_ADJUST_CFA_OFFSET dwarf2 annotation of a push/popf pair in ret_from_fork wrongly used a value of 4. It should have been 8. Fix that. Signed-off-by: Alexander van Heukelum Cc: Andi Kleen Cc: heukelum@fastmail.fm Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 89434d43960..cf3a0b2d005 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -275,9 +275,9 @@ ENTRY(native_usergs_sysret64) ENTRY(ret_from_fork) CFI_DEFAULT_STACK push kernel_eflags(%rip) - CFI_ADJUST_CFA_OFFSET 4 + CFI_ADJUST_CFA_OFFSET 8 popf # reset kernel eflags - CFI_ADJUST_CFA_OFFSET -4 + CFI_ADJUST_CFA_OFFSET -8 call schedule_tail GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) -- cgit v1.2.3 From 32f71aff77b6470d272f80ac28f43f9601c4d140 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 21 Jul 2008 00:52:49 +0100 Subject: x86: PIC, L-APIC and I/O APIC debug information Dump all the PIC, local APIC and I/O APIC information at the fs_initcall() level, which is after ACPI (if used) has initialised PCI information, making the point of invocation consistent across MP-table and ACPI platforms. Remove explicit calls to print_IO_APIC() from elsewhere. Make the interface of all the functions involved consistent between 32-bit and 64-bit versions and make them all static by default by the means of a New-and-Improved(TM) __apicdebuginit() macro. Note that like print_IO_APIC() all these only output anything if "apic=debug" has been passed to the kernel through the command line. Signed-off-by: Maciej W. Rozycki Cc: Chuck Ebbert Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 29 +++++++++++++++++++---------- arch/x86/kernel/io_apic_64.c | 31 +++++++++++++++++++------------ 2 files changed, 38 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index de9aa0e3a9c..d54455ec985 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -50,6 +50,8 @@ #include #include +#define __apicdebuginit(type) static type __init + int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; @@ -1345,7 +1347,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } -void __init print_IO_APIC(void) + +__apicdebuginit(void) print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1460,9 +1463,7 @@ void __init print_IO_APIC(void) return; } -#if 0 - -static void print_APIC_bitfield(int base) +__apicdebuginit(void) print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1483,7 +1484,7 @@ static void print_APIC_bitfield(int base) } } -void /*__init*/ print_local_APIC(void *dummy) +__apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; @@ -1567,12 +1568,12 @@ void /*__init*/ print_local_APIC(void *dummy) printk("\n"); } -void print_all_local_APICs(void) +__apicdebuginit(void) print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -void /*__init*/ print_PIC(void) +__apicdebuginit(void) print_PIC(void) { unsigned int v; unsigned long flags; @@ -1604,7 +1605,17 @@ void /*__init*/ print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -#endif /* 0 */ +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + static void __init enable_IO_APIC(void) { @@ -2333,8 +2344,6 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); } /* diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 8269434d170..8cdcc4f287c 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -53,6 +53,8 @@ #include #include +#define __apicdebuginit(type) static type __init + struct irq_cfg { cpumask_t domain; cpumask_t old_domain; @@ -87,8 +89,6 @@ int first_system_vector = 0xfe; char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; -#define __apicdebuginit __init - int sis_apic_bug; /* not actually supported, dummy for compile */ static int no_timer_check; @@ -965,7 +965,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, ioapic_write_entry(apic, pin, entry); } -void __apicdebuginit print_IO_APIC(void) + +__apicdebuginit(void) print_IO_APIC(void) { int apic, i; union IO_APIC_reg_00 reg_00; @@ -1059,9 +1060,7 @@ void __apicdebuginit print_IO_APIC(void) return; } -#if 0 - -static __apicdebuginit void print_APIC_bitfield (int base) +__apicdebuginit(void) print_APIC_bitfield(int base) { unsigned int v; int i, j; @@ -1082,7 +1081,7 @@ static __apicdebuginit void print_APIC_bitfield (int base) } } -void __apicdebuginit print_local_APIC(void * dummy) +__apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; @@ -1159,12 +1158,12 @@ void __apicdebuginit print_local_APIC(void * dummy) printk("\n"); } -void print_all_local_APICs (void) +__apicdebuginit(void) print_all_local_APICs(void) { on_each_cpu(print_local_APIC, NULL, 1); } -void __apicdebuginit print_PIC(void) +__apicdebuginit(void) print_PIC(void) { unsigned int v; unsigned long flags; @@ -1196,7 +1195,17 @@ void __apicdebuginit print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -#endif /* 0 */ +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + void __init enable_IO_APIC(void) { @@ -1849,8 +1858,6 @@ void __init setup_IO_APIC(void) setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); } struct sysfs_ioapic_data { -- cgit v1.2.3 From 36a028de785c6e6f15ac84f8b3b087b89137ea26 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 24 Jul 2008 13:52:28 +0200 Subject: x86: apic unification - merge down lapic_get_maxlvt No code change on binary level. Signed-off-by: Cyrill Gorcunov Cc: macro@linux-mips.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 8 ++++++-- arch/x86/kernel/apic_64.c | 9 ++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d6c89835837..447dd8c5c0e 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -193,9 +193,13 @@ int get_physical_broadcast(void) */ int lapic_get_maxlvt(void) { - unsigned int v = apic_read(APIC_LVR); + unsigned int v; - /* 82489DXs do not report # of LVT entries. */ + v = apic_read(APIC_LVR); + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7f1f030da7e..4fa2a8620c2 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -158,11 +158,14 @@ void __cpuinit enable_NMI_through_LVT0(void) */ int lapic_get_maxlvt(void) { - unsigned int v, maxlvt; + unsigned int v; v = apic_read(APIC_LVR); - maxlvt = GET_APIC_MAXLVT(v); - return maxlvt; + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ + return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } /* -- cgit v1.2.3 From d4c63ec060f3315653c0ae5bc3a7fe2419a2282f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 24 Jul 2008 13:52:29 +0200 Subject: x86: apic unification - merge down enable_NMI_through_LVT0 No code change on binary level. Signed-off-by: Cyrill Gorcunov Cc: macro@linux-mips.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 8 ++++++-- arch/x86/kernel/apic_64.c | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 447dd8c5c0e..01708f128ee 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -172,11 +172,15 @@ u32 safe_apic_wait_icr_idle(void) */ void __cpuinit enable_NMI_through_LVT0(void) { - unsigned int v = APIC_DM_NMI; + unsigned int v; + + /* unmask and set to NMI */ + v = APIC_DM_NMI; - /* Level triggered for 82489DX */ + /* Level triggered for 82489DX (32bit mode) */ if (!lapic_is_integrated()) v |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT0, v); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4fa2a8620c2..7615b4b9c3f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -150,6 +150,11 @@ void __cpuinit enable_NMI_through_LVT0(void) /* unmask and set to NMI */ v = APIC_DM_NMI; + + /* Level triggered for 82489DX (32bit mode) */ + if (!lapic_is_integrated()) + v |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT0, v); } -- cgit v1.2.3 From 3c9339049df5cc3a468c11de6c4101a1ea8c3d83 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 25 Jul 2008 11:32:36 +0200 Subject: x86: fix ds.c build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: arch/x86/kernel/ds.c: In function ‘ds_allocate_buffer': arch/x86/kernel/ds.c:339: error: implicit declaration of function ‘PAGE_ALIGN' Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 24a323c9599..ab21c270bfa 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -29,6 +29,7 @@ #include #include #include +#include /* -- cgit v1.2.3 From a677f58a8c8c541bf7d02c658545084040f3708d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 29 Jul 2008 00:37:10 -0700 Subject: x86: print per_cpu data address to make sure per_cpu data on correct node. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index f7745f94c00..61f3966632a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -180,9 +180,16 @@ void __init setup_per_cpu_areas(void) printk(KERN_INFO "cpu %d has no node %d or node-local memory\n", cpu, node); + if (ptr) + printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", + cpu, __pa(ptr)); } - else + else { ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); + if (ptr) + printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); + } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); -- cgit v1.2.3 From 7ab6af7ab69df8c9c5fbc380004fb81187742096 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 30 Jul 2008 17:36:48 -0700 Subject: x86_32: use apic_ops at print_local_APIC() Use apic_icr_read at print_local_APIC() in io_apic_32.c Signed-off-by: Hiroshi Shimamoto Cc: Suresh Siddha Cc: Yinghai Lu Signed-off-by: Ingo Molnar Cc: Suresh Siddha Cc: Yinghai Lu --- arch/x86/kernel/io_apic_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 98e4db5373f..39e063a9a28 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1486,6 +1486,7 @@ static void print_APIC_bitfield(int base) void /*__init*/ print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; + u64 icr; if (apic_verbosity == APIC_QUIET) return; @@ -1536,10 +1537,9 @@ void /*__init*/ print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC ESR: %08x\n", v); } - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); -- cgit v1.2.3 From 2ae111cdd8d83ebf9de72e36e68a8c84b6ebbeea Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 11 Aug 2008 18:34:08 +0400 Subject: x86: apic interrupts - move assignments to irqinit_32.c, v2 64bit mode APIC interrupt handlers are set within irqinit_64.c. Lets do tha same for 32bit mode which would help in furter code merging. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 48 ------------------------------------------- arch/x86/kernel/irqinit_32.c | 49 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 48 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f93c18f5b79..9e341c9d941 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1354,54 +1354,6 @@ void smp_error_interrupt(struct pt_regs *regs) irq_exit(); } -#ifdef CONFIG_SMP -void __init smp_intr_init(void) -{ - /* - * IRQ0 must be given a fixed assignment and initialized, - * because it's used before the IO-APIC is set up. - */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); - - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPI for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for single call function */ - set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); -} -#endif - -/* - * Initialize APIC interrupts - */ -void __init apic_intr_init(void) -{ -#ifdef CONFIG_SMP - smp_intr_init(); -#endif - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - /* thermal monitor LVT interrupt */ -#ifdef CONFIG_X86_MCE_P4THERMAL - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -} - /** * connect_bsp_APIC - attach the APIC to the interrupt system */ diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index d66914287ee..9200a1e2752 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -74,6 +74,15 @@ void __init init_ISA_irqs (void) } } +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -98,6 +107,46 @@ void __init native_init_IRQ(void) set_intr_gate(vector, interrupt[i]); } +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) + /* + * IRQ0 must be given a fixed assignment and initialized, + * because it's used before the IO-APIC is set up. + */ + set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); + + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPI for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + + /* IPI for single call function */ + set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + /* self generated IPI for local APIC timer */ + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +#endif + +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) + /* thermal monitor LVT interrupt */ + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); +#endif + + if (!acpi_ioapic) + setup_irq(2, &irq2); + /* setup after call gates are initialised (usually add in * the architecture specific gates) */ -- cgit v1.2.3 From 49800efcb17afdf973f33e8aa8807b7f83993cc6 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:27:30 +0200 Subject: x86: pda_init(): fix memory leak when using CPU hotplug pda->irqstackptr is allocated whenever a CPU is set online. But it is never freed. This results in a memory leak of 16K for each CPU offline/online cycle. Fix is to allocate pda->irqstackptr only once. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6f9b8924bdc..8396fd7628a 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -493,17 +493,20 @@ void pda_init(int cpu) /* others are initialized in smpboot.c */ pda->pcurrent = &init_task; pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; } else { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", cpu); + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } - - pda->irqstackptr += IRQSTACKSIZE-64; } char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + -- cgit v1.2.3 From b55793f7528ce1b73c25b3ac8a86a6cda2a0f9a4 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:29:37 +0200 Subject: x86: cpu_init(): fix memory leak when using CPU hotplug Exception stacks are allocated each time a CPU is set online. But the allocated space is never freed. Thus with one CPU hotplug offline/online cycle there is a memory leak of 24K (6 pages) for a CPU. Fix is to allocate exception stacks only once -- when the CPU is set online for the first time. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 8396fd7628a..cc6efe86249 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -606,19 +606,22 @@ void __cpuinit cpu_init(void) /* * set up and load the per-CPU TSS */ - for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (!orig_ist->ist[0]) { static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER }; - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception stack %ld %d\n", - v, cpu); + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); -- cgit v1.2.3 From ed4e5ec177d20504c51aebb93db12d57716cde9c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:20 +0200 Subject: x86: apic - use SET_APIC_DEST_FIELD instead of hardcoded shift Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 69a876be506..77c5e5eb820 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -150,7 +150,7 @@ u32 safe_xapic_wait_icr_idle(void) void xapic_icr_write(u32 low, u32 id) { - apic_write(APIC_ICR2, id << 24); + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); apic_write(APIC_ICR, low); } -- cgit v1.2.3 From 36fef0949c14445d231a68663e8a01860f7caca3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:20 +0200 Subject: x86: apic - unify disable_apic_timer Get rid of local_apic_timer_disabled and use disable_apic_timer instead. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 17 ++++++++++++----- arch/x86/kernel/apic_64.c | 16 +++++++++++----- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 12b154822bc..6af20dd12c9 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -63,7 +63,7 @@ int disable_apic; /* Local APIC timer verification ok */ static int local_apic_timer_verify_ok; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int local_apic_timer_disabled; +static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -574,7 +574,7 @@ void __init setup_boot_APIC_clock(void) * timer as a dummy clock event source on SMP systems, so the * broadcast mechanism is used. On UP systems simply ignore it. */ - if (local_apic_timer_disabled) { + if (disable_apic_timer) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) { lapic_clockevent.mult = 1; @@ -1699,12 +1699,19 @@ static int __init parse_nolapic(char *arg) } early_param("nolapic", parse_nolapic); -static int __init parse_disable_lapic_timer(char *arg) +static int __init parse_disable_apic_timer(char *arg) { - local_apic_timer_disabled = 1; + disable_apic_timer = 1; return 0; } -early_param("nolapic_timer", parse_disable_lapic_timer); +early_param("noapictimer", parse_disable_apic_timer); + +static int __init parse_nolapic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; +} +early_param("nolapic_timer", parse_nolapic_timer); static int __init parse_lapic_timer_c2_ok(char *arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 77c5e5eb820..1e925be861e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -45,6 +45,7 @@ #include #include +/* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; @@ -1583,14 +1584,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg) } early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); -static __init int setup_noapictimer(char *str) +static int __init parse_disable_apic_timer(char *arg) { - if (str[0] != ' ' && str[0] != 0) - return 0; disable_apic_timer = 1; - return 1; + return 0; +} +early_param("noapictimer", parse_disable_apic_timer); + +static int __init parse_nolapic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; } -__setup("noapictimer", setup_noapictimer); +early_param("nolapic_timer", parse_nolapic_timer); static __init int setup_apicpmtimer(char *s) { -- cgit v1.2.3 From f07f4f9046121ac803bc2f0ded3d77b7c2ab481b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:21 +0200 Subject: x86: apic - unify __setup_APIC_LVTT To be able to unify this function we RE-introduce APIC_DIVISOR for 64bit mode. This snipped was eliminated in some time ago in a sake of clenup but now we need it again since it allow up to get rid of #ifdef(s). And lapic_is_integrated call is added in apic_64.c but since we always have APIC integrated on 64bit cpu compiler will ignore this call. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e925be861e..ac399d0f65c 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -244,6 +244,9 @@ int lapic_get_maxlvt(void) return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } +/* Clock divisor is set to 1 */ +#define APIC_DIVISOR 1 + /* * This function sets up the local APIC timer, with a timeout of * 'clocks' APIC bus clock. During calibration we actually call @@ -262,6 +265,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) lvtt_value = LOCAL_TIMER_VECTOR; if (!oneshot) lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!lapic_is_integrated()) + lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + if (!irqen) lvtt_value |= APIC_LVT_MASKED; @@ -276,7 +282,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | APIC_TDR_DIV_16); if (!oneshot) - apic_write(APIC_TMICT, clocks); + apic_write(APIC_TMICT, clocks / APIC_DIVISOR); } /* @@ -446,7 +452,7 @@ static int __init calibrate_APIC_clock(void) lapic_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &lapic_clockevent); - calibration_result = result / HZ; + calibration_result = (result * APIC_DIVISOR) / HZ; /* * Do a sanity check on the APIC calibration result -- cgit v1.2.3 From 9ce122c6e55c44ae9a4c4c777579b87d83e7f898 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:21 +0200 Subject: x86: apic - do not clear APIC twice in lapic_shutdown There is no need to clear APIC twice since disable_local_APIC will clear it anyway. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6af20dd12c9..a151d66f948 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -830,10 +830,11 @@ void lapic_shutdown(void) return; local_irq_save(flags); - clear_local_APIC(); if (enabled_via_apicbase) disable_local_APIC(); + else + clear_local_APIC(); local_irq_restore(flags); } -- cgit v1.2.3 From 64e474d168e3cf31e44890b4947644d361f84e43 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:22 +0200 Subject: x86: apic - get rid of local_apic_timer_verify_ok We are able to use clock_event_device as it's done in 64bit apic code so lets get rid of local_apic_timer_verify_ok variable. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a151d66f948..7783c113be2 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -60,8 +60,6 @@ unsigned long mp_lapic_addr; static int force_enable_local_apic; int disable_apic; -/* Local APIC timer verification ok */ -static int local_apic_timer_verify_ok; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ @@ -301,7 +299,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, unsigned int v; /* Lapic used for broadcast ? */ - if (!local_apic_timer_verify_ok) + if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; local_irq_save(flags); @@ -514,7 +512,7 @@ static int __init calibrate_APIC_clock(void) return -1; } - local_apic_timer_verify_ok = 1; + levt->features &= ~CLOCK_EVT_FEAT_DUMMY; /* We trust the pm timer based calibration */ if (!pm_referenced) { @@ -548,11 +546,11 @@ static int __init calibrate_APIC_clock(void) if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); else - local_apic_timer_verify_ok = 0; + levt->features |= CLOCK_EVT_FEAT_DUMMY; } else local_irq_enable(); - if (!local_apic_timer_verify_ok) { + if (levt->features & CLOCK_EVT_FEAT_DUMMY) { printk(KERN_WARNING "APIC timer disabled due to verification failure.\n"); return -1; -- cgit v1.2.3 From c93baa1ae51cdba25a5f5ad37b2348e700e75daf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:22 +0200 Subject: x86: apic - unify verify_local_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7783c113be2..7ef7c782a42 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -878,6 +878,12 @@ int __init verify_local_APIC(void) */ reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); + apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + reg1 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); + apic_write(APIC_ID, reg0); + if (reg1 != (reg0 ^ APIC_ID_MASK)) + return 0; /* * The next two are just to see if we have sane values. -- cgit v1.2.3 From 296cb9511dcc3895fda84d0cd5b411bd926e4bb3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 13:51:23 +0200 Subject: x86: apic - unify sync_Arb_IDs Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 +-- arch/x86/kernel/apic_64.c | 10 ++++++---- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 7ef7c782a42..d07488993ee 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -915,8 +915,7 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, - APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ac399d0f65c..41aff346063 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -742,8 +742,11 @@ int __init verify_local_APIC(void) */ void __init sync_Arb_IDs(void) { - /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ - if (modern_apic()) + /* + * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not + * needed on AMD. + */ + if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return; /* @@ -752,8 +755,7 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG - | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* -- cgit v1.2.3 From 516cbf3730c49739629d66313b20bdc50c98aa2c Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Tue, 12 Aug 2008 12:52:36 -0700 Subject: x86, bootup: add built-in kernel command line for x86 (v2) Allow x86 to support a built-in kernel command line. The built-in command line can override the one provided by the boot loader, for those cases where the boot loader is broken or it is difficult to change the command line in the the boot loader. H. Peter Anvin wrote: > Ingo Molnar wrote: >> Best would be to make it really apparent in the code that nothing >> changes if this config option is not set. Preferably there should be >> no extra code at all in that case. >> > > I would like to see this: [...Nested ifdefs...] OK. This version changes absolutely nothing if CONFIG_CMDLINE_BOOL is not set (the default). Also, no space is appended even when CONFIG_CMDLINE_BOOL is set, but the builtin string is empty. This is less sloppy all the way around, IMHO. Note that I use the same option names as on other arches for this feature. [ mingo@elte.hu: build fix ] Signed-off-by: Tim Bird Cc: Matt Mackall Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 68b48e3fbcb..2f31cddd27b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -223,6 +223,9 @@ unsigned long saved_video_mode; #define RAMDISK_LOAD_FLAG 0x4000 static char __initdata command_line[COMMAND_LINE_SIZE]; +#ifdef CONFIG_CMDLINE_BOOL +static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +#endif #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; @@ -673,6 +676,19 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = virt_to_phys(&__bss_start); bss_resource.end = virt_to_phys(&__bss_stop)-1; +#ifdef CONFIG_CMDLINE_BOOL +#ifdef CONFIG_CMDLINE_OVERRIDE + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + if (builtin_cmdline[0]) { + /* append boot loader cmdline to builtin */ + strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); + strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + } +#endif +#endif + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; -- cgit v1.2.3 From c9c3dddd8f9a05b25d4ce53e8e80cc0ea1759d18 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 1 Aug 2008 03:51:38 +0400 Subject: x86_64: remove empty lines from stack traces/oopses Signed-off-by: Alexey Dobriyan Cc: ak@suse.de Cc: akpm@osdl.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_64.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 3f18d73f420..8b5b3b81d43 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -339,9 +339,8 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl) { - printk("\nCall Trace:\n"); + printk("Call Trace:\n"); dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); - printk("\n"); } void show_trace(struct task_struct *task, struct pt_regs *regs, @@ -386,6 +385,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, printk(" %016lx", *stack++); touch_nmi_watchdog(); } + printk("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -443,7 +443,6 @@ void show_registers(struct pt_regs *regs) printk("Stack: "); show_stack_log_lvl(NULL, regs, (unsigned long *)sp, regs->bp, ""); - printk("\n"); printk(KERN_EMERG "Code: "); -- cgit v1.2.3 From f88f07e0f0fd6376e081b10930d272a08fbf082f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 14 Aug 2008 16:58:15 -0400 Subject: x86: alternatives : fix LOCK_PREFIX race with preemptible kernel and CPU hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a kernel thread is preempted in single-cpu mode right after the NOP (nop about to be turned into a lock prefix), then we CPU hotplug a CPU, and then the thread is scheduled back again, a SMP-unsafe atomic operation will be used on shared SMP variables, leading to corruption. No corruption would happen in the reverse case : going from SMP to UP is ok because we split a bit instruction into tiny pieces, which does not present this condition. Changing the 0x90 (single-byte nop) currently used into a 0x3E DS segment override prefix should fix this issue. Since the default of the atomic instructions is to use the DS segment anyway, it should not affect the behavior. The exception to this are references that use ESP/RSP and EBP/RBP as the base register (they will use the SS segment), however, in Linux (a) DS == SS at all times, and (b) we do not distinguish between segment violations reported as #SS as opposed to #GP, so there is no need to disassemble the instruction to figure out the suitable segment. This patch assumes that the 0x3E prefix will leave atomic operations as-is (thus assuming they normally touch data in the DS segment). Since there seem to be no obvious ill-use of other segment override prefixes for atomic operations, it should be safe. It can be verified with a quick grep -r LOCK_PREFIX include/asm-x86/ grep -A 1 -r LOCK_PREFIX arch/x86/ Taken from This source : AMD64 Architecture Programmer's Manual Volume 3: General-Purpose and System Instructions States "Instructions that Reference a Non-Stack Segment—If an instruction encoding references any base register other than rBP or rSP, or if an instruction contains an immediate offset, the default segment is the data segment (DS). These instructions can use the segment-override prefix to select one of the non-default segments, as shown in Table 1-5." Therefore, forcing the DS segment on the atomic operations, which already use the DS segment, should not change. This source : http://wiki.osdev.org/X86_Instruction_Encoding States "In 64-bit the CS, SS, DS and ES segment overrides are ignored." Confirmed by "AMD 64-Bit Technology" A.7 http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/x86-64_overview.pdf "In 64-bit mode, the DS, ES, SS and CS segment-override prefixes have no effect. These four prefixes are no longer treated as segment-override prefixes in the context of multipleprefix rules. Instead, they are treated as null prefixes." This patch applies to 2.6.27-rc2, but would also have to be applied to earlier kernels (2.6.26, 2.6.25, ...). Performance impact of the fix : tests done on "xaddq" and "xaddl" shows it actually improves performances on Intel Xeon, AMD64, Pentium M. It does not change the performance on Pentium II, Pentium 3 and Pentium 4. Xeon E5405 2.0GHz : NR_TESTS 10000000 test empty cycles : 162207948 test test 1-byte nop xadd cycles : 170755422 test test DS override prefix xadd cycles : 170000118 * test test LOCK xadd cycles : 472012134 AMD64 2.0GHz : NR_TESTS 10000000 test empty cycles : 146674549 test test 1-byte nop xadd cycles : 150273860 test test DS override prefix xadd cycles : 149982382 * test test LOCK xadd cycles : 270000690 Pentium 4 3.0GHz NR_TESTS 10000000 test empty cycles : 290001195 test test 1-byte nop xadd cycles : 310000560 test test DS override prefix xadd cycles : 310000575 * test test LOCK xadd cycles : 1050103740 Pentium M 2.0GHz NR_TESTS 10000000 test empty cycles : 180000523 test test 1-byte nop xadd cycles : 320000345 test test DS override prefix xadd cycles : 310000374 * test test LOCK xadd cycles : 480000357 Pentium 3 550MHz NR_TESTS 10000000 test empty cycles : 510000231 test test 1-byte nop xadd cycles : 620000128 test test DS override prefix xadd cycles : 620000110 * test test LOCK xadd cycles : 800000088 Pentium II 350MHz NR_TESTS 10000000 test empty cycles : 200833494 test test 1-byte nop xadd cycles : 340000130 test test DS override prefix xadd cycles : 340000126 * test test LOCK xadd cycles : 530000078 Speed test modules can be found at http://ltt.polymtl.ca/svn/trunk/tests/kernel/test-prefix-speed-32.c http://ltt.polymtl.ca/svn/trunk/tests/kernel/test-prefix-speed.c Macro-benchmarks 2.0GHz E5405 Core 2 dual Quad-Core Xeon Summary * replace smp lock prefixes with DS segment selector prefixes no lock prefix (s) with lock prefix (s) Speedup make -j1 kernel/ 33.94 +/- 0.07 34.91 +/- 0.27 2.8 % hackbench 50 2.99 +/- 0.01 3.74 +/- 0.01 25.1 % * replace smp lock prefixes with 0x90 nops no lock prefix (s) with lock prefix (s) Speedup make -j1 kernel/ 34.16 +/- 0.32 34.91 +/- 0.27 2.2 % hackbench 50 3.00 +/- 0.01 3.74 +/- 0.01 24.7 % Detail : 1 CPU, replace smp lock prefixes with DS segment selector prefixes make -j1 kernel/ real 0m34.067s user 0m30.630s sys 0m2.980s real 0m33.867s user 0m30.582s sys 0m3.024s real 0m33.939s user 0m30.738s sys 0m2.876s real 0m33.913s user 0m30.806s sys 0m2.808s avg : 33.94s std. dev. : 0.07s hackbench 50 Time: 2.978 Time: 2.982 Time: 3.010 Time: 2.984 Time: 2.982 avg : 2.99 std. dev. : 0.01 1 CPU, noreplace-smp make -j1 kernel/ real 0m35.326s user 0m30.630s sys 0m3.260s real 0m34.325s user 0m30.802s sys 0m3.084s real 0m35.568s user 0m30.722s sys 0m3.168s real 0m34.435s user 0m30.886s sys 0m2.996s avg.: 34.91s std. dev. : 0.27s hackbench 50 Time: 3.733 Time: 3.750 Time: 3.761 Time: 3.737 Time: 3.741 avg : 3.74 std. dev. : 0.01 1 CPU, replace smp lock prefixes with 0x90 nops make -j1 kernel/ real 0m34.139s user 0m30.782s sys 0m2.820s real 0m34.010s user 0m30.630s sys 0m2.976s real 0m34.777s user 0m30.658s sys 0m2.916s real 0m33.924s user 0m30.634s sys 0m2.924s real 0m33.962s user 0m30.774s sys 0m2.800s real 0m34.141s user 0m30.770s sys 0m2.828s avg : 34.16 std. dev. : 0.32 hackbench 50 Time: 2.999 Time: 2.994 Time: 3.004 Time: 2.991 Time: 2.988 avg : 3.00 std. dev. : 0.01 I did more runs (20 runs of each) to compare the nop case to the DS prefix case. Results in seconds. They actually does not seems to show a significant difference. NOP 34.155 33.955 34.012 35.299 35.679 34.141 33.995 35.016 34.254 33.957 33.957 34.008 35.013 34.494 33.893 34.295 34.314 34.854 33.991 34.132 DS 34.080 34.304 34.374 35.095 34.291 34.135 33.940 34.208 35.276 34.288 33.861 33.898 34.610 34.709 33.851 34.256 35.161 34.283 33.865 35.078 Used http://www.graphpad.com/quickcalcs/ttest1.cfm?Format=C to do the T-test (yeah, I'm lazy) : Group Group One (DS prefix) Group Two (nops) Mean 34.37815 34.37070 SD 0.46108 0.51905 SEM 0.10310 0.11606 N 20 20 P value and statistical significance: The two-tailed P value equals 0.9620 By conventional criteria, this difference is considered to be not statistically significant. Confidence interval: The mean of Group One minus Group Two equals 0.00745 95% confidence interval of this difference: From -0.30682 to 0.32172 Intermediate values used in calculations: t = 0.0480 df = 38 standard error of difference = 0.155 So, unless these calculus are completely bogus, the difference between the nop and the DS case seems not to be statistically significant. Signed-off-by: Mathieu Desnoyers Acked-by: H. Peter Anvin CC: Linus Torvalds CC: Jeremy Fitzhardinge CC: Roland McGrath CC: Ingo Molnar Cc: Steven Rostedt CC: Steven Rostedt CC: Thomas Gleixner CC: Peter Zijlstra CC: Andrew Morton CC: David Miller CC: Ulrich Drepper CC: Rusty Russell CC: Gregory Haskins CC: Arnaldo Carvalho de Melo CC: "Luis Claudio R. Goncalves" CC: Clark Williams CC: Christoph Lameter CC: Andi Kleen CC: Harvey Harrison Signed-off-by: H. Peter Anvin --- arch/x86/kernel/alternative.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb37b55..7ead11f3732 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -241,25 +241,25 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) continue; if (*ptr > text_end) continue; - text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ + /* turn DS segment override prefix into lock prefix */ + text_poke(*ptr, ((unsigned char []){0xf0}), 1); }; } static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) { u8 **ptr; - char insn[1]; if (noreplace_smp) return; - add_nops(insn, 1); for (ptr = start; ptr < end; ptr++) { if (*ptr < text) continue; if (*ptr > text_end) continue; - text_poke(*ptr, insn, 1); + /* turn lock prefix into DS segment override prefix */ + text_poke(*ptr, ((unsigned char []){0x3E}), 1); }; } -- cgit v1.2.3 From 6f6da97faf29f87b3980a6992fb8cab44b4c444d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 23:05:19 +0400 Subject: x86: apic - sync_Arb_IDs style fixup No changes on binary level Signed-off-by: Cyrill Gorcunov Acked-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 +++- arch/x86/kernel/apic_64.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d07488993ee..60575c05151 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -909,13 +909,15 @@ void __init sync_Arb_IDs(void) */ if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return; + /* * Wait for idle. */ apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | + APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 41aff346063..72e94ab0e36 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -755,7 +755,8 @@ void __init sync_Arb_IDs(void) apic_wait_icr_idle(); apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); + apic_write(APIC_ICR, APIC_DEST_ALLINC | + APIC_INT_LEVELTRIG | APIC_DM_INIT); } /* -- cgit v1.2.3 From 638c0411922540deaf8797cacf73513b17618405 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 15 Aug 2008 23:05:18 +0400 Subject: x86: apic - unify init_bsp_APIC - remove redundant read of APIC_LVR register in 64bit mode - APIC is always integrated for 64bit mode so gcc will eliminate lapic_is_integrated call Signed-off-by: Cyrill Gorcunov Acked-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 +++- arch/x86/kernel/apic_64.c | 14 +++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 60575c05151..16d9721788c 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -925,7 +925,7 @@ void __init sync_Arb_IDs(void) */ void __init init_bsp_APIC(void) { - unsigned long value; + unsigned int value; /* * Don't do the setup now if we have a SMP BIOS as the @@ -946,11 +946,13 @@ void __init init_bsp_APIC(void) value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; +#ifdef CONFIG_X86_32 /* This bit is reserved on P4/Xeon and should be cleared */ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15)) value &= ~APIC_SPIV_FOCUS_DISABLED; else +#endif value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 72e94ab0e36..99d18b8976a 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -773,8 +773,6 @@ void __init init_bsp_APIC(void) if (smp_found_config || !cpu_has_apic) return; - value = apic_read(APIC_LVR); - /* * Do not trust the local APIC being empty at bootup. */ @@ -786,7 +784,15 @@ void __init init_bsp_APIC(void) value = apic_read(APIC_SPIV); value &= ~APIC_VECTOR_MASK; value |= APIC_SPIV_APIC_ENABLED; - value |= APIC_SPIV_FOCUS_DISABLED; + +#ifdef CONFIG_X86_32 + /* This bit is reserved on P4/Xeon and should be cleared */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + (boot_cpu_data.x86 == 15)) + value &= ~APIC_SPIV_FOCUS_DISABLED; + else +#endif + value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; apic_write(APIC_SPIV, value); @@ -795,6 +801,8 @@ void __init init_bsp_APIC(void) */ apic_write(APIC_LVT0, APIC_DM_EXTINT); value = APIC_DM_NMI; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); } -- cgit v1.2.3 From 6764014bc8bb4849f6a4f336477e873ad5861ed2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:50 +0400 Subject: x86: apic - unify clear_local_APIC - Remove redundant masking of APIC_LVTTHMR register in apic_32.c - Add masking of APIC_LVTTHMR register to apic_64.c. We use a bit complicated #ifdef here: CONFIG_X86_MCE_P4THERMAL is 32bit specific and X86_MCE_INTEL is 64bit specific so the appropriate config variable will be set by Kconfig. - the APIC_ESR register clearing in apic_64.c now uses not straightforward way but this is allowed tradeoff. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 +----- arch/x86/kernel/apic_64.c | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 16d9721788c..3131603a4d6 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -754,7 +754,7 @@ void clear_local_APIC(void) } /* lets not touch this if we didn't frob it */ -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) if (maxlvt >= 5) { v = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); @@ -771,10 +771,6 @@ void clear_local_APIC(void) if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, APIC_LVT_MASKED); -#endif /* Integrated APIC (!82489DX) ? */ if (lapic_is_integrated()) { if (maxlvt > 3) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 99d18b8976a..d834b758362 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -630,6 +630,13 @@ void clear_local_APIC(void) apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); } + /* lets not touch this if we didn't frob it */ +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) + if (maxlvt >= 5) { + v = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); + } +#endif /* * Clean APIC state for other OSs: */ @@ -640,8 +647,14 @@ void clear_local_APIC(void) apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) apic_write(APIC_LVTPC, APIC_LVT_MASKED); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + + /* Integrated APIC (!82489DX) ? */ + if (lapic_is_integrated()) { + if (maxlvt > 3) + /* Clear ESR due to Pentium errata 3AP and 11AP */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } } /** -- cgit v1.2.3 From 92206c909ad1d4f9c355b4842722d5a355d6b76b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:51 +0400 Subject: x86: apic - unify lapic_resume Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 29 ++++++++++++++++++----------- arch/x86/kernel/apic_64.c | 19 +++++++++++++++---- 2 files changed, 33 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3131603a4d6..3d40213d3af 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1606,16 +1606,21 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); +#ifdef CONFIG_X86_64 + if (x2apic) + enable_x2apic(); + else +#endif + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -1625,7 +1630,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -1639,7 +1644,9 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d834b758362..e542a2d08de 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1412,13 +1412,22 @@ static int lapic_resume(struct sys_device *dev) maxlvt = lapic_get_maxlvt(); local_irq_save(flags); - if (!x2apic) { + +#ifdef CONFIG_X86_64 + if (x2apic) + enable_x2apic(); + else +#endif + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); - } else - enable_x2apic(); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -1428,7 +1437,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_INTEL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -1442,7 +1451,9 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); + local_irq_restore(flags); + return 0; } -- cgit v1.2.3 From 24968cfdd4c3cf61338495dd0f9bb8a6907d2087 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:52 +0400 Subject: x86: apic - unify lapic_suspend Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 +- arch/x86/kernel/apic_64.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3d40213d3af..6cb8aaaf10f 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1582,7 +1582,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_P4THERMAL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index e542a2d08de..13dea935b4e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1390,10 +1390,11 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_INTEL +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); #endif + local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); -- cgit v1.2.3 From 274cfe5912eebe9d4e1a4c451fe617289a181fcf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:53 +0400 Subject: x86: apic - rearrange functions and comments Rearrange functions and comments to find differences easier. Also use apic_printk in setup_boot_APIC_clock for 64bit mode. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 69 ++++++++++++++++++++++++++--------------------- arch/x86/kernel/apic_64.c | 48 ++++++++++++++++++++++++++------- 2 files changed, 77 insertions(+), 40 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6cb8aaaf10f..9e8702eebd4 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -251,6 +251,9 @@ int lapic_get_maxlvt(void) * this function twice on the boot CPU, once with a bogus timeout * value, second time for real. The other (noncalibrating) CPUs * call this function only once, with the real, calibrated value. + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. */ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { @@ -279,6 +282,36 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); } +/* + * Setup extended LVT, AMD specific (K8, family 10h) + * + * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and + * MCE interrupts are supported. Thus MCE offset must be set to 0. + */ + +#define APIC_EILVT_LVTOFF_MCE 0 +#define APIC_EILVT_LVTOFF_IBS 1 + +static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) +{ + unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; + + apic_write(reg, v); +} + +u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_MCE; +} + +u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_IBS; +} + /* * Program the next event, relative to now */ @@ -298,7 +331,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, unsigned long flags; unsigned int v; - /* Lapic used for broadcast ? */ + /* Lapic used as dummy for broadcast ? */ if (evt->features & CLOCK_EVT_FEAT_DUMMY) return; @@ -680,35 +713,6 @@ int setup_profiling_timer(unsigned int multiplier) return -EINVAL; } -/* - * Setup extended LVT, AMD specific (K8, family 10h) - * - * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and - * MCE interrupts are supported. Thus MCE offset must be set to 0. - */ - -#define APIC_EILVT_LVTOFF_MCE 0 -#define APIC_EILVT_LVTOFF_IBS 1 - -static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) -{ - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - apic_write(reg, v); -} - -u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_MCE; -} - -u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; -} - /* * Local APIC start and shutdown */ @@ -1542,6 +1546,11 @@ void __cpuinit generic_processor_info(int apicid, int version) #ifdef CONFIG_PM static struct { + /* + * 'active' is true if the local APIC was enabled by us and + * not the BIOS; this signifies that we are also responsible + * for disabling it before entering apm/acpi suspend + */ int active; /* r/w apic fields */ unsigned int apic_id; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 13dea935b4e..46523c0cc6f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -81,6 +81,9 @@ static void lapic_timer_setup(enum clock_event_mode mode, static void lapic_timer_broadcast(cpumask_t mask); static void apic_pm_activate(void); +/* + * The local apic timer can be used for any function which is CPU local. + */ static struct clock_event_device lapic_clockevent = { .name = "lapic", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT @@ -127,6 +130,11 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) @@ -175,7 +183,6 @@ static struct apic_ops xapic_ops = { }; struct apic_ops __read_mostly *apic_ops = &xapic_ops; - EXPORT_SYMBOL_GPL(apic_ops); static void x2apic_wait_icr_idle(void) @@ -244,6 +251,10 @@ int lapic_get_maxlvt(void) return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; } +/* + * Local APIC timer + */ + /* Clock divisor is set to 1 */ #define APIC_DIVISOR 1 @@ -257,7 +268,6 @@ int lapic_get_maxlvt(void) * We do reads before writes even if unnecessary, to get around the * P5 APIC double write bug. */ - static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { unsigned int lvtt_value, tmp_value; @@ -474,10 +484,10 @@ static int __init calibrate_APIC_clock(void) void __init setup_boot_APIC_clock(void) { /* - * The local apic timer can be disabled via the kernel commandline. - * Register the lapic timer as a dummy clock event source on SMP - * systems, so the broadcast mechanism is used. On UP systems simply - * ignore it. + * The local apic timer can be disabled via the kernel + * commandline or from the CPU detection code. Register the lapic + * timer as a dummy clock event source on SMP systems, so the + * broadcast mechanism is used. On UP systems simply ignore it. */ if (disable_apic_timer) { printk(KERN_INFO "Disabling APIC timer\n"); @@ -489,7 +499,9 @@ void __init setup_boot_APIC_clock(void) return; } - printk(KERN_INFO "Using local APIC timer interrupts.\n"); + apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" + "calibrating APIC timer ...\n"); + if (calibrate_APIC_clock()) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) @@ -508,6 +520,7 @@ void __init setup_boot_APIC_clock(void) printk(KERN_WARNING "APIC timer registered as dummy," " due to nmi_watchdog=%d!\n", nmi_watchdog); + /* Setup the lapic or request the broadcast */ setup_APIC_timer(); } @@ -577,6 +590,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) irq_enter(); local_apic_timer_interrupt(); irq_exit(); + set_irq_regs(old_regs); } @@ -1248,6 +1262,13 @@ void __init connect_bsp_APIC(void) enable_apic_mode(); } +/** + * disconnect_bsp_APIC - detach the APIC from the interrupt system + * @virt_wire_setup: indicates, whether virtual wire mode is selected + * + * Virtual wire mode is necessary to deliver legacy interrupts even when the + * APIC is disabled. + */ void disconnect_bsp_APIC(int virt_wire_setup) { /* Go back to Virtual Wire compatibility mode */ @@ -1347,9 +1368,11 @@ int hard_smp_processor_id(void) #ifdef CONFIG_PM static struct { - /* 'active' is true if the local APIC was enabled by us and - not the BIOS; this signifies that we are also responsible - for disabling it before entering apm/acpi suspend */ + /* + * 'active' is true if the local APIC was enabled by us and + * not the BIOS; this signifies that we are also responsible + * for disabling it before entering apm/acpi suspend + */ int active; /* r/w apic fields */ unsigned int apic_id; @@ -1458,6 +1481,11 @@ static int lapic_resume(struct sys_device *dev) return 0; } +/* + * This device has no shutdown method - fully functioning local APICs + * are needed on every CPU up until machine_halt/restart/poweroff. + */ + static struct sysdev_class lapic_sysclass = { .name = "lapic", .resume = lapic_resume, -- cgit v1.2.3 From 9c803869f5f5e3d7ad94b2926474b2882e30073b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:54 +0400 Subject: x86: apic - unify lapic_is_integrated Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++++ arch/x86/kernel/apic_64.c | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 9e8702eebd4..41134d4e6a6 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -128,7 +128,11 @@ static inline int lapic_get_version(void) */ static inline int lapic_is_integrated(void) { +#ifdef CONFIG_X86_64 + return 1; +#else return APIC_INTEGRATED(lapic_get_version()); +#endif } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46523c0cc6f..18c0b8cd237 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -111,11 +111,15 @@ static inline int lapic_get_version(void) } /* - * Check, if the APIC is integrated or a seperate chip + * Check, if the APIC is integrated or a separate chip */ static inline int lapic_is_integrated(void) { +#ifdef CONFIG_X86_64 return 1; +#else + return APIC_INTEGRATED(lapic_get_version()); +#endif } /* -- cgit v1.2.3 From cf9768d7514d59b3179a886c4a47908d72e6cf76 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 16 Aug 2008 23:21:55 +0400 Subject: x86: apic - unify xapic_icr_read Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 18c0b8cd237..5e0738131e5 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -174,7 +174,7 @@ u64 xapic_icr_read(void) icr2 = apic_read(APIC_ICR2); icr1 = apic_read(APIC_ICR); - return (icr1 | ((u64)icr2 << 32)); + return icr1 | ((u64)icr2 << 32); } static struct apic_ops xapic_ops = { -- cgit v1.2.3 From 7b22ff5344fda666e0938e5261ea7b9a3dfce497 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 18 Aug 2008 00:36:18 +0900 Subject: x86 gart: allocate size-aligned address for alloc_coherent, v2 This patch changes GART IOMMU to return a size aligned address wrt dma_alloc_coherent, as DMA-mapping.txt defines: The cpu return address and the DMA bus master address are both guaranteed to be aligned to the smallest PAGE_SIZE order which is greater than or equal to the requested size. This invariant exists (for example) to guarantee that if you allocate a chunk which is smaller than or equal to 64 kilobytes, the extent of the buffer you receive will not cross a 64K boundary. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index cdab6784907..4d8efb05428 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -82,7 +82,8 @@ AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ -static unsigned long alloc_iommu(struct device *dev, int size) +static unsigned long alloc_iommu(struct device *dev, int size, + unsigned long align_mask) { unsigned long offset, flags; unsigned long boundary_size; @@ -95,11 +96,12 @@ static unsigned long alloc_iommu(struct device *dev, int size) spin_lock_irqsave(&iommu_bitmap_lock, flags); offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, - size, base_index, boundary_size, 0); + size, base_index, boundary_size, align_mask); if (offset == -1) { need_flush = 1; offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, - size, base_index, boundary_size, 0); + size, base_index, boundary_size, + align_mask); } if (offset != -1) { next_bit = offset+size; @@ -236,10 +238,10 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir) + size_t size, int dir, unsigned long align_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page = alloc_iommu(dev, npages); + unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); int i; if (iommu_page == -1) { @@ -262,7 +264,11 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, static dma_addr_t gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - dma_addr_t map = dma_map_area(dev, paddr, size, dir); + dma_addr_t map; + unsigned long align_mask; + + align_mask = (1UL << get_order(size)) - 1; + map = dma_map_area(dev, paddr, size, dir, align_mask); flush_gart(); @@ -281,7 +287,8 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = gart_map_simple(dev, paddr, size, dir); + bus = dma_map_area(dev, paddr, size, dir, 0); + flush_gart(); return bus; } @@ -340,7 +347,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir); + addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -362,7 +369,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start = alloc_iommu(dev, pages); + unsigned long iommu_start = alloc_iommu(dev, pages, 0); unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; -- cgit v1.2.3 From d5e629a6f88137fb77c4cc857be5ea7c3f27110d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 17 Aug 2008 21:12:27 -0700 Subject: x86: apic - unify lapic_resume - fix Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5e0738131e5..ad532dccd11 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1446,6 +1446,7 @@ static int lapic_resume(struct sys_device *dev) enable_x2apic(); else #endif + { /* * Make sure the APICBASE points to the right address * @@ -1456,6 +1457,7 @@ static int lapic_resume(struct sys_device *dev) l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); + } apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); -- cgit v1.2.3 From b6c8051311e1a14d229df05ea39d0a1b2ce90cdd Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:49 +0400 Subject: x86: apic - rearrange maxcpu definition Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++-- arch/x86/kernel/apic_64.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 41134d4e6a6..8d1febf05da 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -114,6 +114,8 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static int enabled_via_apicbase; static unsigned long apic_phys; +unsigned int __cpuinitdata maxcpus = NR_CPUS; + /* * Get the LAPIC version @@ -1459,8 +1461,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) } } -unsigned int __cpuinitdata maxcpus = NR_CPUS; - void __cpuinit generic_processor_info(int apicid, int version) { int cpu; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ad532dccd11..46acb9b47ae 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -98,10 +98,10 @@ static struct clock_event_device lapic_clockevent = { static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static unsigned long apic_phys; +unsigned int __cpuinitdata maxcpus = NR_CPUS; unsigned long mp_lapic_addr; -unsigned int __cpuinitdata maxcpus = NR_CPUS; /* * Get the LAPIC version */ -- cgit v1.2.3 From f1ee37891dab6014f6b0ec77b18bc07e2369a55f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:50 +0400 Subject: x86: apic - unify setup_boot_APIC_clock Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 8d1febf05da..80db3e0426c 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -612,6 +612,7 @@ void __init setup_boot_APIC_clock(void) * broadcast mechanism is used. On UP systems simply ignore it. */ if (disable_apic_timer) { + printk(KERN_INFO "Disabling APIC timer\n"); /* No broadcast on UP ! */ if (num_possible_cpus() > 1) { lapic_clockevent.mult = 1; -- cgit v1.2.3 From 990b183e58cb513a62492b6218987750e106cbfb Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:51 +0400 Subject: x86: apic - unify disable_local_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 +++- arch/x86/kernel/apic_64.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 80db3e0426c..13c4b79441d 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -796,7 +796,7 @@ void clear_local_APIC(void) */ void disable_local_APIC(void) { - unsigned long value; + unsigned int value; clear_local_APIC(); @@ -808,6 +808,7 @@ void disable_local_APIC(void) value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); +#ifdef CONFIG_X86_32 /* * When LAPIC was disabled by the BIOS and enabled by the kernel, * restore the disabled state. @@ -819,6 +820,7 @@ void disable_local_APIC(void) l &= ~MSR_IA32_APICBASE_ENABLE; wrmsr(MSR_IA32_APICBASE, l, h); } +#endif } /* diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 46acb9b47ae..4fb903b2fc3 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -691,6 +691,20 @@ void disable_local_APIC(void) value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; apic_write(APIC_SPIV, value); + +#ifdef CONFIG_X86_32 + /* + * When LAPIC was disabled by the BIOS and enabled by the kernel, + * restore the disabled state. + */ + if (enabled_via_apicbase) { + unsigned int l, h; + + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_ENABLE; + wrmsr(MSR_IA32_APICBASE, l, h); + } +#endif } void lapic_shutdown(void) -- cgit v1.2.3 From fe4024dcb0c01e5399394d2807406a2c13fb1eb7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:52 +0400 Subject: x86: apic - unify lapic_shutdown Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 9 ++++++--- arch/x86/kernel/apic_64.c | 14 +++++++++++++- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 13c4b79441d..d4efe86adc7 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -838,10 +838,13 @@ void lapic_shutdown(void) local_irq_save(flags); - if (enabled_via_apicbase) - disable_local_APIC(); - else +#ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) clear_local_APIC(); + else +#endif + disable_local_APIC(); + local_irq_restore(flags); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4fb903b2fc3..48806546d49 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -707,6 +707,12 @@ void disable_local_APIC(void) #endif } +/* + * If Linux enabled the LAPIC against the BIOS default disable it down before + * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and + * not power-off. Additionally clear all LVT entries before disable_local_APIC + * for the case where Linux didn't enable the LAPIC. + */ void lapic_shutdown(void) { unsigned long flags; @@ -716,7 +722,13 @@ void lapic_shutdown(void) local_irq_save(flags); - disable_local_APIC(); +#ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) + clear_local_APIC(); + else +#endif + disable_local_APIC(); + local_irq_restore(flags); } -- cgit v1.2.3 From 36c9d6742897fa414f51c4e9d0f20ab4e6bf942c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:53 +0400 Subject: x86: apic - unify connect_bsp_APIC Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 ++ arch/x86/kernel/apic_64.c | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index d4efe86adc7..6d230e9d0a7 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1387,6 +1387,7 @@ void smp_error_interrupt(struct pt_regs *regs) */ void __init connect_bsp_APIC(void) { +#ifdef CONFIG_X86_32 if (pic_mode) { /* * Do not trust the local APIC being empty at bootup. @@ -1401,6 +1402,7 @@ void __init connect_bsp_APIC(void) outb(0x70, 0x22); outb(0x01, 0x23); } +#endif enable_apic_mode(); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 48806546d49..5579e213b5d 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1285,10 +1285,26 @@ asmlinkage void smp_error_interrupt(void) } /** - * * connect_bsp_APIC - attach the APIC to the interrupt system - * */ + * connect_bsp_APIC - attach the APIC to the interrupt system + */ void __init connect_bsp_APIC(void) { +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + /* + * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's + * local APIC to INT and NMI lines. + */ + apic_printk(APIC_VERBOSE, "leaving PIC mode, " + "enabling APIC mode.\n"); + outb(0x70, 0x22); + outb(0x01, 0x23); + } +#endif enable_apic_mode(); } -- cgit v1.2.3 From c43da2f5e92fe3bcc256f0c0d6cb858368da5bd9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:54 +0400 Subject: x86: apic - unify lapic_setup_esr We use 32bit code former for 64bit mode since it's much better implementation and easier to merge. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 21 +++++++++---------- arch/x86/kernel/apic_64.c | 51 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 22 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 6d230e9d0a7..3c1562afa27 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -983,6 +983,16 @@ static void __cpuinit lapic_setup_esr(void) { unsigned long oldvalue, value, maxlvt; if (lapic_is_integrated() && !esr_disable) { + if (esr_disable) { + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; + } /* !82489DX */ maxlvt = lapic_get_maxlvt(); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ @@ -1003,16 +1013,7 @@ static void __cpuinit lapic_setup_esr(void) "vector: 0x%08lx after: 0x%08lx\n", oldvalue, value); } else { - if (esr_disable) - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - else - printk(KERN_INFO "No ESR for 82489DX.\n"); + printk(KERN_INFO "No ESR for 82489DX.\n"); } } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5579e213b5d..d74abf7e92f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -863,6 +863,45 @@ void __init init_bsp_APIC(void) apic_write(APIC_LVT1, value); } +static void __cpuinit lapic_setup_esr(void) +{ + unsigned long oldvalue, value, maxlvt; + if (lapic_is_integrated() && !esr_disable) { + if (esr_disable) { + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; + } + /* !82489DX */ + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + oldvalue = apic_read(APIC_ESR); + + /* enables sending errors */ + value = ERROR_APIC_VECTOR; + apic_write(APIC_LVTERR, value); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, "ESR value before enabling " + "vector: 0x%08lx after: 0x%08lx\n", + oldvalue, value); + } else { + printk(KERN_INFO "No ESR for 82489DX.\n"); + } +} + + /** * setup_local_APIC - setup the local APIC */ @@ -968,18 +1007,6 @@ void __cpuinit setup_local_APIC(void) preempt_enable(); } -static void __cpuinit lapic_setup_esr(void) -{ - unsigned maxlvt = lapic_get_maxlvt(); - - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); -} - void __cpuinit end_local_APIC_setup(void) { lapic_setup_esr(); -- cgit v1.2.3 From c40aaec6868401671a0ca14ed77e9b2da2d1f223 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:55 +0400 Subject: x86: apic - unify __setup_APIC_LVTT Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 10 +++++++--- arch/x86/kernel/apic_64.c | 12 ++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3c1562afa27..65419c7d437 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -248,8 +248,12 @@ int lapic_get_maxlvt(void) * Local APIC timer */ -/* Clock divisor is set to 16 */ +/* Clock divisor */ +#ifdef CONFG_X86_64 +#define APIC_DIVISOR 1 +#else #define APIC_DIVISOR 16 +#endif /* * This function sets up the local APIC timer, with a timeout of @@ -281,8 +285,8 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) */ tmp_value = apic_read(APIC_TDCR); apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); if (!oneshot) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d74abf7e92f..fe57db9f3fb 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -259,8 +259,12 @@ int lapic_get_maxlvt(void) * Local APIC timer */ -/* Clock divisor is set to 1 */ +/* Clock divisor */ +#ifdef CONFG_X86_64 #define APIC_DIVISOR 1 +#else +#define APIC_DIVISOR 16 +#endif /* * This function sets up the local APIC timer, with a timeout of @@ -291,9 +295,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) * Divide PICLK by 16 */ tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); + apic_write(APIC_TDCR, + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); if (!oneshot) apic_write(APIC_TMICT, clocks / APIC_DIVISOR); -- cgit v1.2.3 From c177b0bc03e0e11623e2099db42903fb0caf0fd3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:56 +0400 Subject: x86: apic - unify disconnect_bsp_APIC - just #ifdef added Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 66 ++++++++++++++++++++++++----------------------- arch/x86/kernel/apic_64.c | 23 +++++++++++++++-- 2 files changed, 55 insertions(+), 34 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 65419c7d437..3095bb71eae 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1420,6 +1420,7 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { +#ifdef CONFIG_X86_32 if (pic_mode) { /* * Put the board back into PIC mode (has an effect only on @@ -1431,47 +1432,48 @@ void disconnect_bsp_APIC(int virt_wire_setup) "entering PIC mode.\n"); outb(0x70, 0x22); outb(0x00, 0x23); - } else { - /* Go back to Virtual Wire compatibility mode */ - unsigned long value; + return; + } +#endif - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); + /* Go back to Virtual Wire compatibility mode */ + unsigned int value; - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); + if (!virt_wire_setup) { /* - * For LVT1 make it edge triggered, active high, nmi and - * enabled + * For LVT0 make it edge triggered, active high, + * external and enabled */ - value = apic_read(APIC_LVT1); - value &= ~( - APIC_MODE_MASK | APIC_SEND_PENDING | + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); } + + /* + * For LVT1 make it edge triggered, active high, + * nmi and enabled + */ + value = apic_read(APIC_LVT1); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); } void __cpuinit generic_processor_info(int apicid, int version) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index fe57db9f3fb..0d969103fce 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1348,8 +1348,24 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Put the board back into PIC mode (has an effect only on + * certain older boards). Note that APIC interrupts, including + * IPIs, won't work beyond this point! The only exception are + * INIT IPIs. + */ + apic_printk(APIC_VERBOSE, "disabling APIC mode, " + "entering PIC mode.\n"); + outb(0x70, 0x22); + outb(0x00, 0x23); + return; + } +#endif + /* Go back to Virtual Wire compatibility mode */ - unsigned long value; + unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); @@ -1375,7 +1391,10 @@ void disconnect_bsp_APIC(int virt_wire_setup) apic_write(APIC_LVT0, APIC_LVT_MASKED); } - /* For LVT1 make it edge triggered, active high, nmi and enabled */ + /* + * For LVT1 make it edge triggered, active high, + * nmi and enabled + */ value = apic_read(APIC_LVT1); value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | -- cgit v1.2.3 From 1b313f4a6d7bee7b2c034b3f1e203bc360a71cca Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:57 +0400 Subject: x86: apic - generic_processor_info - use physid_set instead of phys_cpu and physids_or - set phys_cpu_present_map bit AFTER check for allowed number of processors - add checking for APIC valid version in 64bit mode (mostly not needed but added for merging purpose) - add apic_version definition for 64bit mode which is used now Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 15 ++++++++------- arch/x86/kernel/apic_64.c | 41 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3095bb71eae..c3a252b1a8b 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1480,7 +1480,6 @@ void __cpuinit generic_processor_info(int apicid, int version) { int cpu; cpumask_t tmp_map; - physid_mask_t phys_cpu; /* * Validate version @@ -1493,9 +1492,6 @@ void __cpuinit generic_processor_info(int apicid, int version) } apic_version[apicid] = version; - phys_cpu = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); - if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." " Processor ignored.\n", NR_CPUS); @@ -1512,17 +1508,19 @@ void __cpuinit generic_processor_info(int apicid, int version) cpus_complement(tmp_map, cpu_present_map); cpu = first_cpu(tmp_map); - if (apicid == boot_cpu_physical_apicid) + physid_set(apicid, phys_cpu_present_map); + if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed * in same order as logical cpu numbers. Hence the first * entry is BSP, and so on. */ cpu = 0; - + } if (apicid > max_physical_apicid) max_physical_apicid = apicid; +#ifdef CONFIG_X86_32 /* * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y * but we need to work other dependencies like SMP_SUSPEND etc @@ -1542,7 +1540,9 @@ void __cpuinit generic_processor_info(int apicid, int version) def_to_bigsmp = 1; } } -#ifdef CONFIG_SMP +#endif + +#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) /* are we being called early in kernel startup? */ if (early_per_cpu_ptr(x86_cpu_to_apicid)) { u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); @@ -1555,6 +1555,7 @@ void __cpuinit generic_processor_info(int apicid, int version) per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } #endif + cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 0d969103fce..76c20773bed 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1215,6 +1215,8 @@ void __init init_apic_mappings(void) * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ +int apic_version[MAX_APICS]; + int __init APIC_init_uniprocessor(void) { if (disable_apic) { @@ -1409,15 +1411,26 @@ void __cpuinit generic_processor_info(int apicid, int version) int cpu; cpumask_t tmp_map; + /* + * Validate version + */ + if (version == 0x0) { + printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", + version); + version = 0x10; + } + apic_version[apicid] = version; + if (num_processors >= NR_CPUS) { printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); + " Processor ignored.\n", NR_CPUS); return; } if (num_processors >= maxcpus) { printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); + " Processor ignored.\n", maxcpus); return; } @@ -1437,6 +1450,29 @@ void __cpuinit generic_processor_info(int apicid, int version) if (apicid > max_physical_apicid) max_physical_apicid = apicid; +#ifdef CONFIG_X86_32 + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj + */ + if (max_physical_apicid >= 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } +#endif + +#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) /* are we being called early in kernel startup? */ if (early_per_cpu_ptr(x86_cpu_to_apicid)) { u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); @@ -1448,6 +1484,7 @@ void __cpuinit generic_processor_info(int apicid, int version) per_cpu(x86_cpu_to_apicid, cpu) = apicid; per_cpu(x86_bios_cpu_apicid, cpu) = apicid; } +#endif cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); -- cgit v1.2.3 From fa6b95fc7c6f2f3eb1560e1f33cd13197546c5a0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:58 +0400 Subject: x86: apic - unify end_local_APIC_setup Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 ++++-- arch/x86/kernel/apic_64.c | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index c3a252b1a8b..f1882329b9c 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1155,13 +1155,15 @@ void __cpuinit setup_local_APIC(void) void __cpuinit end_local_APIC_setup(void) { - unsigned long value; - lapic_setup_esr(); + +#ifdef CONFIG_X86_32 + unsigned int value; /* Disable the local apic timer */ value = apic_read(APIC_LVTT); value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, value); +#endif setup_apic_nmi_watchdog(NULL); apic_pm_activate(); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 76c20773bed..eec10b34dc4 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1014,6 +1014,15 @@ void __cpuinit setup_local_APIC(void) void __cpuinit end_local_APIC_setup(void) { lapic_setup_esr(); + +#ifdef CONFIG_X86_32 + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); +#endif + setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } -- cgit v1.2.3 From 0b23e8cf553f5e706b0057363f1319867bcd1a7d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:45:59 +0400 Subject: x86: apic - unify local_apic_timer_interrupt Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 4 ++++ arch/x86/kernel/apic_64.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f1882329b9c..af227bce23b 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -685,7 +685,11 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ +#ifdef CONFIG_X86_64 + add_pda(apic_timer_irqs, 1); +#else per_cpu(irq_stat, cpu).apic_timer_irqs++; +#endif evt->event_handler(evt); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index eec10b34dc4..a9ad2cb4ff9 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -567,7 +567,11 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ +#ifdef CONFIG_X86_64 add_pda(apic_timer_irqs, 1); +#else + per_cpu(irq_stat, cpu).apic_timer_irqs++; +#endif evt->event_handler(evt); } -- cgit v1.2.3 From 79af9bec60e6e218a1e48e8830d603d64a7fc441 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:00 +0400 Subject: x86: apic - unify apic_set_verbosity Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 17 ++++++++++++++--- arch/x86/kernel/apic_64.c | 46 +++++++++++++++++++++++++--------------------- 2 files changed, 39 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index af227bce23b..acbc4dea2ee 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1768,13 +1768,24 @@ early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); static int __init apic_set_verbosity(char *arg) { - if (!arg) + if (!arg) { +#ifdef CONFIG_X86_64 + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; +#endif return -EINVAL; + } - if (strcmp(arg, "debug") == 0) + if (strcmp("debug", arg) == 0) apic_verbosity = APIC_DEBUG; - else if (strcmp(arg, "verbose") == 0) + else if (strcmp("verbose", arg) == 0) apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } return 0; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index a9ad2cb4ff9..999781810c0 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1759,27 +1759,6 @@ early_param("nox2apic", setup_nox2apic); /* * APIC command line parameters */ -static int __init apic_set_verbosity(char *str) -{ - if (str == NULL) { - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; - } - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", str); - return -EINVAL; - } - - return 0; -} -early_param("apic", apic_set_verbosity); - static __init int setup_disableapic(char *str) { disable_apic = 1; @@ -1824,6 +1803,31 @@ static __init int setup_apicpmtimer(char *s) } __setup("apicpmtimer", setup_apicpmtimer); +static int __init apic_set_verbosity(char *arg) +{ + if (!arg) { +#ifdef CONFIG_X86_64 + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; +#endif + return -EINVAL; + } + + if (strcmp("debug", arg) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", arg) == 0) + apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } + + return 0; +} +early_param("apic", apic_set_verbosity); + static int __init lapic_insert_resource(void) { if (!apic_phys) -- cgit v1.2.3 From 789fa735712d726b5cfa5c6be57171b5637a4872 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:01 +0400 Subject: x86: apic - unify disableapic and nolapic setup handlers Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 11 +++++++++-- arch/x86/kernel/apic_64.c | 6 +++--- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index acbc4dea2ee..5680bda62f7 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1737,13 +1737,20 @@ static int __init parse_lapic(char *arg) } early_param("lapic", parse_lapic); -static int __init parse_nolapic(char *arg) +static int __init setup_disableapic(char *arg) { disable_apic = 1; setup_clear_cpu_cap(X86_FEATURE_APIC); return 0; } -early_param("nolapic", parse_nolapic); +early_param("disableapic", setup_disableapic); + +/* same as disableapic, for compatibility */ +static int __init setup_nolapic(char *arg) +{ + return setup_disableapic(arg); +} +early_param("nolapic", setup_nolapic); static int __init parse_disable_apic_timer(char *arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 999781810c0..7a317180ba2 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1759,7 +1759,7 @@ early_param("nox2apic", setup_nox2apic); /* * APIC command line parameters */ -static __init int setup_disableapic(char *str) +static int __init setup_disableapic(char *arg) { disable_apic = 1; setup_clear_cpu_cap(X86_FEATURE_APIC); @@ -1768,9 +1768,9 @@ static __init int setup_disableapic(char *str) early_param("disableapic", setup_disableapic); /* same as disableapic, for compatibility */ -static __init int setup_nolapic(char *str) +static int __init setup_nolapic(char *arg) { - return setup_disableapic(str); + return setup_disableapic(arg); } early_param("nolapic", setup_nolapic); -- cgit v1.2.3 From 3415610b8e38b26b52a430b8846665c2d6bb3558 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:02 +0400 Subject: x86: apic - rearrange parse_lapic_timer_c2_ok Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 5680bda62f7..885e306420a 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1752,6 +1752,13 @@ static int __init setup_nolapic(char *arg) } early_param("nolapic", setup_nolapic); +static int __init parse_lapic_timer_c2_ok(char *arg) +{ + local_apic_timer_c2_ok = 1; + return 0; +} +early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); + static int __init parse_disable_apic_timer(char *arg) { disable_apic_timer = 1; @@ -1766,13 +1773,6 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - static int __init apic_set_verbosity(char *arg) { if (!arg) { -- cgit v1.2.3 From e75bedf415f300a08e9bbcc755784e488574a73e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 20:46:03 +0400 Subject: x86: apic - lapic_resume 32bit - unification fix Just add parenthesis to be identical of current 64bit implementation (so diff will not complain). Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 885e306420a..e975562a76a 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1646,6 +1646,7 @@ static int lapic_resume(struct sys_device *dev) enable_x2apic(); else #endif + { /* * Make sure the APICBASE points to the right address * @@ -1656,6 +1657,7 @@ static int lapic_resume(struct sys_device *dev) l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); + } apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); -- cgit v1.2.3 From 1b4ee4e4096d430c4c12516c1d30a6b0b4f9e9e4 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 Aug 2008 23:12:33 +0400 Subject: x86: apic - compilation warnings fix Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 15 +++++++++------ arch/x86/kernel/apic_64.c | 15 +++++++++------ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index e975562a76a..b8d80c29165 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1162,11 +1162,13 @@ void __cpuinit end_local_APIC_setup(void) lapic_setup_esr(); #ifdef CONFIG_X86_32 - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); + { + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); + } #endif setup_apic_nmi_watchdog(NULL); @@ -1426,6 +1428,8 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { + unsigned int value; + #ifdef CONFIG_X86_32 if (pic_mode) { /* @@ -1443,7 +1447,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) #endif /* Go back to Virtual Wire compatibility mode */ - unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 7a317180ba2..37e037606f3 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1020,11 +1020,13 @@ void __cpuinit end_local_APIC_setup(void) lapic_setup_esr(); #ifdef CONFIG_X86_32 - unsigned int value; - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, value); + { + unsigned int value; + /* Disable the local apic timer */ + value = apic_read(APIC_LVTT); + value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, value); + } #endif setup_apic_nmi_watchdog(NULL); @@ -1363,6 +1365,8 @@ void __init connect_bsp_APIC(void) */ void disconnect_bsp_APIC(int virt_wire_setup) { + unsigned int value; + #ifdef CONFIG_X86_32 if (pic_mode) { /* @@ -1380,7 +1384,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) #endif /* Go back to Virtual Wire compatibility mode */ - unsigned int value; /* For the spurious interrupt use vector F, and enable it */ value = apic_read(APIC_SPIV); -- cgit v1.2.3 From 11494547b1754c4f3bd7f707ab869e2adf54d52f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 21 Aug 2008 01:01:19 -0700 Subject: x86: fix apic version warning after following patch, commit 1b313f4a6d7bee7b2c034b3f1e203bc360a71cca Author: Cyrill Gorcunov Date: Mon Aug 18 20:45:57 2008 +0400 x86: apic - generic_processor_info - use physid_set instead of phys_cpu and physids_or - set phys_cpu_present_map bit AFTER check for allowed number of processors - add checking for APIC valid version in 64bit mode (mostly not needed but added for merging purpose) - add apic_version definition for 64bit mode which is used now we are getting warning for acpi path on 64 bit system. make the 64-bit side fill in apic_version[] as well. [ mingo@elte.hu: build fix ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 12e260e8fb2..d9770a56511 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -239,10 +239,8 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled) return; } -#ifdef CONFIG_X86_32 if (boot_cpu_physical_apicid != -1U) ver = apic_version[boot_cpu_physical_apicid]; -#endif generic_processor_info(id, ver); } @@ -762,10 +760,8 @@ static void __init acpi_register_lapic_address(unsigned long address) set_fixmap_nocache(FIX_APIC_BASE, address); if (boot_cpu_physical_apicid == -1U) { boot_cpu_physical_apicid = read_apic_id(); -#ifdef CONFIG_X86_32 apic_version[boot_cpu_physical_apicid] = GET_APIC_VERSION(apic_read(APIC_LVR)); -#endif } } -- cgit v1.2.3 From 671eef85a3e885dff4ce210d8774ad50a91d5967 Mon Sep 17 00:00:00 2001 From: "Cihula, Joseph" Date: Wed, 20 Aug 2008 16:43:07 -0700 Subject: x86, e820: add support for AddressRangeUnusuable ACPI memory type Add support for the E820_UNUSABLE memory type, which is defined in Revision 3.0b (Oct. 10, 2006) of the ACPI Specification on p. 394 Table 14-1: AddressRangeUnusuable This range of address contains memory in which errors have been detected. This range must not be used by the OSPM. Signed-off-by: Joseph Cihula Signed-off-by: Shane Wang Signed-off-by: Gang Wei Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9af89078f7b..291e6cd9f9c 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -148,6 +148,9 @@ void __init e820_print_map(char *who) case E820_NVS: printk(KERN_CONT "(ACPI NVS)\n"); break; + case E820_UNUSABLE: + printk("(unusable)\n"); + break; default: printk(KERN_CONT "type %u\n", e820.map[i].type); break; @@ -1260,6 +1263,7 @@ static inline const char *e820_type_to_string(int e820_type) case E820_RAM: return "System RAM"; case E820_ACPI: return "ACPI Tables"; case E820_NVS: return "ACPI Non-volatile Storage"; + case E820_UNUSABLE: return "Unusable memory"; default: return "reserved"; } } -- cgit v1.2.3 From f86399396ce7a4f4069828b7dceac5aa5113dfb5 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 30 Jul 2008 18:32:27 -0300 Subject: x86, paravirt_ops: use unsigned long instead of u32 for alloc_p*() pfn args This patch changes the pfn args from 'u32' to 'unsigned long' on alloc_p*() functions on paravirt_ops, and the corresponding implementations for Xen and VMI. The prototypes for CONFIG_PARAVIRT=n are already using unsigned long, so paravirt.h now matches the prototypes on asm-x86/pgalloc.h. It shouldn't result in any changes on generated code on 32-bit, with or without CONFIG_PARAVIRT. On both cases, 'codiff -f' didn't show any change after applying this patch. On 64-bit, there are (expected) binary changes only when CONFIG_PARAVIRT is enabled, as the patch is really supposed to change the size of the pfn args. [ v2: KVM_GUEST: use the right parameter type on kvm_release_pt() ] Signed-off-by: Eduardo Habkost Acked-by: Jeremy Fitzhardinge Acked-by: Zachary Amsden Signed-off-by: Ingo Molnar --- arch/x86/kernel/kvm.c | 2 +- arch/x86/kernel/vmi_32.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8b7a3cf37d2..478bca986ec 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -178,7 +178,7 @@ static void kvm_flush_tlb(void) kvm_deferred_mmu_op(&ftlb, sizeof ftlb); } -static void kvm_release_pt(u32 pfn) +static void kvm_release_pt(unsigned long pfn) { struct kvm_mmu_op_release_pt rpt = { .header.op = KVM_MMU_OP_RELEASE_PT, diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 0a1b1a9d922..340b03643b9 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) } #endif -static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn) +static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) { vmi_set_page_type(pfn, VMI_PAGE_L1); vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); } -static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) +static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) { /* * This call comes in very early, before mem_map is setup. @@ -409,20 +409,20 @@ static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); } -static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) +static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) { vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); vmi_check_page_type(clonepfn, VMI_PAGE_L2); vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); } -static void vmi_release_pte(u32 pfn) +static void vmi_release_pte(unsigned long pfn) { vmi_ops.release_page(pfn, VMI_PAGE_L1); vmi_set_page_type(pfn, VMI_PAGE_NORMAL); } -static void vmi_release_pmd(u32 pfn) +static void vmi_release_pmd(unsigned long pfn) { vmi_ops.release_page(pfn, VMI_PAGE_L2); vmi_set_page_type(pfn, VMI_PAGE_NORMAL); -- cgit v1.2.3 From 94581094e774402a11887719bac10505236c2d51 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:39 +0200 Subject: x86: add alloc_coherent dma_ops callback to GART driver [ v2 - x86: make gart_alloc_coherent return zeroed memory FUJITA Tomonori pointed it out that the dma_alloc_coherent function should return memory set to zero. This patch adds this to the GART implementation too. ] Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 49285f8fd4d..076e64b2d4f 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -499,6 +499,26 @@ error: return 0; } +/* allocate and map a coherent mapping */ +static void * +gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, + gfp_t flag) +{ + void *vaddr; + + vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); + if (!vaddr) + return NULL; + + *dma_addr = gart_map_single(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL); + if (*dma_addr != bad_dma_address) + return vaddr; + + free_pages((unsigned long)vaddr, get_order(size)); + + return NULL; +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -701,6 +721,7 @@ static struct dma_mapping_ops gart_dma_ops = { .sync_sg_for_device = NULL, .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, + .alloc_coherent = gart_alloc_coherent, }; void gart_iommu_shutdown(void) -- cgit v1.2.3 From 43a5a5a09b8cfd56047706cf904718d073ccfd33 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:40 +0200 Subject: x86: add free_coherent dma_ops callback to GART driver Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 076e64b2d4f..ef753e23358 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -519,6 +519,15 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, return NULL; } +/* free a coherent mapping */ +static void +gart_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr) +{ + gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL); + free_pages((unsigned long)vaddr, get_order(size)); +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -722,6 +731,7 @@ static struct dma_mapping_ops gart_dma_ops = { .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .alloc_coherent = gart_alloc_coherent, + .free_coherent = gart_free_coherent, }; void gart_iommu_shutdown(void) -- cgit v1.2.3 From e4ad68b651f22fa71820c0b30bb2807999b2b49f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:41 +0200 Subject: x86: add free_coherent dma_ops callback to Calgary IOMMU driver Signed-off-by: Joerg Roedel Acked-by: Muli Ben-Yehuda Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 218d783ed7a..afb020fdb19 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -511,8 +511,22 @@ error: return ret; } +static void calgary_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + unsigned int npages; + struct iommu_table *tbl = find_iommu_table(dev); + + size = PAGE_ALIGN(size); + npages = size >> PAGE_SHIFT; + + iommu_free(tbl, dma_handle, npages); + free_pages((unsigned long)vaddr, get_order(size)); +} + static struct dma_mapping_ops calgary_dma_ops = { .alloc_coherent = calgary_alloc_coherent, + .free_coherent = calgary_free_coherent, .map_single = calgary_map_single, .unmap_single = calgary_unmap_single, .map_sg = calgary_map_sg, -- cgit v1.2.3 From c5e835f9641e9fcb95d1afb24906821e98b2c6a8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:42 +0200 Subject: x86: add alloc_coherent dma_ops callback to NOMMU driver Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 55 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 3f91f71cdc3..b8ce83c9821 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -72,7 +72,62 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } +static void * +nommu_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp) +{ + unsigned long dma_mask; + int node; + struct page *page; + + if (hwdev->dma_mask == NULL) + return NULL; + + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + gfp |= __GFP_ZERO; + + dma_mask = hwdev->coherent_dma_mask; + if (!dma_mask) + dma_mask = *(hwdev->dma_mask); + + if (dma_mask < DMA_24BIT_MASK) + return NULL; + + node = dev_to_node(hwdev); + +#ifdef CONFIG_X86_64 + if (dma_mask <= DMA_32BIT_MASK) + gfp |= GFP_DMA32; +#endif + + /* No alloc-free penalty for ISA devices */ + if (dma_mask == DMA_24BIT_MASK) + gfp |= GFP_DMA; + +again: + page = alloc_pages_node(node, gfp, get_order(size)); + if (!page) + return NULL; + + if ((page_to_phys(page) + size > dma_mask) && !(gfp & GFP_DMA)) { + free_pages((unsigned long)page_address(page), get_order(size)); + gfp |= GFP_DMA; + goto again; + } + + *dma_addr = page_to_phys(page); + if (check_addr("alloc_coherent", hwdev, *dma_addr, size)) { + flush_write_buffers(); + return page_address(page); + } + + free_pages((unsigned long)page_address(page), get_order(size)); + + return NULL; +} + struct dma_mapping_ops nommu_dma_ops = { + .alloc_coherent = nommu_alloc_coherent, .map_single = nommu_map_single, .map_sg = nommu_map_sg, .is_phys = 1, -- cgit v1.2.3 From a3a76532e0caa093c279806d8fe8608232538af0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:43 +0200 Subject: x86: add free_coherent dma_ops callback to NOMMU driver Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index b8ce83c9821..73853d3fdca 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -126,8 +126,15 @@ again: return NULL; } +static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr) +{ + free_pages((unsigned long)vaddr, get_order(size)); +} + struct dma_mapping_ops nommu_dma_ops = { .alloc_coherent = nommu_alloc_coherent, + .free_coherent = nommu_free_coherent, .map_single = nommu_map_single, .map_sg = nommu_map_sg, .is_phys = 1, -- cgit v1.2.3 From c647c3bb2d16246a87f49035985ddb7c1eb030df Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:44 +0200 Subject: x86: cleanup dma_*_coherent functions All dma_ops implementations support the alloc_coherent and free_coherent callbacks now. This allows a big simplification of the dma_alloc_coherent function which is done with this patch. The dma_free_coherent functions is also cleaned up and calls now the free_coherent callback of the dma_ops implementation. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 121 +++++----------------------------------------- 1 file changed, 12 insertions(+), 109 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 87d4d6964ec..613332b26e3 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -241,33 +241,15 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -/* Allocate DMA memory on node near device */ -static noinline struct page * -dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) -{ - int node; - - node = dev_to_node(dev); - - return alloc_pages_node(node, gfp, order); -} - /* * Allocate memory for a coherent mapping. */ -void * + void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { struct dma_mapping_ops *ops = get_dma_ops(dev); - void *memory = NULL; - struct page *page; - unsigned long dma_mask = 0; - dma_addr_t bus; - int noretry = 0; - - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + void *memory; if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) return memory; @@ -276,89 +258,10 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, dev = &fallback_dev; gfp |= GFP_DMA; } - dma_mask = dev->coherent_dma_mask; - if (dma_mask == 0) - dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; - - /* Device not DMA able */ - if (dev->dma_mask == NULL) - return NULL; - - /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ - if (gfp & __GFP_DMA) - noretry = 1; - -#ifdef CONFIG_X86_64 - /* Why <=? Even when the mask is smaller than 4GB it is often - larger than 16MB and in this case we have a chance of - finding fitting memory in the next higher zone first. If - not retry with true GFP_DMA. -AK */ - if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp |= GFP_DMA32; - if (dma_mask < DMA_32BIT_MASK) - noretry = 1; - } -#endif - again: - page = dma_alloc_pages(dev, - noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); - if (page == NULL) - return NULL; - - { - int high, mmu; - bus = page_to_phys(page); - memory = page_address(page); - high = (bus + size) >= dma_mask; - mmu = high; - if (force_iommu && !(gfp & GFP_DMA)) - mmu = 1; - else if (high) { - free_pages((unsigned long)memory, - get_order(size)); - - /* Don't use the 16MB ZONE_DMA unless absolutely - needed. It's better to use remapping first. */ - if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - /* Let low level make its own zone decisions */ - gfp &= ~(GFP_DMA32|GFP_DMA); - - if (ops->alloc_coherent) - return ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; - } - - memset(memory, 0, size); - if (!mmu) { - *dma_handle = bus; - return memory; - } - } - - if (ops->alloc_coherent) { - free_pages((unsigned long)memory, get_order(size)); - gfp &= ~(GFP_DMA|GFP_DMA32); - return ops->alloc_coherent(dev, size, dma_handle, gfp); - } - - if (ops->map_simple) { - *dma_handle = ops->map_simple(dev, virt_to_phys(memory), - size, - PCI_DMA_BIDIRECTIONAL); - if (*dma_handle != bad_dma_address) - return memory; - } - - if (panic_on_overflow) - panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", - (unsigned long)size); - free_pages((unsigned long)memory, get_order(size)); + if (ops->alloc_coherent) + return ops->alloc_coherent(dev, size, + dma_handle, gfp); return NULL; } EXPORT_SYMBOL(dma_alloc_coherent); @@ -368,17 +271,17 @@ EXPORT_SYMBOL(dma_alloc_coherent); * The caller must ensure that the device has finished accessing the mapping. */ void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) + void *vaddr, dma_addr_t bus) { struct dma_mapping_ops *ops = get_dma_ops(dev); - int order = get_order(size); - WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_from_coherent(dev, order, vaddr)) + WARN_ON(irqs_disabled()); /* for portability */ + + if (dma_release_from_coherent(dev, get_order(size), vaddr)) return; - if (ops->unmap_single) - ops->unmap_single(dev, bus, size, 0); - free_pages((unsigned long)vaddr, order); + + if (ops->free_coherent) + ops->free_coherent(dev, size, vaddr, bus); } EXPORT_SYMBOL(dma_free_coherent); -- cgit v1.2.3 From 6c505ce3930c6a6b455cda53fab3e88ae44f8221 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:45 +0200 Subject: x86: move dma_*_coherent functions to include file All the x86 DMA-API functions are defined in asm/dma-mapping.h. This patch moves the dma_*_coherent functions also to this header file because they are now small enough to do so. This is done as a separate patch because it also includes some renaming and restructuring of the dma-mapping.h file. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 49 +++---------------------------------------- arch/x86/kernel/pci-gart_64.c | 4 ++-- 2 files changed, 5 insertions(+), 48 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 613332b26e3..0a1408abcc6 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -41,11 +41,12 @@ EXPORT_SYMBOL(bad_dma_address); /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible to older i386. */ -struct device fallback_dev = { +struct device x86_dma_fallback_dev = { .bus_id = "fallback device", .coherent_dma_mask = DMA_32BIT_MASK, - .dma_mask = &fallback_dev.coherent_dma_mask, + .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, }; +EXPORT_SYMBOL(x86_dma_fallback_dev); int dma_set_mask(struct device *dev, u64 mask) { @@ -241,50 +242,6 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -/* - * Allocate memory for a coherent mapping. - */ - void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp) -{ - struct dma_mapping_ops *ops = get_dma_ops(dev); - void *memory; - - if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) - return memory; - - if (!dev) { - dev = &fallback_dev; - gfp |= GFP_DMA; - } - - if (ops->alloc_coherent) - return ops->alloc_coherent(dev, size, - dma_handle, gfp); - return NULL; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -/* - * Unmap coherent memory. - * The caller must ensure that the device has finished accessing the mapping. - */ -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) -{ - struct dma_mapping_ops *ops = get_dma_ops(dev); - - WARN_ON(irqs_disabled()); /* for portability */ - - if (dma_release_from_coherent(dev, get_order(size), vaddr)) - return; - - if (ops->free_coherent) - ops->free_coherent(dev, size, vaddr, bus); -} -EXPORT_SYMBOL(dma_free_coherent); - static int __init pci_iommu_init(void) { calgary_iommu_init(); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index ef753e23358..e81df25dc06 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -276,7 +276,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) unsigned long bus; if (!dev) - dev = &fallback_dev; + dev = &x86_dma_fallback_dev; if (!need_iommu(dev, paddr, size)) return paddr; @@ -427,7 +427,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) return 0; if (!dev) - dev = &fallback_dev; + dev = &x86_dma_fallback_dev; out = 0; start = 0; -- cgit v1.2.3 From 2cd54961caff9fe9109807c6603a0af0729b9591 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 19 Aug 2008 16:32:46 +0200 Subject: x86, AMD IOMMU: remove obsolete FIXME comment Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index de39e1f2ede..d15081c3823 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1038,8 +1038,6 @@ out: /* * The exported free_coherent function for dma_ops. - * FIXME: fix the generic x86 DMA layer so that it actually calls that - * function. */ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr) -- cgit v1.2.3 From 766af9fa812f49feb4a3e62cf92f3d37f33c7fb6 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 22 Aug 2008 00:12:09 +0900 Subject: dma-mapping.h, x86: remove last user of dma_mapping_ops->map_simple pci-dma.c doesn't use map_simple hook any more so we can remove it from struct dma_mapping_ops now. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index e81df25dc06..cfd7ec25e37 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -720,7 +720,6 @@ extern int agp_amd64_init(void); static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, - .map_simple = gart_map_simple, .unmap_single = gart_unmap_single, .sync_single_for_cpu = NULL, .sync_single_for_device = NULL, -- cgit v1.2.3 From 421076e2bec1b917bdcf83da35b24f6349bf35db Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 22 Aug 2008 16:29:10 +0900 Subject: x86: dma_*_coherent rework patchset v2, fix alloc_coherent dma_ops callback was added to GART, however, it doesn't return a size aligned address wrt dma_alloc_coherent, as DMA-mapping.txt defines. This patch fixes it. This patch also removes unused gart_map_simple (dma_mapping_ops->map_simple has gone). Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 338c4f24155..4d0864900b8 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -261,20 +261,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); } -static dma_addr_t -gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) -{ - dma_addr_t map; - unsigned long align_mask; - - align_mask = (1UL << get_order(size)) - 1; - map = dma_map_area(dev, paddr, size, dir, align_mask); - - flush_gart(); - - return map; -} - /* Map a single area into the IOMMU */ static dma_addr_t gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) @@ -512,12 +498,21 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { void *vaddr; + unsigned long align_mask; vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); if (!vaddr) return NULL; - *dma_addr = gart_map_single(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL); + align_mask = (1UL << get_order(size)) - 1; + + if (!dev) + dev = &x86_dma_fallback_dev; + + *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, + align_mask); + flush_gart(); + if (*dma_addr != bad_dma_address) return vaddr; -- cgit v1.2.3 From b05f78f5c713eda2c34e495d92495ee4f1c3b5e1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 22 Aug 2008 01:32:50 -0700 Subject: x86_64: printout msr -v2 commandline show_msr=1 for bsp, show_msr=32 for all 32 cpus. [ mingo@elte.hu: added documentation ] Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 51 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/paravirt.c | 1 + 2 files changed, 52 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index dd6e3f15017..bca2d6980e8 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -394,6 +394,49 @@ static __init int setup_noclflush(char *arg) } __setup("noclflush", setup_noclflush); +struct msr_range { + unsigned min; + unsigned max; +}; + +static struct msr_range msr_range_array[] __cpuinitdata = { + { 0x00000000, 0x00000418}, + { 0xc0000000, 0xc000040b}, + { 0xc0010000, 0xc0010142}, + { 0xc0011000, 0xc001103b}, +}; + +static void __cpuinit print_cpu_msr(void) +{ + unsigned index; + u64 val; + int i; + unsigned index_min, index_max; + + for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { + index_min = msr_range_array[i].min; + index_max = msr_range_array[i].max; + for (index = index_min; index < index_max; index++) { + if (rdmsrl_amd_safe(index, &val)) + continue; + printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + } + } +} + +static int show_msr __cpuinitdata; +static __init int setup_show_msr(char *arg) +{ + int num; + + get_option(&arg, &num); + + if (num > 0) + show_msr = num; + return 1; +} +__setup("show_msr=", setup_show_msr); + void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { if (c->x86_model_id[0]) @@ -403,6 +446,14 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT " stepping %02x\n", c->x86_mask); else printk(KERN_CONT "\n"); + +#ifdef CONFIG_SMP + if (c->cpu_index < show_msr) + print_cpu_msr(); +#else + if (show_msr) + print_cpu_msr(); +#endif } static __init int setup_disablecpuid(char *arg) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52d79..c6044682e1e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -330,6 +330,7 @@ struct pv_cpu_ops pv_cpu_ops = { #endif .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, + .read_msr_amd = native_read_msr_amd_safe, .write_msr = native_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, -- cgit v1.2.3 From bbb65d2d365efe9951290e61678dcf81ec60add4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:10 +0200 Subject: x86: use cpuid vector 0xb when available for detecting cpu topology cpuid leaf 0xb provides extended topology enumeration. This interface provides the 32-bit x2APIC id of the logical processor and it also provides a new mechanism to detect SMT and core siblings (which provides increased addressability). Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 86 ++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 3 ++ arch/x86/kernel/cpu/intel.c | 13 +++-- arch/x86/kernel/cpu/intel_64.c | 5 +- 4 files changed, 103 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84a8220a607..aa9641ae670 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -7,6 +7,8 @@ #include #include +#include + struct cpuid_bit { u16 feature; u8 reg; @@ -48,6 +50,90 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) } } +/* leaf 0xb SMT level */ +#define SMT_LEVEL 0 + +/* leaf 0xb sub-leaf types */ +#define INVALID_TYPE 0 +#define SMT_TYPE 1 +#define CORE_TYPE 2 + +#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) +#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) +#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) + +/* + * Check for extended topology enumeration cpuid leaf 0xb and if it + * exists, use it for populating initial_apicid and cpu topology + * detection. + */ +void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx, sub_index; + unsigned int ht_mask_width, core_plus_mask_width; + unsigned int core_select_mask, core_level_siblings; + + if (c->cpuid_level < 0xb) + return; + + cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + + /* + * check if the cpuid leaf 0xb is actually implemented. + */ + if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) + return; + + set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); + + /* + * initial apic id, which also represents 32-bit extended x2apic id. + */ + c->initial_apicid = edx; + + /* + * Populate HT related information from sub-leaf level 0. + */ + core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + + sub_index = 1; + do { + cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); + + /* + * Check for the Core type in the implemented sub leaves. + */ + if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { + core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + break; + } + + sub_index++; + } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); + + core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; + +#ifdef CONFIG_X86_32 + c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) + & core_select_mask; + c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); +#else + c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; + c->phys_proc_id = phys_pkg_id(core_plus_mask_width); +#endif + c->x86_max_cores = (core_level_siblings / smp_num_siblings); + + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + return; +} + #ifdef CONFIG_X86_PAT void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) { diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index af569a964e7..a2888c7b56a 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -128,6 +128,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) u32 eax, ebx, ecx, edx; int index_msb, core_bits; + if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) + return; + cpuid(1, &eax, &ebx, &ecx, &edx); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 77618c717d7..58a6f1a0b29 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -176,9 +176,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (p) strcpy(c->x86_model_id, p); - c->x86_max_cores = num_cpu_cores(c); - - detect_ht(c); + detect_extended_topology(c); + + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ + c->x86_max_cores = num_cpu_cores(c); + detect_ht(c); + } /* Work around errata */ Intel_errata_workarounds(c); diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 1019c58d39f..42d501a8c41 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -80,7 +80,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - c->x86_max_cores = intel_num_cpu_cores(c); + + detect_extended_topology(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) + c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); } -- cgit v1.2.3 From e17941b0c140562d92e5a3bc12b4ad88281c7926 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:11 +0200 Subject: x86: use x2apic id reported by cpuid during topology discovery use x2apic id reported by cpuid during topology discovery, instead of the apic id configured in the APIC. For most of the systems, x2apic id reported by cpuid leaf 0xb will be same as the physical apic id reported by the APIC_ID register of the APIC. We follow the suggested guidelines and use the apic id reported by the cpuid. No change to non-generic UV platforms, will use the apic id reported in the APIC_ID register as the cpuid reported apic id's may not be unique. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_cluster.c | 7 +------ arch/x86/kernel/genx2apic_phys.c | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index ef3f3182d50..0bf0a8624b8 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -120,14 +120,9 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int x2apic_read_id(void) -{ - return apic_read(APIC_ID); -} - static unsigned int phys_pkg_id(int index_msb) { - return x2apic_read_id() >> index_msb; + return current_cpu_data.initial_apicid >> index_msb; } static void x2apic_send_IPI_self(int vector) diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3229c68aedd..e2401ab0c46 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -118,14 +118,9 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int x2apic_read_id(void) -{ - return apic_read(APIC_ID); -} - static unsigned int phys_pkg_id(int index_msb) { - return x2apic_read_id() >> index_msb; + return current_cpu_data.initial_apicid >> index_msb; } void x2apic_send_IPI_self(int vector) -- cgit v1.2.3 From c7ffa6c26277b403920e2255d10df849bd613380 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 25 Aug 2008 13:11:27 +0300 Subject: x86: default to reboot via ACPI Triple-fault and keyboard reset may assert INIT instead of RESET; however INIT is blocked when Intel VT is enabled. This leads to a partially reset machine when invoking emergency_restart via sysrq-b: the processor is still working but other parts of the system are dead. Default to rebooting via ACPI, which correctly asserts RESET and reboots the machine. This is safe since we will fall back to keyboard reset and triple fault if acpi is not enabled or if the reset is not successful. Signed-off-by: Avi Kivity Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 724adfc63cb..f4c93f1cfc1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -29,7 +29,11 @@ EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; static int reboot_mode; -enum reboot_type reboot_type = BOOT_KBD; +/* + * Keyboard reset and triple fault may result in INIT, not RESET, which + * doesn't work when we're in vmx root mode. Try ACPI first. + */ +enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) -- cgit v1.2.3 From 11c231a962c740b3216eb6565149ae5a7944cba7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 23 Aug 2008 17:47:11 +0200 Subject: x86: use x2apic id reported by cpuid during topology discovery, fix v2: Fix for !SMP build Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index aa9641ae670..a2d1a545767 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -69,6 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) */ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) { +#ifdef CONFIG_SMP unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; @@ -132,6 +133,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); return; +#endif } #ifdef CONFIG_X86_PAT -- cgit v1.2.3 From 2c7e9fd4c6cb7f4b0bc7162e9a30847e51a1ca1b Mon Sep 17 00:00:00 2001 From: Joe Korty Date: Wed, 27 Aug 2008 10:35:06 -0400 Subject: x86: make poll_idle behave more like the other idle methods Make poll_idle() behave more like the other idle methods. Currently, poll_idle() returns immediately. The other idle methods all wait indefinately for some condition to come true before returning. poll_idle should emulate these other methods and also wait for a return condition, in this case, for need_resched() to become 'true'. Without this delay the idle loop spends all of its time in the outer loop that calls poll_idle. This outer loop, these days, does real work, some of it under rcu locks. That work should only be done when idle is entered and when idle exits, not continuously while idle is spinning. Signed-off-by: Joe Korty Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a..4e09d26748c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -185,7 +185,8 @@ static void mwait_idle(void) static void poll_idle(void) { local_irq_enable(); - cpu_relax(); + while (!need_resched()) + cpu_relax(); } /* -- cgit v1.2.3 From 1625324d22409e32e3f8eb86018cad72e1c09d61 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 27 Aug 2008 23:01:16 -0700 Subject: x86: move dir es7000 to es7000_32.c to be aligned with numaq, summit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/es7000_32.c | 345 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 346 insertions(+) create mode 100644 arch/x86/kernel/es7000_32.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a07ec14f331..dac24c4390d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o +obj-$(CONFIG_X86_ES7000) += es7000_32.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o obj-y += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c new file mode 100644 index 00000000000..849e5cd485b --- /dev/null +++ b/arch/x86/kernel/es7000_32.c @@ -0,0 +1,345 @@ +/* + * Written by: Garry Forsgren, Unisys Corporation + * Natalie Protasevich, Unisys Corporation + * This file contains the code to configure and interface + * with Unisys ES7000 series hardware system manager. + * + * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Unisys Corporation, Township Line & Union Meeting + * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: + * + * http://www.unisys.com + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * ES7000 chipsets + */ + +#define NON_UNISYS 0 +#define ES7000_CLASSIC 1 +#define ES7000_ZORRO 2 + + +#define MIP_REG 1 +#define MIP_PSAI_REG 4 + +#define MIP_BUSY 1 +#define MIP_SPIN 0xf0000 +#define MIP_VALID 0x0100000000000000ULL +#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) + +#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) + +struct mip_reg_info { + unsigned long long mip_info; + unsigned long long delivery_info; + unsigned long long host_reg; + unsigned long long mip_reg; +}; + +struct part_info { + unsigned char type; + unsigned char length; + unsigned char part_id; + unsigned char apic_mode; + unsigned long snum; + char ptype[16]; + char sname[64]; + char pname[64]; +}; + +struct psai { + unsigned long long entry_type; + unsigned long long addr; + unsigned long long bep_addr; +}; + +struct es7000_mem_info { + unsigned char type; + unsigned char length; + unsigned char resv[6]; + unsigned long long start; + unsigned long long size; +}; + +struct es7000_oem_table { + unsigned long long hdr; + struct mip_reg_info mip; + struct part_info pif; + struct es7000_mem_info shm; + struct psai psai; +}; + +#ifdef CONFIG_ACPI + +struct oem_table { + struct acpi_table_header Header; + u32 OEMTableAddr; + u32 OEMTableSize; +}; + +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +#endif + +struct mip_reg { + unsigned long long off_0; + unsigned long long off_8; + unsigned long long off_10; + unsigned long long off_18; + unsigned long long off_20; + unsigned long long off_28; + unsigned long long off_30; + unsigned long long off_38; +}; + +#define MIP_SW_APIC 0x1020b +#define MIP_FUNC(VALUE) (VALUE & 0xff) + +/* + * ES7000 Globals + */ + +static volatile unsigned long *psai = NULL; +static struct mip_reg *mip_reg; +static struct mip_reg *host_reg; +static int mip_port; +static unsigned long mip_addr, host_addr; + +int es7000_plat; + +/* + * GSI override for ES7000 platforms. + */ + +static unsigned int base; + +static int +es7000_rename_gsi(int ioapic, int gsi) +{ + if (es7000_plat == ES7000_ZORRO) + return gsi; + + if (!base) { + int i; + for (i = 0; i < nr_ioapics; i++) + base += nr_ioapic_registers[i]; + } + + if (!ioapic && (gsi < 16)) + gsi += base; + return gsi; +} + +void __init +setup_unisys(void) +{ + /* + * Determine the generation of the ES7000 currently running. + * + * es7000_plat = 1 if the machine is a 5xx ES7000 box + * es7000_plat = 2 if the machine is a x86_64 ES7000 box + * + */ + if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) + es7000_plat = ES7000_ZORRO; + else + es7000_plat = ES7000_CLASSIC; + ioapic_renumber_irq = es7000_rename_gsi; +} + +/* + * Parse the OEM Table + */ + +int __init +parse_unisys_oem (char *oemptr) +{ + int i; + int success = 0; + unsigned char type, size; + unsigned long val; + char *tp = NULL; + struct psai *psaip = NULL; + struct mip_reg_info *mi; + struct mip_reg *host, *mip; + + tp = oemptr; + + tp += 8; + + for (i=0; i <= 6; i++) { + type = *tp++; + size = *tp++; + tp -= 2; + switch (type) { + case MIP_REG: + mi = (struct mip_reg_info *)tp; + val = MIP_RD_LO(mi->host_reg); + host_addr = val; + host = (struct mip_reg *)val; + host_reg = __va(host); + val = MIP_RD_LO(mi->mip_reg); + mip_port = MIP_PORT(mi->mip_info); + mip_addr = val; + mip = (struct mip_reg *)val; + mip_reg = __va(mip); + pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", + (unsigned long)host_reg); + pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", + (unsigned long)mip_reg); + success++; + break; + case MIP_PSAI_REG: + psaip = (struct psai *)tp; + if (tp != NULL) { + if (psaip->addr) + psai = __va(psaip->addr); + else + psai = NULL; + success++; + } + break; + default: + break; + } + tp += size; + } + + if (success < 2) { + es7000_plat = NON_UNISYS; + } else + setup_unisys(); + return es7000_plat; +} + +#ifdef CONFIG_ACPI +int __init +find_unisys_acpi_oem_table(unsigned long *oem_addr) +{ + struct acpi_table_header *header = NULL; + int i = 0; + while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { + if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { + struct oem_table *t = (struct oem_table *)header; + *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr, + t->OEMTableSize); + return 0; + } + } + return -1; +} +#endif + +static void +es7000_spin(int n) +{ + int i = 0; + + while (i++ < n) + rep_nop(); +} + +static int __init +es7000_mip_write(struct mip_reg *mip_reg) +{ + int status = 0; + int spin; + + spin = MIP_SPIN; + while (((unsigned long long)host_reg->off_38 & + (unsigned long long)MIP_VALID) != 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); + return -1; + } + es7000_spin(MIP_SPIN); + } + + memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); + outb(1, mip_port); + + spin = MIP_SPIN; + + while (((unsigned long long)mip_reg->off_38 & + (unsigned long long)MIP_VALID) == 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); + return -1; + } + es7000_spin(MIP_SPIN); + } + + status = ((unsigned long long)mip_reg->off_0 & + (unsigned long long)0xffff0000000000ULL) >> 48; + mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 & + (unsigned long long)~MIP_VALID); + return status; +} + +int +es7000_start_cpu(int cpu, unsigned long eip) +{ + unsigned long vect = 0, psaival = 0; + + if (psai == NULL) + return -1; + + vect = ((unsigned long)__pa(eip)/0x1000) << 16; + psaival = (0x1000000 | vect | cpu); + + while (*psai & 0x1000000) + ; + + *psai = psaival; + + return 0; + +} + +void __init +es7000_sw_apic(void) +{ + if (es7000_plat) { + int mip_status; + struct mip_reg es7000_mip_reg; + + printk("ES7000: Enabling APIC mode.\n"); + memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); + es7000_mip_reg.off_0 = MIP_SW_APIC; + es7000_mip_reg.off_38 = (MIP_VALID); + while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) + printk("es7000_sw_apic: command failed, status = %x\n", + mip_status); + return; + } +} -- cgit v1.2.3 From cce3e057242d3d46fea07b9eb3910b0076419be5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:44 +0000 Subject: x86: TSC: define the PIT latch value separate Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 346cae5ac42..aa11413e7c1 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -122,6 +122,10 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) return ULLONG_MAX; } +#define CAL_MS 50 +#define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) +#define CAL_PIT_LOOPS 5000 + /* * Try to calibrate the TSC against the Programmable * Interrupt Timer and return the frequency of the TSC @@ -144,8 +148,8 @@ static unsigned long pit_calibrate_tsc(void) * (LSB then MSB) to begin countdown. */ outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + outb(CAL_LATCH & 0xff, 0x42); + outb(CAL_LATCH >> 8, 0x42); tsc = t1 = t2 = get_cycles(); @@ -166,18 +170,18 @@ static unsigned long pit_calibrate_tsc(void) /* * Sanity checks: * - * If we were not able to read the PIT more than 5000 + * If we were not able to read the PIT more than PIT_MIN_LOOPS * times, then we have been hit by a massive SMI * * If the maximum is 10 times larger than the minimum, * then we got hit by an SMI as well. */ - if (pitcnt < 5000 || tscmax > 10 * tscmin) + if (pitcnt < CAL_PIT_LOOPS || tscmax > 10 * tscmin) return ULONG_MAX; /* Calculate the PIT value */ delta = t2 - t1; - do_div(delta, 50); + do_div(delta, CAL_MS); return delta; } -- cgit v1.2.3 From d683ef7afe8b6dbac6a3c681cef8a908357793ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:48 +0000 Subject: x86: TSC: separate hpet/pmtimer calculation out Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 56 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index aa11413e7c1..ebb9bf824a0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -122,6 +122,43 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) return ULLONG_MAX; } +/* + * Calculate the TSC frequency from HPET reference + */ +static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) +{ + u64 tmp; + + if (hpet2 < hpet1) + hpet2 += 0x100000000ULL; + hpet2 -= hpet1; + tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); + do_div(tmp, 1000000); + do_div(deltatsc, tmp); + + return (unsigned long) deltatsc; +} + +/* + * Calculate the TSC frequency from PMTimer reference + */ +static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) +{ + u64 tmp; + + if (!pm1 && !pm2) + return ULONG_MAX; + + if (pm2 < pm1) + pm2 += (u64)ACPI_PM_OVRRUN; + pm2 -= pm1; + tmp = pm2 * 1000000000LL; + do_div(tmp, PMTMR_TICKS_PER_SEC); + do_div(deltatsc, tmp); + + return (unsigned long) deltatsc; +} + #define CAL_MS 50 #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) #define CAL_PIT_LOOPS 5000 @@ -247,22 +284,11 @@ unsigned long native_calibrate_tsc(void) continue; tsc2 = (tsc2 - tsc1) * 1000000LL; + if (hpet) + tsc2 = calc_hpet_ref(tsc2, hpet1, hpet2); + else + tsc2 = calc_pmtimer_ref(tsc2, pm1, pm2); - if (hpet) { - if (hpet2 < hpet1) - hpet2 += 0x100000000ULL; - hpet2 -= hpet1; - tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); - do_div(tsc1, 1000000); - } else { - if (pm2 < pm1) - pm2 += (u64)ACPI_PM_OVRRUN; - pm2 -= pm1; - tsc1 = pm2 * 1000000000LL; - do_div(tsc1, PMTMR_TICKS_PER_SEC); - } - - do_div(tsc2, tsc1); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); } -- cgit v1.2.3 From 827014be05e4515fa0dfc32e3100c4dab2070a98 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:53 +0000 Subject: x86: TSC: use one set of reference variables Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ebb9bf824a0..52284d31fc9 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *p, int hpet) { u64 t1, t2; int i; @@ -112,9 +112,9 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) for (i = 0; i < MAX_RETRIES; i++) { t1 = get_cycles(); if (hpet) - *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; + *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; else - *pm = acpi_pm_read_early(); + *p = acpi_pm_read_early(); t2 = get_cycles(); if ((t2 - t1) < SMI_TRESHOLD) return t2; @@ -228,7 +228,7 @@ static unsigned long pit_calibrate_tsc(void) */ unsigned long native_calibrate_tsc(void) { - u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2; + u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; unsigned long flags; int hpet = is_hpet_enabled(), i; @@ -267,16 +267,16 @@ unsigned long native_calibrate_tsc(void) * read the end value. */ local_irq_save(flags); - tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); + tsc1 = tsc_read_refs(&ref1, hpet); tsc_pit_khz = pit_calibrate_tsc(); - tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + tsc2 = tsc_read_refs(&ref2, hpet); local_irq_restore(flags); /* Pick the lowest PIT TSC calibration so far */ tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); /* hpet or pmtimer available ? */ - if (!hpet && !pm1 && !pm2) + if (!hpet && !ref1 && !ref2) continue; /* Check, whether the sampling was disturbed by an SMI */ @@ -285,9 +285,9 @@ unsigned long native_calibrate_tsc(void) tsc2 = (tsc2 - tsc1) * 1000000LL; if (hpet) - tsc2 = calc_hpet_ref(tsc2, hpet1, hpet2); + tsc2 = calc_hpet_ref(tsc2, ref1, ref2); else - tsc2 = calc_pmtimer_ref(tsc2, pm1, pm2); + tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); } @@ -301,7 +301,7 @@ unsigned long native_calibrate_tsc(void) "SMI disturbance.\n"); /* We don't have an alternative source, disable TSC */ - if (!hpet && !pm1 && !pm2) { + if (!hpet && !ref1 && !ref2) { printk("TSC: No reference (HPET/PMTIMER) available\n"); return 0; } @@ -321,7 +321,7 @@ unsigned long native_calibrate_tsc(void) } /* We don't have an alternative source, use the PIT calibration value */ - if (!hpet && !pm1 && !pm2) { + if (!hpet && !ref1 && !ref2) { printk(KERN_INFO "TSC: Using PIT calibration value\n"); return tsc_pit_min; } -- cgit v1.2.3 From a977c400957451f3bd92b9ed6022f5fe8a6cbbf5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Sep 2008 15:18:59 +0000 Subject: x86: TSC make the calibration loop smarter The last changes made the calibration loop 250ms long which is far too much. Try to do that more clever. Experiments have shown that using a 10ms delay for the PIT based calibration gives us a good enough value. If we have a reference (HPET/PMTIMER) and the result of the PIT and the reference is close enough, then we can break out of the calibration loop on a match right away and use the reference value. Otherwise we just loop 3 times and decide then, which value to take. One caveat is that for virtualized environments the PIT calibration often does not work at all and I found out that 10us is a bit too short as well for the reference to give a sane result. The solution here is to make the last loop longer when the first two PIT calibrations failed. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 95 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 52284d31fc9..da033b5b3e1 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -159,9 +159,14 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) return (unsigned long) deltatsc; } -#define CAL_MS 50 +#define CAL_MS 10 #define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) -#define CAL_PIT_LOOPS 5000 +#define CAL_PIT_LOOPS 1000 + +#define CAL2_MS 50 +#define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) +#define CAL2_PIT_LOOPS 5000 + /* * Try to calibrate the TSC against the Programmable @@ -170,7 +175,7 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) * * Return ULONG_MAX on failure to calibrate. */ -static unsigned long pit_calibrate_tsc(void) +static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) { u64 tsc, t1, t2, delta; unsigned long tscmin, tscmax; @@ -185,8 +190,8 @@ static unsigned long pit_calibrate_tsc(void) * (LSB then MSB) to begin countdown. */ outb(0xb0, 0x43); - outb(CAL_LATCH & 0xff, 0x42); - outb(CAL_LATCH >> 8, 0x42); + outb(latch & 0xff, 0x42); + outb(latch >> 8, 0x42); tsc = t1 = t2 = get_cycles(); @@ -207,18 +212,18 @@ static unsigned long pit_calibrate_tsc(void) /* * Sanity checks: * - * If we were not able to read the PIT more than PIT_MIN_LOOPS + * If we were not able to read the PIT more than loopmin * times, then we have been hit by a massive SMI * * If the maximum is 10 times larger than the minimum, * then we got hit by an SMI as well. */ - if (pitcnt < CAL_PIT_LOOPS || tscmax > 10 * tscmin) + if (pitcnt < loopmin || tscmax > 10 * tscmin) return ULONG_MAX; /* Calculate the PIT value */ delta = t2 - t1; - do_div(delta, CAL_MS); + do_div(delta, ms); return delta; } @@ -230,8 +235,8 @@ unsigned long native_calibrate_tsc(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags; - int hpet = is_hpet_enabled(), i; + unsigned long flags, latch, ms; + int hpet = is_hpet_enabled(), i, loopmin; /* * Run 5 calibration loops to get the lowest frequency value @@ -257,7 +262,13 @@ unsigned long native_calibrate_tsc(void) * calibration delay loop as we have to wait for a certain * amount of time anyway. */ - for (i = 0; i < 5; i++) { + + /* Preset PIT loop values */ + latch = CAL_LATCH; + ms = CAL_MS; + loopmin = CAL_PIT_LOOPS; + + for (i = 0; i < 3; i++) { unsigned long tsc_pit_khz; /* @@ -268,7 +279,7 @@ unsigned long native_calibrate_tsc(void) */ local_irq_save(flags); tsc1 = tsc_read_refs(&ref1, hpet); - tsc_pit_khz = pit_calibrate_tsc(); + tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); tsc2 = tsc_read_refs(&ref2, hpet); local_irq_restore(flags); @@ -290,6 +301,35 @@ unsigned long native_calibrate_tsc(void) tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); + + /* Check the reference deviation */ + delta = ((u64) tsc_pit_min) * 100; + do_div(delta, tsc_ref_min); + + /* + * If both calibration results are inside a 10% window + * then we can be sure, that the calibration + * succeeded. We break out of the loop right away. We + * use the reference value, as it is more precise. + */ + if (delta >= 90 && delta <= 110) { + printk(KERN_INFO + "TSC: PIT calibration matches %s. %d loops\n", + hpet ? "HPET" : "PMTIMER", i + 1); + return tsc_ref_min; + } + + /* + * Check whether PIT failed more than once. This + * happens in virtualized environments. We need to + * give the virtual PC a slightly longer timeframe for + * the HPET/PMTIMER to make the result precise. + */ + if (i == 1 && tsc_pit_min == ULONG_MAX) { + latch = CAL2_LATCH; + ms = CAL2_MS; + loopmin = CAL2_PIT_LOOPS; + } } /* @@ -309,7 +349,7 @@ unsigned long native_calibrate_tsc(void) /* The alternative source failed as well, disable TSC */ if (tsc_ref_min == ULONG_MAX) { printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " - "failed due to SMI disturbance.\n"); + "failed.\n"); return 0; } @@ -328,37 +368,18 @@ unsigned long native_calibrate_tsc(void) /* The alternative source failed, use the PIT calibration value */ if (tsc_ref_min == ULONG_MAX) { - printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due " - "to SMI disturbance. Using PIT calibration\n"); + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " + "Using PIT calibration\n"); return tsc_pit_min; } - /* Check the reference deviation */ - delta = ((u64) tsc_pit_min) * 100; - do_div(delta, tsc_ref_min); - - /* - * If both calibration results are inside a 5% window, the we - * use the lower frequency of those as it is probably the - * closest estimate. - */ - if (delta >= 95 && delta <= 105) { - printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n", - hpet ? "HPET" : "PMTIMER"); - printk(KERN_INFO "TSC: using %s calibration value\n", - tsc_pit_min <= tsc_ref_min ? "PIT" : - hpet ? "HPET" : "PMTIMER"); - return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min; - } - - printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", - hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); - /* * The calibration values differ too much. In doubt, we use * the PIT value as we know that there are PMTIMERs around - * running at double speed. + * running at double speed. At least we let the user know: */ + printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", + hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); printk(KERN_INFO "TSC: Using PIT calibration value\n"); return tsc_pit_min; } -- cgit v1.2.3 From 58f7c98850a226d3fb05b1095af9f7c4ea3507ba Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Aug 2008 13:52:25 -0700 Subject: x86: split e820 reserved entries record to late v2 so could let BAR res register at first, or even pnp. v2: insert e820 reserve resources before pnp_system_init Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 291e6cd9f9c..523d6c5605d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1271,13 +1271,15 @@ static inline const char *e820_type_to_string(int e820_type) /* * Mark e820 reserved areas as busy for the resource manager. */ +struct resource __initdata *e820_res; void __init e820_reserve_resources(void) { int i; - struct resource *res; u64 end; + struct resource *res; res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); + e820_res = res; for (i = 0; i < e820.nr_map; i++) { end = e820.map[i].addr + e820.map[i].size - 1; #ifndef CONFIG_RESOURCES_64BIT @@ -1291,7 +1293,8 @@ void __init e820_reserve_resources(void) res->end = end; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); + if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) + insert_resource(&iomem_resource, res); res++; } @@ -1303,6 +1306,19 @@ void __init e820_reserve_resources(void) } } +void __init e820_reserve_resources_late(void) +{ + int i; + struct resource *res; + + res = e820_res; + for (i = 0; i < e820.nr_map; i++) { + if (e820.map[i].type == E820_RESERVED && res->start >= (1ULL<<20)) + insert_resource(&iomem_resource, res); + res++; + } +} + char *__init default_machine_specific_memory_setup(void) { char *who = "BIOS-e820"; -- cgit v1.2.3 From a5444d15b611cf2ffe2bc52aaf11f2ac51882f89 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Aug 2008 08:09:23 +0200 Subject: x86: split e820 reserved entries record to late v4 this one replaces: | commit a2bd7274b47124d2fc4dfdb8c0591f545ba749dd | Author: Yinghai Lu | Date: Mon Aug 25 00:56:08 2008 -0700 | | x86: fix HPET regression in 2.6.26 versus 2.6.25, check hpet against BAR, v3 v2: insert e820 reserve resources before pnp_system_init v3: fix merging problem in tip/x86/core v4: address Linus's review about comments and condition in _late() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 523d6c5605d..a7a71339bfb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1271,12 +1271,12 @@ static inline const char *e820_type_to_string(int e820_type) /* * Mark e820 reserved areas as busy for the resource manager. */ -struct resource __initdata *e820_res; +static struct resource __initdata *e820_res; void __init e820_reserve_resources(void) { int i; - u64 end; struct resource *res; + u64 end; res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); e820_res = res; @@ -1293,6 +1293,12 @@ void __init e820_reserve_resources(void) res->end = end; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + /* + * don't register the region that could be conflicted with + * pci device BAR resource and insert them later in + * pcibios_resource_survey() + */ if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) insert_resource(&iomem_resource, res); res++; @@ -1313,7 +1319,7 @@ void __init e820_reserve_resources_late(void) res = e820_res; for (i = 0; i < e820.nr_map; i++) { - if (e820.map[i].type == E820_RESERVED && res->start >= (1ULL<<20)) + if (!res->parent && res->end) insert_resource(&iomem_resource, res); res++; } -- cgit v1.2.3 From fac8f1e4f99dff7a0c3a929f327d66f46de6fa21 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:59:22 +0200 Subject: x86: split e820 reserved entries record to late, v7 try to insert_resource second time, by expanding the resource... for case: e820 reserved entry is partially overlapped with bar res... hope it will never happen Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a7a71339bfb..e24d1bc47b4 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1320,7 +1320,7 @@ void __init e820_reserve_resources_late(void) res = e820_res; for (i = 0; i < e820.nr_map; i++) { if (!res->parent && res->end) - insert_resource(&iomem_resource, res); + reserve_region_with_split(&iomem_resource, res->start, res->end, res->name); res++; } } -- cgit v1.2.3 From 5fef55fddb7317585cabc1eae38dbd57f1c59729 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:43 +0200 Subject: x86: move mtrr cpu cap setting early in early_init_xxxx Krzysztof Helt found MTRR is not detected on k6-2 root cause: we moved mtrr_bp_init() early for mtrr trimming, and in early_detect we only read the CPU capability from cpuid, so some cpu doesn't have that bit in cpuid. So we need to add early_init_xxxx to preset those bit before mtrr_bp_init for those earlier cpus. this patch is for v2.6.27 Reported-by: Krzysztof Helt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 9 +++++---- arch/x86/kernel/cpu/centaur.c | 11 +++++++++++ arch/x86/kernel/cpu/cyrix.c | 32 ++++++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index cae9cabc303..18514ed2610 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -31,6 +31,11 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } + + /* Set MTRR capability flag if appropriate */ + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); } static void __cpuinit init_amd(struct cpuinfo_x86 *c) @@ -166,10 +171,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) mbytes); } - /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); break; } diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e0f45edd6a5..a0534c04d38 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -314,6 +314,16 @@ enum { EAMD3D = 1<<20, }; +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 5: + /* Emulate MTRRs using Centaur's MCR. */ + set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); + break; + } +} + static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { @@ -462,6 +472,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, + .c_early_init = early_init_centaur, .c_init = init_centaur, .c_size_cache = centaur_size_cache, }; diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index ada50505a5c..13f8fa16b81 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -15,13 +15,11 @@ /* * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU */ -static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) { unsigned char ccr2, ccr3; - unsigned long flags; /* we test for DEVID by checking whether CCR3 is writable */ - local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, ccr3 ^ 0x80); getCx86(0xc0); /* dummy to change bus */ @@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) *dir0 = getCx86(CX86_DIR0); *dir1 = getCx86(CX86_DIR1); } - local_irq_restore(flags); } +static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +{ + unsigned long flags; + + local_irq_save(flags); + __do_cyrix_devid(dir0, dir1); + local_irq_restore(flags); +} /* * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in * order to identify the Cyrix CPU model after we're out of setup.c @@ -161,6 +166,24 @@ static void __cpuinit geode_configure(void) local_irq_restore(flags); } +static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c) +{ + unsigned char dir0, dir0_msn, dir1 = 0; + + __do_cyrix_devid(&dir0, &dir1); + dir0_msn = dir0 >> 4; /* identifies CPU "family" */ + + switch (dir0_msn) { + case 3: /* 6x86/6x86L */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + case 5: /* 6x86MX/M II */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + } +} static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) { @@ -416,6 +439,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_vendor = "Cyrix", .c_ident = { "CyrixInstead" }, + .c_early_init = early_init_cyrix, .c_init = init_cyrix, .c_identify = cyrix_identify, }; -- cgit v1.2.3 From 5031088dbc0cd30e893eb27d53f0e0eee6eb1c00 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 4 Sep 2008 21:09:43 +0200 Subject: x86: delay early cpu initialization until cpuid is done Move early cpu initialization after cpu early get cap so the early cpu initialization can fix up cpu caps. Signed-off-by: Krzysztof Helt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0785b3c8d04..8aab8517642 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -335,11 +335,11 @@ static void __init early_cpu_detect(void) get_cpu_vendor(c, 1); + early_get_cap(c); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); - - early_get_cap(c); } /* -- cgit v1.2.3 From 3da99c97763703b23cbf2bd6e96252256d4e4617 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:44 +0200 Subject: x86: make (early)_identify_cpu more the same between 32bit and 64 bit 1. add extended_cpuid_level for 32bit 2. add generic_identify for 64bit 3. add early_identify_cpu for 32bit 4. early_identify_cpu not be called by identify_cpu 5. remove early in get_cpu_vendor for 32bit 6. add get_cpu_cap 7. add cpu_detect for 64bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 138 +++++++++++++++------------------------ arch/x86/kernel/cpu/common_64.c | 139 +++++++++++++++++++++++++--------------- 2 files changed, 141 insertions(+), 136 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8aab8517642..96e1b8698d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -96,7 +96,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) unsigned int *v; char *p, *q; - if (cpuid_eax(0x80000000) < 0x80000004) + if (c->extended_cpuid_level < 0x80000004) return 0; v = (unsigned int *) c->x86_model_id; @@ -125,7 +125,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; - n = cpuid_eax(0x80000000); + n = c->extended_cpuid_level; if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); @@ -186,7 +186,7 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) } -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; int i; @@ -198,8 +198,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) (cpu_devs[i]->c_ident[1] && !strcmp(v, cpu_devs[i]->c_ident[1]))) { c->x86_vendor = i; - if (!early) - this_cpu = cpu_devs[i]; + this_cpu = cpu_devs[i]; return; } } @@ -284,34 +283,30 @@ void __init cpu_detect(struct cpuinfo_x86 *c) } } } -static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) + +static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; - unsigned int ebx; + u32 ebx; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - if (have_cpuid_p()) { - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - } + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + } - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + c->extended_cpuid_level = xlvl; + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); } - } - } - /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -321,25 +316,29 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) * WARNING: this function is only called on the BP. Don't add code here * that is supposed to run on all CPUs. */ -static void __init early_cpu_detect(void) +static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - struct cpuinfo_x86 *c = &boot_cpu_data; - c->x86_cache_alignment = 32; c->x86_clflush_size = 32; if (!have_cpuid_p()) return; + c->extended_cpuid_level = 0; + + memset(&c->x86_capability, 0, sizeof c->x86_capability); + cpu_detect(c); - get_cpu_vendor(c, 1); + get_cpu_vendor(c); - early_get_cap(c); + get_cpu_cap(c); if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); + + validate_pat_support(c); } /* @@ -373,60 +372,32 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { - u32 tfms, xlvl; - unsigned int ebx; - - if (have_cpuid_p()) { - /* Get vendor name */ - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, - (unsigned int *)&c->x86_vendor_id[0], - (unsigned int *)&c->x86_vendor_id[8], - (unsigned int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c, 0); - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; - c->initial_apicid = (ebx >> 24) & 0xFF; + if (!have_cpuid_p()) + return; + + c->extended_cpuid_level = 0; + + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); + + if (c->cpuid_level >= 0x00000001) { + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; #ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); - c->phys_proc_id = c->initial_apicid; + c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->phys_proc_id = c->initial_apicid; #else - c->apicid = c->initial_apicid; + c->apicid = c->initial_apicid; #endif - if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) - c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } + } - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - if (xlvl >= 0x80000004) - get_model_name(c); /* Default name */ - } + if (c->extended_cpuid_level >= 0x80000004) + get_model_name(c); /* Default name */ - init_scattered_cpuid_features(c); - detect_nopl(c); - } + init_scattered_cpuid_features(c); + detect_nopl(c); } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) @@ -651,13 +622,10 @@ void __init early_cpu_init(void) { struct cpu_vendor_dev *cvdev; - for (cvdev = __x86cpuvendor_start ; - cvdev < __x86cpuvendor_end ; - cvdev++) + for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - early_cpu_detect(); - validate_pat_support(&boot_cpu_data); + early_identify_cpu(&boot_cpu_data); } /* Make sure %fs is initialized properly in idle threads */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 699ecbfb63e..28719fe0794 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -195,6 +195,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) printk(KERN_ERR "CPU: Your system may be unstable.\n"); } c->x86_vendor = X86_VENDOR_UNKNOWN; + this_cpu = &default_cpu; } static void __init early_cpu_support_print(void) @@ -249,56 +250,18 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) } } -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); - -void __init early_cpu_init(void) +void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { - struct cpu_vendor_dev *cvdev; - - for (cvdev = __x86cpuvendor_start ; - cvdev < __x86cpuvendor_end ; - cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - early_cpu_support_print(); - early_identify_cpu(&boot_cpu_data); -} - -/* Do some early cpuid on the boot CPU to get some parameter that are - needed before check_bugs. Everything advanced is in identify_cpu - below. */ -static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) -{ - u32 tfms, xlvl; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_clflush_size = 64; - c->x86_cache_alignment = c->x86_clflush_size; - c->x86_max_cores = 1; - c->x86_coreid_bits = 0; - c->extended_cpuid_level = 0; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - /* Get vendor name */ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, (unsigned int *)&c->x86_vendor_id[0], (unsigned int *)&c->x86_vendor_id[8], (unsigned int *)&c->x86_vendor_id[4]); - get_cpu_vendor(c); - - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { - __u32 misc; - cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4], - &c->x86_capability[0]); + u32 junk, tfms, cap0, misc; + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = (tfms >> 8) & 0xf; c->x86_model = (tfms >> 4) & 0xf; c->x86_mask = tfms & 0xf; @@ -306,17 +269,32 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - if (test_cpu_cap(c, X86_FEATURE_CLFLSH)) + if (cap0 & (1<<19)) c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; } +} + + +static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) +{ + u32 tfms, xlvl; + u32 ebx; + + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; + + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + } - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; -#ifdef CONFIG_SMP - c->phys_proc_id = c->initial_apicid; -#endif /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); c->extended_cpuid_level = xlvl; @@ -325,8 +303,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[1] = cpuid_edx(0x80000001); c->x86_capability[6] = cpuid_ecx(0x80000001); } - if (xlvl >= 0x80000004) - get_model_name(c); /* Default name */ } /* Transmeta-defined flags: level 0x80860001 */ @@ -346,8 +322,26 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; } +} - detect_nopl(c); +/* Do some early cpuid on the boot CPU to get some parameter that are + needed before check_bugs. Everything advanced is in identify_cpu + below. */ +static void __init early_identify_cpu(struct cpuinfo_x86 *c) +{ + + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; + + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + c->extended_cpuid_level = 0; + + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) @@ -356,6 +350,39 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) validate_pat_support(c); } +void __init early_cpu_init(void) +{ + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + + early_cpu_support_print(); + early_identify_cpu(&boot_cpu_data); +} + +static void __cpuinit generic_identify(struct cpuinfo_x86 *c) +{ + c->extended_cpuid_level = 0; + + cpu_detect(c); + + get_cpu_vendor(c); + + get_cpu_cap(c); + + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; +#ifdef CONFIG_SMP + c->phys_proc_id = c->initial_apicid; +#endif + + if (c->extended_cpuid_level >= 0x80000004) + get_model_name(c); /* Default name */ + + init_scattered_cpuid_features(c); + detect_nopl(c); +} + /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -363,9 +390,19 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; - early_identify_cpu(c); + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; + c->x86_max_cores = 1; + c->x86_coreid_bits = 0; + memset(&c->x86_capability, 0, sizeof c->x86_capability); - init_scattered_cpuid_features(c); + generic_identify(c); c->apicid = phys_pkg_id(0); -- cgit v1.2.3 From 9d31d35b5f9d619bb2482235cc889326de049e29 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:44 +0200 Subject: x86: order functions in cpu/common.c and cpu/common_64.c v2 v2: make 64 bit get c->x86_cache_alignment = c->x86_clfush_size Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 190 ++++++++++++++++++++++------------------ arch/x86/kernel/cpu/common_64.c | 106 +++++++++++----------- 2 files changed, 156 insertions(+), 140 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 96e1b8698d3..10e89ae5a60 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -60,6 +60,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ +void switch_to_new_gdt(void) +{ + struct desc_ptr gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +} + static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -123,15 +135,15 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int n, dummy, ecx, edx, l2size; + unsigned int n, dummy, ebx, ecx, edx, l2size; n = c->extended_cpuid_level; if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size = (ecx>>24)+(edx>>24); + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size = (ecx>>24) + (edx>>24); } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -185,6 +197,51 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } +#ifdef CONFIG_X86_HT +void __cpuinit detect_ht(struct cpuinfo_x86 *c) +{ + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; + + cpuid(1, &eax, &ebx, &ecx, &edx); + + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) + return; + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of siblings %d", + smp_num_siblings); + smp_num_siblings = 1; + return; + } + + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings); + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + ((1 << core_bits) - 1); + + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } +} +#endif static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { @@ -258,7 +315,26 @@ static int __cpuinit have_cpuid_p(void) return flag_is_changeable_p(X86_EFLAGS_ID); } -void __init cpu_detect(struct cpuinfo_x86 *c) +static void __init early_cpu_support_print(void) +{ + int i,j; + struct cpu_dev *cpu_devx; + + printk("KERNEL supported cpus:\n"); + for (i = 0; i < X86_VENDOR_NUM; i++) { + cpu_devx = cpu_devs[i]; + if (!cpu_devx) + continue; + for (j = 0; j < 2; j++) { + if (!cpu_devx->c_ident[j]) + continue; + printk(" %s %s\n", cpu_devx->c_vendor, + cpu_devx->c_ident[j]); + } + } +} + +void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ cpuid(0x00000000, (unsigned int *)&c->cpuid_level, @@ -267,19 +343,20 @@ void __init cpu_detect(struct cpuinfo_x86 *c) (unsigned int *)&c->x86_vendor_id[4]); c->x86 = 4; + /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; + c->x86 = (tfms >> 8) & 0xf; + c->x86_model = (tfms >> 4) & 0xf; + c->x86_mask = tfms & 0xf; if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; + c->x86_model += ((tfms >> 16) & 0xf) << 4; if (cap0 & (1<<19)) { - c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; + c->x86_cache_alignment = c->x86_clflush_size; } } } @@ -341,6 +418,17 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) validate_pat_support(c); } +void __init early_cpu_init(void) +{ + struct cpu_vendor_dev *cvdev; + + for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) + cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + + early_cpu_support_print(); + early_identify_cpu(&boot_cpu_data); +} + /* * The NOPL instruction is supposed to exist on all CPUs with * family >= 6, unfortunately, that's not true in practice because @@ -500,7 +588,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) */ if (c != &boot_cpu_data) { /* AND the already accumulated flags with these */ - for (i = 0 ; i < NCAPINTS ; i++) + for (i = 0; i < NCAPINTS; i++) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } @@ -529,52 +617,6 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } -#ifdef CONFIG_X86_HT -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return; - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of the " - "siblings %d", smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings) ; - - core_bits = get_count_order(c->x86_max_cores); - - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); - - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } -} -#endif - static __init int setup_noclflush(char *arg) { setup_clear_cpu_cap(X86_FEATURE_CLFLSH); @@ -592,17 +634,17 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) vendor = c->x86_vendor_id; if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk("%s ", vendor); + printk(KERN_CONT "%s ", vendor); - if (!c->x86_model_id[0]) - printk("%d86", c->x86); + if (c->x86_model_id[0]) + printk(KERN_CONT "%s", c->x86_model_id); else - printk("%s", c->x86_model_id); + printk(KERN_CONT "%d86", c->x86); if (c->x86_mask || c->cpuid_level >= 0) - printk(" stepping %02x\n", c->x86_mask); + printk(KERN_CONT " stepping %02x\n", c->x86_mask); else - printk("\n"); + printk(KERN_CONT "\n"); } static __init int setup_disablecpuid(char *arg) @@ -618,16 +660,6 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; -void __init early_cpu_init(void) -{ - struct cpu_vendor_dev *cvdev; - - for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; - - early_identify_cpu(&boot_cpu_data); -} - /* Make sure %fs is initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { @@ -636,18 +668,6 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) return regs; } -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct desc_ptr gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -} - /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 28719fe0794..522a5f2e405 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -103,9 +103,8 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), " - "D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); /* On K8 L1 TLB is inclusive, so don't count it */ c->x86_tlbsize = 0; @@ -143,8 +142,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } else if (smp_num_siblings > 1) { if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of " - "siblings %d", smp_num_siblings); + printk(KERN_WARNING "CPU: Unsupported number of siblings %d", + smp_num_siblings); smp_num_siblings = 1; return; } @@ -182,7 +181,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (cpu_devs[i]) { if (!strcmp(v, cpu_devs[i]->c_ident[0]) || (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { + !strcmp(v, cpu_devs[i]->c_ident[1]))) { c->x86_vendor = i; this_cpu = cpu_devs[i]; return; @@ -217,39 +216,6 @@ static void __init early_cpu_support_print(void) } } -/* - * The NOPL instruction is supposed to exist on all CPUs with - * family >= 6, unfortunately, that's not true in practice because - * of early VIA chips and (more importantly) broken virtualizers that - * are not easy to detect. Hence, probe for it based on first - * principles. - * - * Note: no 64-bit chip is known to lack these, but put the code here - * for consistency with 32 bits, and to make it utterly trivial to - * diagnose the problem should it ever surface. - */ -static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) -{ - const u32 nopl_signature = 0x888c53b1; /* Random number */ - u32 has_nopl = nopl_signature; - - clear_cpu_cap(c, X86_FEATURE_NOPL); - if (c->x86 >= 6) { - asm volatile("\n" - "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ - "2:\n" - " .section .fixup,\"ax\"\n" - "3: xor %0,%0\n" - " jmp 2b\n" - " .previous\n" - _ASM_EXTABLE(1b,3b) - : "+a" (has_nopl)); - - if (has_nopl == nopl_signature) - set_cpu_cap(c, X86_FEATURE_NOPL); - } -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -258,6 +224,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) (unsigned int *)&c->x86_vendor_id[8], (unsigned int *)&c->x86_vendor_id[4]); + c->x86 = 4; /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; @@ -268,12 +235,11 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - if (cap0 & (1<<19)) + c->x86_model += ((tfms >> 16) & 0xf) << 4; + if (cap0 & (1<<19)) { c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; + c->x86_cache_alignment = c->x86_clflush_size; + } } } @@ -283,9 +249,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) u32 tfms, xlvl; u32 ebx; - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 capability, excap; @@ -361,6 +324,39 @@ void __init early_cpu_init(void) early_identify_cpu(&boot_cpu_data); } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { c->extended_cpuid_level = 0; @@ -448,7 +444,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) } -void __cpuinit identify_boot_cpu(void) +void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); } @@ -460,13 +456,6 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } -static __init int setup_noclflush(char *arg) -{ - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); - return 1; -} -__setup("noclflush", setup_noclflush); - struct msr_range { unsigned min; unsigned max; @@ -510,6 +499,13 @@ static __init int setup_show_msr(char *arg) } __setup("show_msr=", setup_show_msr); +static __init int setup_noclflush(char *arg) +{ + setup_clear_cpu_cap(X86_FEATURE_CLFLSH); + return 1; +} +__setup("noclflush", setup_noclflush); + void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { if (c->x86_model_id[0]) -- cgit v1.2.3 From 10a434fcb23a57c385177a0086955fae01003f64 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:45 +0200 Subject: x86: remove cpu_vendor_dev 1. add c_x86_vendor into cpu_dev 2. change cpu_devs to static 3. check c_x86_vendor before put that cpu_dev into array 4. remove alignment for 64bit 5. order the sequence in cpu_devs according to link sequence... so could put intel at first, then amd... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 4 +-- arch/x86/kernel/cpu/amd.c | 3 +- arch/x86/kernel/cpu/amd_64.c | 4 +-- arch/x86/kernel/cpu/centaur.c | 3 +- arch/x86/kernel/cpu/centaur_64.c | 3 +- arch/x86/kernel/cpu/common.c | 71 ++++++++++++++++++++-------------------- arch/x86/kernel/cpu/common_64.c | 71 ++++++++++++++++++++-------------------- arch/x86/kernel/cpu/cpu.h | 18 ++++------ arch/x86/kernel/cpu/cyrix.c | 6 ++-- arch/x86/kernel/cpu/intel.c | 3 +- arch/x86/kernel/cpu/intel_64.c | 3 +- arch/x86/kernel/cpu/transmeta.c | 3 +- arch/x86/kernel/cpu/umc.c | 3 +- arch/x86/kernel/vmlinux_32.lds.S | 8 ++--- arch/x86/kernel/vmlinux_64.lds.S | 9 +++-- 15 files changed, 106 insertions(+), 106 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3ede19a4e0b..403e689df0b 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -8,14 +8,14 @@ obj-y += proc.o capflags.o powerflags.o obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += common_64.o bugs_64.o +obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o -obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o -obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 18514ed2610..d64ea6097ca 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -298,6 +298,7 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_early_init = early_init_amd, .c_init = init_amd, .c_size_cache = amd_size_cache, + .c_x86_vendor = X86_VENDOR_AMD, }; -cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); +cpu_dev_register(amd_cpu_dev); diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index d1692b2a41f..d1c721c0c49 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -218,7 +218,7 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_ident = { "AuthenticAMD" }, .c_early_init = early_init_amd, .c_init = init_amd, + .c_x86_vendor = X86_VENDOR_AMD, }; -cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); - +cpu_dev_register(amd_cpu_dev); diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index a0534c04d38..e5f6d89521b 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -475,6 +475,7 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_early_init = early_init_centaur, .c_init = init_centaur, .c_size_cache = centaur_size_cache, + .c_x86_vendor = X86_VENDOR_CENTAUR, }; -cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); +cpu_dev_register(centaur_cpu_dev); diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 1d181c40e2e..49cfc6d2f2f 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -29,7 +29,8 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_ident = { "CentaurHauls" }, .c_early_init = early_init_centaur, .c_init = init_centaur, + .c_x86_vendor = X86_VENDOR_CENTAUR, }; -cpu_vendor_dev_register(X86_VENDOR_CENTAUR, ¢aur_cpu_dev); +cpu_dev_register(centaur_cpu_dev); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 10e89ae5a60..ee1044ca481 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -75,7 +75,7 @@ void switch_to_new_gdt(void) static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; -struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { @@ -93,8 +93,9 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; +static struct cpu_dev *this_cpu __cpuinitdata; static int __init cachesize_setup(char *str) { @@ -250,21 +251,24 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) static int printed; for (i = 0; i < X86_VENDOR_NUM; i++) { - if (cpu_devs[i]) { - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - c->x86_vendor = i; - this_cpu = cpu_devs[i]; - return; - } + if (!cpu_devs[i]) + break; + + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + this_cpu = cpu_devs[i]; + c->x86_vendor = this_cpu->c_x86_vendor; + return; } } + if (!printed) { printed++; printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } + c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; } @@ -315,25 +319,6 @@ static int __cpuinit have_cpuid_p(void) return flag_is_changeable_p(X86_EFLAGS_ID); } -static void __init early_cpu_support_print(void) -{ - int i,j; - struct cpu_dev *cpu_devx; - - printk("KERNEL supported cpus:\n"); - for (i = 0; i < X86_VENDOR_NUM; i++) { - cpu_devx = cpu_devs[i]; - if (!cpu_devx) - continue; - for (j = 0; j < 2; j++) { - if (!cpu_devx->c_ident[j]) - continue; - printk(" %s %s\n", cpu_devx->c_vendor, - cpu_devx->c_ident[j]); - } - } -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -411,21 +396,35 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_cap(c); - if (c->x86_vendor != X86_VENDOR_UNKNOWN && - cpu_devs[c->x86_vendor]->c_early_init) - cpu_devs[c->x86_vendor]->c_early_init(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); validate_pat_support(c); } void __init early_cpu_init(void) { - struct cpu_vendor_dev *cvdev; + struct cpu_dev **cdev; + int count = 0; + + printk("KERNEL supported cpus:\n"); + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { + struct cpu_dev *cpudev = *cdev; + unsigned int j; - for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + if (count >= X86_VENDOR_NUM) + break; + cpu_devs[count] = cpudev; + count++; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(" %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } + } - early_cpu_support_print(); early_identify_cpu(&boot_cpu_data); } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 522a5f2e405..b82479c1083 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -66,7 +66,7 @@ void switch_to_new_gdt(void) load_gdt(&gdt_descr); } -struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; +static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { @@ -76,8 +76,9 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; +static struct cpu_dev *this_cpu __cpuinitdata; int __cpuinit get_model_name(struct cpuinfo_x86 *c) { @@ -178,44 +179,28 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) static int printed; for (i = 0; i < X86_VENDOR_NUM; i++) { - if (cpu_devs[i]) { - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - c->x86_vendor = i; - this_cpu = cpu_devs[i]; - return; - } + if (!cpu_devs[i]) + break; + + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { + this_cpu = cpu_devs[i]; + c->x86_vendor = this_cpu->c_x86_vendor; + return; } } + if (!printed) { printed++; printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } + c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; } -static void __init early_cpu_support_print(void) -{ - int i,j; - struct cpu_dev *cpu_devx; - - printk("KERNEL supported cpus:\n"); - for (i = 0; i < X86_VENDOR_NUM; i++) { - cpu_devx = cpu_devs[i]; - if (!cpu_devx) - continue; - for (j = 0; j < 2; j++) { - if (!cpu_devx->c_ident[j]) - continue; - printk(" %s %s\n", cpu_devx->c_vendor, - cpu_devx->c_ident[j]); - } - } -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -306,21 +291,35 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_cap(c); - if (c->x86_vendor != X86_VENDOR_UNKNOWN && - cpu_devs[c->x86_vendor]->c_early_init) - cpu_devs[c->x86_vendor]->c_early_init(c); + if (this_cpu->c_early_init) + this_cpu->c_early_init(c); validate_pat_support(c); } void __init early_cpu_init(void) { - struct cpu_vendor_dev *cvdev; + struct cpu_dev **cdev; + int count = 0; + + printk("KERNEL supported cpus:\n"); + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { + struct cpu_dev *cpudev = *cdev; + unsigned int j; - for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++) - cpu_devs[cvdev->vendor] = cvdev->cpu_dev; + if (count >= X86_VENDOR_NUM) + break; + cpu_devs[count] = cpudev; + count++; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(" %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } + } - early_cpu_support_print(); early_identify_cpu(&boot_cpu_data); } diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 4d894e8565f..3cc9d92afd8 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -21,21 +21,15 @@ struct cpu_dev { void (*c_init)(struct cpuinfo_x86 * c); void (*c_identify)(struct cpuinfo_x86 * c); unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); + int c_x86_vendor; }; -extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; +#define cpu_dev_register(cpu_devX) \ + static struct cpu_dev *__cpu_dev_##cpu_devX __used \ + __attribute__((__section__(".x86_cpu_dev.init"))) = \ + &cpu_devX; -struct cpu_vendor_dev { - int vendor; - struct cpu_dev *cpu_dev; -}; - -#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \ - static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \ - __attribute__((__section__(".x86cpuvendor.init"))) = \ - { cpu_vendor_id, cpu_dev } - -extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[]; +extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 13f8fa16b81..3f8c7283d81 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -442,14 +442,16 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_early_init = early_init_cyrix, .c_init = init_cyrix, .c_identify = cyrix_identify, + .c_x86_vendor = X86_VENDOR_CYRIX, }; -cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev); +cpu_dev_register(cyrix_cpu_dev); static struct cpu_dev nsc_cpu_dev __cpuinitdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, .c_init = init_nsc, + .c_x86_vendor = X86_VENDOR_NSC, }; -cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev); +cpu_dev_register(nsc_cpu_dev); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 77618c717d7..c5ac08124ad 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -303,9 +303,10 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_early_init = early_init_intel, .c_init = init_intel, .c_size_cache = intel_size_cache, + .c_x86_vendor = X86_VENDOR_INTEL, }; -cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); +cpu_dev_register(intel_cpu_dev); /* arch_initcall(intel_cpu_init); */ diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 1019c58d39f..0a8128a240d 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -90,6 +90,7 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_ident = { "GenuineIntel" }, .c_early_init = early_init_intel, .c_init = init_intel, + .c_x86_vendor = X86_VENDOR_INTEL, }; -cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); +cpu_dev_register(intel_cpu_dev); diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index b911a2c61b8..7c46e6ecedc 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -102,6 +102,7 @@ static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_ident = { "GenuineTMx86", "TransmetaCPU" }, .c_init = init_transmeta, .c_identify = transmeta_identify, + .c_x86_vendor = X86_VENDOR_TRANSMETA, }; -cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev); +cpu_dev_register(transmeta_cpu_dev); diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index b1fc90989d7..e777f79e096 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c @@ -19,7 +19,8 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = { } }, }, + .c_x86_vendor = X86_VENDOR_UMC, }; -cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev); +cpu_dev_register(umc_cpu_dev); diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index af5bdad8460..21b4f7eefaa 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -140,10 +140,10 @@ SECTIONS *(.con_initcall.init) __con_initcall_end = .; } - .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { - __x86cpuvendor_start = .; - *(.x86cpuvendor.init) - __x86cpuvendor_end = .; + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + __x86_cpu_dev_start = .; + *(.x86_cpu_dev.init) + __x86_cpu_dev_end = .; } SECURITY_INIT . = ALIGN(4); diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 63e5c1a22e8..201e81a91a9 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -168,13 +168,12 @@ SECTIONS *(.con_initcall.init) } __con_initcall_end = .; - . = ALIGN(16); - __x86cpuvendor_start = .; - .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { - *(.x86cpuvendor.init) + __x86_cpu_dev_start = .; + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + *(.x86_cpu_dev.init) } - __x86cpuvendor_end = .; SECURITY_INIT + __x86_cpu_dev_end = .; . = ALIGN(8); .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { -- cgit v1.2.3 From a0854a46c5eb82588126421a9cbceb973384a644 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:46 +0200 Subject: x86: make 32bit support show_msr like 64 bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 51 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ee1044ca481..a79cf5c52b6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -616,6 +616,49 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); } +struct msr_range { + unsigned min; + unsigned max; +}; + +static struct msr_range msr_range_array[] __cpuinitdata = { + { 0x00000000, 0x00000418}, + { 0xc0000000, 0xc000040b}, + { 0xc0010000, 0xc0010142}, + { 0xc0011000, 0xc001103b}, +}; + +static void __cpuinit print_cpu_msr(void) +{ + unsigned index; + u64 val; + int i; + unsigned index_min, index_max; + + for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { + index_min = msr_range_array[i].min; + index_max = msr_range_array[i].max; + for (index = index_min; index < index_max; index++) { + if (rdmsrl_amd_safe(index, &val)) + continue; + printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + } + } +} + +static int show_msr __cpuinitdata; +static __init int setup_show_msr(char *arg) +{ + int num; + + get_option(&arg, &num); + + if (num > 0) + show_msr = num; + return 1; +} +__setup("show_msr=", setup_show_msr); + static __init int setup_noclflush(char *arg) { setup_clear_cpu_cap(X86_FEATURE_CLFLSH); @@ -644,6 +687,14 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT " stepping %02x\n", c->x86_mask); else printk(KERN_CONT "\n"); + +#ifdef CONFIG_SMP + if (c->cpu_index < show_msr) + print_cpu_msr(); +#else + if (show_msr) + print_cpu_msr(); +#endif } static __init int setup_disablecpuid(char *arg) -- cgit v1.2.3 From 01b2e16a7a9be6573cba5d594d6659b3c6cb46a0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:46 +0200 Subject: x86: make get_mode_name of 64bit the same as 32bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index b82479c1083..f1fb94e766b 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -83,6 +83,7 @@ static struct cpu_dev *this_cpu __cpuinitdata; int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; + char *p, *q; if (c->extended_cpuid_level < 0x80000004) return 0; @@ -92,6 +93,19 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); c->x86_model_id[48] = 0; + + /* Intel chips right-justify this string for some dumb reason; + undo that brain damage */ + p = q = &c->x86_model_id[0]; + while (*p == ' ') + p++; + if (p != q) { + while (*p) + *q++ = *p++; + while (q <= &c->x86_model_id[48]) + *q++ = '\0'; /* Zero-pad the rest */ + } + return 1; } -- cgit v1.2.3 From 0a488a53d7ca46ac638c30079072c57e50cfcc7b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:47 +0200 Subject: x86: move 32bit related functions together Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 251 ++++++++++++++++++++-------------------- arch/x86/kernel/cpu/common_64.c | 34 +++--- 2 files changed, 143 insertions(+), 142 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a79cf5c52b6..2b2c170e8d6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -22,6 +22,8 @@ #include "cpu.h" +static struct cpu_dev *this_cpu __cpuinitdata; + DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, @@ -58,6 +60,109 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_serial_nr __cpuinitdata = 1; + +static int __init cachesize_setup(char *str) +{ + get_option(&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + +static int __init x86_fxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + +static int __init x86_sep_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_SEP); + return 1; +} +__setup("nosep", x86_sep_setup); + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { + /* Disable processor serial number */ + unsigned long lo, hi; + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_cpu_cap(c, X86_FEATURE_PN); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -72,9 +177,6 @@ void switch_to_new_gdt(void) asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); } -static int cachesize_override __cpuinitdata = -1; -static int disable_x86_serial_nr __cpuinitdata = 1; - static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) @@ -95,14 +197,6 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_vendor = "Unknown", .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata; - -static int __init cachesize_setup(char *str) -{ - get_option(&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); int __cpuinit get_model_name(struct cpuinfo_x86 *c) { @@ -133,7 +227,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) return 1; } - void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -150,7 +243,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n < 0x80000006) /* Some chips just has a large L1. */ return; - ecx = cpuid_ecx(0x80000006); + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; /* do processor-specific cache resizing */ @@ -167,48 +260,23 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) c->x86_cache_size = l2size; printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ + l2size, ecx & 0xFF); } #ifdef CONFIG_X86_HT void __cpuinit detect_ht(struct cpuinfo_x86 *c) { - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - cpuid(1, &eax, &ebx, &ecx, &edx); + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) + if (!cpu_has(c, X86_FEATURE_HT)) return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) + goto out; + + cpuid(1, &eax, &ebx, &ecx, &edx); + smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { @@ -225,8 +293,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) index_msb = get_count_order(smp_num_siblings); c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -236,10 +302,14 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); + } - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); +out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); } } #endif @@ -273,52 +343,6 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) this_cpu = &default_cpu; } - -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - - -static int __init x86_sep_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_SEP); - return 1; -} -__setup("nosep", x86_sep_setup); - - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - void __cpuinit cpu_detect(struct cpuinfo_x86 *c) { /* Get vendor name */ @@ -380,16 +404,16 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - c->x86_cache_alignment = 32; c->x86_clflush_size = 32; + c->x86_cache_alignment = c->x86_clflush_size; if (!have_cpuid_p()) return; - c->extended_cpuid_level = 0; - memset(&c->x86_capability, 0, sizeof c->x86_capability); + c->extended_cpuid_level = 0; + cpu_detect(c); get_cpu_vendor(c); @@ -487,31 +511,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) detect_nopl(c); } -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { - /* Disable processor serial number */ - unsigned long lo, hi; - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } -} - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); - - - /* * This does the hard work of actually picking apart the CPU stuff... */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index f1fb94e766b..b2da0413532 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -37,6 +37,8 @@ #include "cpu.h" +static struct cpu_dev *this_cpu __cpuinitdata; + /* We need valid kernel segments for data and code in long mode too * IRET will check the segment types kkeil 2000/10/28 * Also sysret mandates a special GDT layout @@ -78,7 +80,6 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_vendor = "Unknown", .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -static struct cpu_dev *this_cpu __cpuinitdata; int __cpuinit get_model_name(struct cpuinfo_x86 *c) { @@ -112,7 +113,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { - unsigned int n, dummy, ebx, ecx, edx; + unsigned int n, dummy, ebx, ecx, edx, l2size; n = c->extended_cpuid_level; @@ -125,15 +126,17 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) c->x86_tlbsize = 0; } - if (n >= 0x80000006) { - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); - ecx = cpuid_ecx(0x80000006); - c->x86_cache_size = ecx >> 16; - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); + if (n < 0x80000006) /* Some chips just has a large L1. */ + return; - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - c->x86_cache_size, ecx & 0xFF); - } + cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); + l2size = ecx >> 16; + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + l2size, ecx & 0xFF); } void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -142,14 +145,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) u32 eax, ebx, ecx, edx; int index_msb, core_bits; - cpuid(1, &eax, &ebx, &ecx, &edx); - - if (!cpu_has(c, X86_FEATURE_HT)) return; if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) goto out; + cpuid(1, &eax, &ebx, &ecx, &edx); + smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { @@ -175,6 +177,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) c->cpu_core_id = phys_pkg_id(index_msb) & ((1 << core_bits) - 1); } + out: if ((c->x86_max_cores * smp_num_siblings) > 1) { printk(KERN_INFO "CPU: Physical Processor ID: %d\n", @@ -182,7 +185,6 @@ out: printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); } - #endif } @@ -405,10 +407,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_clflush_size = 64; - c->x86_cache_alignment = c->x86_clflush_size; c->x86_max_cores = 1; c->x86_coreid_bits = 0; + c->x86_clflush_size = 64; + c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); generic_identify(c); -- cgit v1.2.3 From 6ac40ed0413ef4096720f966e11c7cdf259eee3f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 4 Sep 2008 10:41:22 -0700 Subject: x86: quick TSC calibration Introduce a fast TSC-calibration method on sane hardware. It only uses 17920 PIT timer ticks to calibrate the TSC, plus 256 ticks on each side to make sure the TSC values were very close to the tick, so the whole calibration takes 15ms. Yet, despite only takign 15ms, we can actually give pretty stringent guarantees of accuracy: - the code requires that we hit each 256-counter block at least 50 times, so the TSC error is basically at *MOST* just a few PIT cycles off in any direction. In practice, it's going to be about one microseconds off (which is how long it takes to read the counter) - so over 17920 PIT cycles, we can pretty much guarantee that the calibration error is less than one half of a percent. My testing bears this out: on my machine, the quick-calibration reports 2934.085kHz, while the slow one reports 2933.415. Yes, the slower calibration is still more precise. For me, the slow calibration is stable to within about one hundreth of a percent, so it's (at a guess) roughly an order-and-a-half of magnitude more precise. The longer you wait, the more precise you can be. However, the nice thing about the fast TSC PIT synchronization is that it's pretty much _guaranteed_ to give that 0.5% precision, and fail gracefully (and very quickly) if it doesn't get it. And it really is fairly simple (even if there's a lot of _details_ there, and I didn't get all of those right ont he first try or even the second ;) The patch says "110 insertions", but 63 of those new lines are actually comments. Signed-off-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 110 insertions(+), 1 deletions(-) --- arch/x86/kernel/tsc.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index da033b5b3e1..839070ba846 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -227,6 +227,117 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) return delta; } +/* + * This reads the current MSB of the PIT counter, and + * checks if we are running on sufficiently fast and + * non-virtualized hardware. + * + * Our expectations are: + * + * - the PIT is running at roughly 1.19MHz + * + * - each IO is going to take about 1us on real hardware, + * but we allow it to be much faster (by a factor of 10) or + * _slightly_ slower (ie we allow up to a 2us read+counter + * update - anything else implies a unacceptably slow CPU + * or PIT for the fast calibration to work. + * + * - with 256 PIT ticks to read the value, we have 214us to + * see the same MSB (and overhead like doing a single TSC + * read per MSB value etc). + * + * - We're doing 2 reads per loop (LSB, MSB), and we expect + * them each to take about a microsecond on real hardware. + * So we expect a count value of around 100. But we'll be + * generous, and accept anything over 50. + * + * - if the PIT is stuck, and we see *many* more reads, we + * return early (and the next caller of pit_expect_msb() + * then consider it a failure when they don't see the + * next expected value). + * + * These expectations mean that we know that we have seen the + * transition from one expected value to another with a fairly + * high accuracy, and we didn't miss any events. We can thus + * use the TSC value at the transitions to calculate a pretty + * good value for the TSC frequencty. + */ +static inline int pit_expect_msb(unsigned char val) +{ + int count = 0; + + for (count = 0; count < 50000; count++) { + /* Ignore LSB */ + inb(0x42); + if (inb(0x42) != val) + break; + } + return count > 50; +} + +/* + * How many MSB values do we want to see? We aim for a + * 15ms calibration, which assuming a 2us counter read + * error should give us roughly 150 ppm precision for + * the calibration. + */ +#define QUICK_PIT_MS 15 +#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) + +static unsigned long quick_pit_calibrate(void) +{ + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Counter 2, mode 0 (one-shot), binary count + * + * NOTE! Mode 2 decrements by two (and then the + * output is flipped each time, giving the same + * final output frequency as a decrement-by-one), + * so mode 0 is much better when looking at the + * individual counts. + */ + outb(0xb0, 0x43); + + /* Start at 0xffff */ + outb(0xff, 0x42); + outb(0xff, 0x42); + + if (pit_expect_msb(0xff)) { + int i; + u64 t1, t2, delta; + unsigned char expect = 0xfe; + + t1 = get_cycles(); + for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { + if (!pit_expect_msb(expect)) + goto failed; + } + t2 = get_cycles(); + + /* + * Ok, if we get here, then we've seen the + * MSB of the PIT decrement QUICK_PIT_ITERATIONS + * times, and each MSB had many hits, so we never + * had any sudden jumps. + * + * As a result, we can depend on there not being + * any odd delays anywhere, and the TSC reads are + * reliable. + * + * kHz = ticks / time-in-seconds / 1000; + * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 + * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) + */ + delta = (t2 - t1)*PIT_TICK_RATE; + do_div(delta, QUICK_PIT_ITERATIONS*256*1000); + printk("Fast TSC calibration using PIT\n"); + return delta; + } +failed: + return 0; +} /** * native_calibrate_tsc - calibrate the tsc on boot @@ -235,9 +346,15 @@ unsigned long native_calibrate_tsc(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, latch, ms; + unsigned long flags, latch, ms, fast_calibrate; int hpet = is_hpet_enabled(), i, loopmin; + local_irq_save(flags); + fast_calibrate = quick_pit_calibrate(); + local_irq_restore(flags); + if (fast_calibrate) + return fast_calibrate; + /* * Run 5 calibration loops to get the lowest frequency value * (the best estimate). We use two different calibration modes -- cgit v1.2.3 From 4156e9a8ef5b521185f451213d33fa661f38512e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 4 Sep 2008 22:47:47 +0200 Subject: x86: quick TSC calibration, improve - make sure the final TSC timestamp is reliable too Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 839070ba846..6dab90f6851 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -316,6 +316,12 @@ static unsigned long quick_pit_calibrate(void) } t2 = get_cycles(); + /* + * Make sure we can rely on the second TSC timestamp: + */ + if (!pit_expect_msb(--expect)) + goto failed; + /* * Ok, if we get here, then we've seen the * MSB of the PIT decrement QUICK_PIT_ITERATIONS -- cgit v1.2.3 From 97e4db7c8719f67c52793eeca5ec4df4c3407f2a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:08:59 -0700 Subject: x86: make detect_ht depend on CONFIG_X86_HT 64-bit has X86_HT set too, so use that instead of SMP. This also removes a include/asm-x86/processor.h ifdef. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/cpu/common_64.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7d5a07f0fd2..1f80ce07daa 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -263,9 +263,9 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) l2size, ecx & 0xFF); } -#ifdef CONFIG_X86_HT void __cpuinit detect_ht(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_HT u32 eax, ebx, ecx, edx; int index_msb, core_bits; @@ -311,8 +311,8 @@ out: printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); } -} #endif +} static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index bcb48ce05d2..8e630734e1f 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -141,7 +141,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) void __cpuinit detect_ht(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT u32 eax, ebx, ecx, edx; int index_msb, core_bits; -- cgit v1.2.3 From f0fc4aff1fa4db1c201593422dcbf85c9897ec4f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:00 -0700 Subject: x86: make header file the same in arch/x86/kernel/cpu/common_xx.c Make the files more similar in preparation to unification, no code changed. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 22 +++++++++++++++++++--- arch/x86/kernel/cpu/common_64.c | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1f80ce07daa..6bbdbc24f2b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,25 +1,41 @@ #include +#include +#include #include +#include +#include +#include +#include +#include #include #include -#include #include -#include -#include #include #include #include +#include #include #include #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include #include +#include #endif +#include +#include +#include +#include +#include +#include +#include +#include + #include "cpu.h" static struct cpu_dev *this_cpu __cpuinitdata; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 8e630734e1f..5daec699acf 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -24,6 +24,7 @@ #include #include #include +#include #endif #include #include @@ -33,7 +34,6 @@ #include #include #include -#include #include "cpu.h" -- cgit v1.2.3 From 950ad7ff6ec17fc1b47abc95c5c74628eb1adf8b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:01 -0700 Subject: x86: same gdt_page with macro Move the 32-bit and 64-bit gdt_page definitions next to each other, separated with an #ifdef. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 17 +++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6bbdbc24f2b..e0ca51f4f2d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -40,6 +40,22 @@ static struct cpu_dev *this_cpu __cpuinitdata; +#ifdef CONFIG_X86_64 +/* We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + */ +/* The TLS descriptors are currently at a different place compared to i386. + Hopefully nobody expects them at a fixed place (Wine?) */ +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, + [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +} }; +#else DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, @@ -74,6 +90,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, } }; +#endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); static int cachesize_override __cpuinitdata = -1; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 5daec699acf..b4890504284 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -39,6 +39,7 @@ static struct cpu_dev *this_cpu __cpuinitdata; +#ifdef CONFIG_X86_64 /* We need valid kernel segments for data and code in long mode too * IRET will check the segment types kkeil 2000/10/28 * Also sysret mandates a special GDT layout @@ -53,6 +54,42 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, } }; +#else +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, + [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, + [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, + [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, + /* + * Segments used for calling PnP BIOS have byte granularity. + * They code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + /* 32-bit code */ + [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ + [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, + /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, + /* + * The APM segments have byte granularity and their bases + * are set at run time. All have 64k limits. + */ + /* 32-bit code */ + [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ + [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, + /* data */ + [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, + + [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, + [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, +} }; +#endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; -- cgit v1.2.3 From ba51dced0b55eb62874e1646d5eeff344c3d4e67 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:02 -0700 Subject: x86: cpu/common.c, let 64-bit code have 32-bit only functions No effect on 64-bit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 +++ arch/x86/kernel/cpu/common_64.c | 111 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e0ca51f4f2d..9128ba0c8d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -93,6 +93,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +#ifdef CONFIG_X86_32 static int cachesize_override __cpuinitdata = -1; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -195,6 +196,13 @@ static int __init x86_serial_nr_setup(char *s) return 1; } __setup("serialnumber", x86_serial_nr_setup); +#else +/* Probe for the CPUID instruction */ +static inline int have_cpuid_p(void) +{ + return 1; +} +#endif __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index b4890504284..40c9d89cc14 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -92,6 +92,117 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #endif EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +#ifdef CONFIG_X86_32 +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_serial_nr __cpuinitdata = 1; + +static int __init cachesize_setup(char *str) +{ + get_option(&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + +static int __init x86_fxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + +static int __init x86_sep_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_SEP); + return 1; +} +__setup("nosep", x86_sep_setup); + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { + /* Disable processor serial number */ + unsigned long lo, hi; + rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_cpu_cap(c, X86_FEATURE_PN); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); +#else +/* Probe for the CPUID instruction */ +static inline int have_cpuid_p(void) +{ + return 1; +} +#endif + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, -- cgit v1.2.3 From d5494d4f517158b1d2eea1ae33f6c264eb12675f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:03 -0700 Subject: x86: cpu/common*.c, make 32-bit have 64-bit only functions Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 138 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 12 ++++ 2 files changed, 150 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9128ba0c8d3..dcd3ebd5ba6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -750,6 +750,143 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; +#ifdef CONFIG_X86_64 +struct x8664_pda **_cpu_pda __read_mostly; +EXPORT_SYMBOL(_cpu_pda); + +struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; + +char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; + +unsigned long __supported_pte_mask __read_mostly = ~0UL; +EXPORT_SYMBOL_GPL(__supported_pte_mask); + +static int do_not_nx __cpuinitdata; + +/* noexec=on|off +Control non executable mappings for 64bit processes. + +on Enable(default) +off Disable +*/ +static int __init nonx_setup(char *str) +{ + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; + } else if (!strncmp(str, "off", 3)) { + do_not_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +} +early_param("noexec", nonx_setup); + +int force_personality32; + +/* noexec32=on|off +Control non executable heap for 32bit processes. +To control the stack too use noexec=off + +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC +*/ +static int __init nonx32_setup(char *str) +{ + if (!strcmp(str, "on")) + force_personality32 &= ~READ_IMPLIES_EXEC; + else if (!strcmp(str, "off")) + force_personality32 |= READ_IMPLIES_EXEC; + return 1; +} +__setup("noexec32=", nonx32_setup); + +void pda_init(int cpu) +{ + struct x8664_pda *pda = cpu_pda(cpu); + + /* Setup up data that may be needed in __get_free_pages early */ + loadsegment(fs, 0); + loadsegment(gs, 0); + /* Memory clobbers used to order PDA accessed */ + mb(); + wrmsrl(MSR_GS_BASE, pda); + mb(); + + pda->cpunumber = cpu; + pda->irqcount = -1; + pda->kernelstack = (unsigned long)stack_thread_info() - + PDA_STACKOFFSET + THREAD_SIZE; + pda->active_mm = &init_mm; + pda->mmu_state = 0; + + if (cpu == 0) { + /* others are initialized in smpboot.c */ + pda->pcurrent = &init_task; + pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; + } else { + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } + + if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) + pda->nodenumber = cpu_to_node(cpu); + } +} + +char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + + DEBUG_STKSZ] __page_aligned_bss; + +extern asmlinkage void ignore_sysret(void); + +/* May not be marked __init: used by software suspend */ +void syscall_init(void) +{ + /* + * LSTAR and STAR live in a bit strange symbiosis. + * They both write to the same internal register. STAR allows to + * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. + */ + wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); + wrmsrl(MSR_LSTAR, system_call); + wrmsrl(MSR_CSTAR, ignore_sysret); + +#ifdef CONFIG_IA32_EMULATION + syscall32_cpu_init(); +#endif + + /* Flags to clear on syscall */ + wrmsrl(MSR_SYSCALL_MASK, + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); +} + +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || do_not_nx) + __supported_pte_mask &= ~_PAGE_NX; +} + +unsigned long kernel_eflags; + +/* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); + +#else + /* Make sure %fs is initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { @@ -757,6 +894,7 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) regs->fs = __KERNEL_PERCPU; return regs; } +#endif /* * cpu_init() initializes state that is per-CPU. Some data is already diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 40c9d89cc14..9f2a6ece82d 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -704,6 +704,7 @@ __setup("clearcpuid=", setup_disablecpuid); cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; +#ifdef CONFIG_X86_64 struct x8664_pda **_cpu_pda __read_mostly; EXPORT_SYMBOL(_cpu_pda); @@ -838,6 +839,17 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); +#else + +/* Make sure %fs is initialized properly in idle threads */ +struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) +{ + memset(regs, 0, sizeof(struct pt_regs)); + regs->fs = __KERNEL_PERCPU; + return regs; +} +#endif + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT -- cgit v1.2.3 From 1ba76586f778be327e452180d8378e40ee70f066 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:04 -0700 Subject: x86: cpu/common*.c have same cpu_init(), with copying and #ifdef hard to merge by lines... (as here we have material differences between 32-bit and 64-bit mode) - will try to do it later. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 124 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 88 +++++++++++++++++++++++++++- 2 files changed, 211 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dcd3ebd5ba6..f44678db161 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -901,7 +901,129 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. + * A lot of state is already set up in PDA init for 64 bit */ +#ifdef CONFIG_X86_64 +void __cpuinit cpu_init(void) +{ + int cpu = stack_smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); + unsigned long v; + char *estacks = NULL; + struct task_struct *me; + int i; + + /* CPU 0 is initialised in head64.c */ + if (cpu != 0) + pda_init(cpu); + else + estacks = boot_exception_stacks; + + me = current; + + if (cpu_test_and_set(cpu, cpu_initialized)) + panic("CPU#%d already initialized!\n", cpu); + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + + switch_to_new_gdt(); + load_idt((const struct desc_ptr *)&idt_descr); + + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + syscall_init(); + + wrmsrl(MSR_FS_BASE, 0); + wrmsrl(MSR_KERNEL_GS_BASE, 0); + barrier(); + + check_efer(); + if (cpu != 0 && x2apic) + enable_x2apic(); + + /* + * set up and load the per-CPU TSS + */ + if (!orig_ist->ist[0]) { + static const unsigned int order[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + }; + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; + } + } + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + /* + * <= is required because the CPU will access up to + * 8 bits beyond the end of the IO permission bitmap. + */ + for (i = 0; i <= IO_BITMAP_LONGS; i++) + t->io_bitmap[i] = ~0UL; + + atomic_inc(&init_mm.mm_count); + me->active_mm = &init_mm; + if (me->mm) + BUG(); + enter_lazy_tlb(&init_mm, me); + + load_sp0(t, ¤t->thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_KGDB + /* + * If the kgdb is connected no debug regs should be altered. This + * is only applicable when KGDB and a KGDB I/O module are built + * into the kernel and you are using early debugging with + * kgdbwait. KGDB will control the kernel HW breakpoint registers. + */ + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + else { +#endif + /* + * Clear all 6 debug registers: + */ + + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); +#ifdef CONFIG_KGDB + /* If the kgdb is connected no debug regs should be altered. */ + } +#endif + + fpu_init(); + + raw_local_save_flags(kernel_eflags); + + if (is_uv_system()) + uv_cpu_init(); +} + +#else + void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); @@ -982,3 +1104,5 @@ void __cpuinit cpu_uninit(void) per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; } #endif + +#endif diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 9f2a6ece82d..2bd0ed5abb0 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -855,8 +855,9 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init. + * A lot of state is already set up in PDA init for 64 bit */ +#ifdef CONFIG_X86_64 void __cpuinit cpu_init(void) { int cpu = stack_smp_processor_id(); @@ -974,3 +975,88 @@ void __cpuinit cpu_init(void) if (is_uv_system()) uv_cpu_init(); } + +#else + +void __cpuinit cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + struct tss_struct *t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &curr->thread; + + if (cpu_test_and_set(cpu, cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); + for (;;) local_irq_enable(); + } + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + load_idt(&idt_descr); + switch_to_new_gdt(); + + /* + * Set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + curr->active_mm = &init_mm; + if (curr->mm) + BUG(); + enter_lazy_tlb(&init_mm, curr); + + load_sp0(t, thread); + set_tss_desc(cpu, t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_DOUBLEFAULT + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); +#endif + + /* Clear %gs. */ + asm volatile ("mov %0, %%gs" : : "r" (0)); + + /* Clear all 6 debug registers: */ + set_debugreg(0, 0); + set_debugreg(0, 1); + set_debugreg(0, 2); + set_debugreg(0, 3); + set_debugreg(0, 6); + set_debugreg(0, 7); + + /* + * Force FPU initialization: + */ + if (cpu_has_xsave) + current_thread_info()->status = TS_XSAVE; + else + current_thread_info()->status = 0; + clear_used_math(); + mxcsr_feature_mask_init(); + + /* + * Boot processor to setup the FP and extended state context info. + */ + if (!smp_processor_id()) + init_thread_xstate(); + + xsave_init(); +} + +#ifdef CONFIG_HOTPLUG_CPU +void __cpuinit cpu_uninit(void) +{ + int cpu = raw_smp_processor_id(); + cpu_clear(cpu, cpu_initialized); + + /* lazy TLB state */ + per_cpu(cpu_tlbstate, cpu).state = 0; + per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +} +#endif + +#endif -- cgit v1.2.3 From fab334c1d5f24d23d12f98ad652d399279cd03ce Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:05 -0700 Subject: x86: cpu/common*.c, merge switch_to_new_gdt() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 ++ arch/x86/kernel/cpu/common_64.c | 3 +++ 2 files changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f44678db161..43d5287bb2a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -215,7 +215,9 @@ void switch_to_new_gdt(void) gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); +#ifdef CONFIG_X86_32 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +#endif } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 2bd0ed5abb0..d7b996518f8 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -214,6 +214,9 @@ void switch_to_new_gdt(void) gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); +#ifdef CONFIG_X86_32 + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +#endif } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; -- cgit v1.2.3 From b9e67f00424e164dcd29391eb48dc941db8691ad Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:06 -0700 Subject: x86: cpu/common.c, merge default_init() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++++ arch/x86/kernel/cpu/common_64.c | 12 ++++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 43d5287bb2a..2c4bfa2e56a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -224,6 +224,9 @@ static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 + display_cacheinfo(c); +#else /* Not much we can do here... */ /* Check if at least it has cpuid */ if (c->cpuid_level == -1) { @@ -233,6 +236,7 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c) else if (c->x86 == 3) strcpy(c->x86_model_id, "386"); } +#endif } static struct cpu_dev __cpuinitdata default_cpu = { diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index d7b996518f8..2fda1097481 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -223,7 +223,19 @@ static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; static void __cpuinit default_init(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 display_cacheinfo(c); +#else + /* Not much we can do here... */ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { + /* No cpuid. It must be an ancient CPU */ + if (c->x86 == 4) + strcpy(c->x86_model_id, "486"); + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } +#endif } static struct cpu_dev __cpuinitdata default_cpu = { -- cgit v1.2.3 From 140fc72709278989f08eb756d16a70008bdcc409 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:07 -0700 Subject: x86: cpu/common*.c, merge display_cacheinfo() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 ++++++++ arch/x86/kernel/cpu/common_64.c | 17 +++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2c4bfa2e56a..f9191207718 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -285,6 +285,10 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); +#ifdef CONFIG_X86_64 + /* On K8 L1 TLB is inclusive, so don't count it */ + c->x86_tlbsize = 0; +#endif } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -293,6 +297,9 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; +#ifdef CONFIG_X86_64 + c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); +#else /* do processor-specific cache resizing */ if (this_cpu->c_size_cache) l2size = this_cpu->c_size_cache(c, l2size); @@ -303,6 +310,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (l2size == 0) return; /* Again, no L2 cache is possible */ +#endif c->x86_cache_size = l2size; diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 2fda1097481..f7a2d524b1e 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -285,8 +285,10 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); +#ifdef CONFIG_X86_64 /* On K8 L1 TLB is inclusive, so don't count it */ c->x86_tlbsize = 0; +#endif } if (n < 0x80000006) /* Some chips just has a large L1. */ @@ -294,7 +296,22 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); l2size = ecx >> 16; + +#ifdef CONFIG_X86_64 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); +#else + + /* do processor-specific cache resizing */ + if (this_cpu->c_size_cache) + l2size = this_cpu->c_size_cache(c, l2size); + + /* Allow user to override all this if necessary. */ + if (cachesize_override != -1) + l2size = cachesize_override; + + if (l2size == 0) + return; /* Again, no L2 cache is possible */ +#endif c->x86_cache_size = l2size; -- cgit v1.2.3 From 1cd78776c7d5487e3000426d0ce1ff203c5d60cd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:08 -0700 Subject: x86: cpu/common*.c, merge detect_ht() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 13 ++++++++++++- arch/x86/kernel/cpu/common_64.c | 9 +++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f9191207718..b2018f76c94 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -330,6 +330,9 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) goto out; + if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) + return; + cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; @@ -346,8 +349,11 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); +#ifdef CONFIG_X86_64 + c->phys_proc_id = phys_pkg_id(index_msb); +#else c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); - +#endif smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -355,8 +361,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); +#ifdef CONFIG_X86_64 + c->cpu_core_id = phys_pkg_id(index_msb) & + ((1 << core_bits) - 1); +#else c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); +#endif } out: diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index f7a2d524b1e..7ac0a00743c 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -349,7 +349,11 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); +#ifdef CONFIG_X86_64 c->phys_proc_id = phys_pkg_id(index_msb); +#else + c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); +#endif smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -357,8 +361,13 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); +#ifdef CONFIG_X86_64 c->cpu_core_id = phys_pkg_id(index_msb) & ((1 << core_bits) - 1); +#else + c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + ((1 << core_bits) - 1); +#endif } out: -- cgit v1.2.3 From 5122c890ba969e6efeaba9ed45f5936e62d3c6d5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:09 -0700 Subject: x86: cpu/common.c: merge get_cpu_cap() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 20 ++++++++++++++++++++ arch/x86/kernel/cpu/common_64.c | 2 ++ 2 files changed, 22 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b2018f76c94..ffc2f5ed09a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -458,6 +458,26 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[6] = cpuid_ecx(0x80000001); } } + +#ifdef CONFIG_X86_64 + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + /* Don't set x86_cpuid_level here for now to not confuse. */ + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } + + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + + if (c->extended_cpuid_level >= 0x80000008) { + u32 eax = cpuid_eax(0x80000008); + + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } +#endif } /* * Do minimum CPU detection early. diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 7ac0a00743c..75bb7ff99ee 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -461,6 +461,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } } +#ifdef CONFIG_X86_64 /* Transmeta-defined flags: level 0x80860001 */ xlvl = cpuid_eax(0x80860000); if ((xlvl & 0xffff0000) == 0x80860000) { @@ -478,6 +479,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; } +#endif } /* Do some early cpuid on the boot CPU to get some parameter that are -- cgit v1.2.3 From 6627d2423067f2c6eedb422a59fba9270a3c5e36 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:10 -0700 Subject: x86: cpu/common*.c, merge early_identify_cpu() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++++ arch/x86/kernel/cpu/common_64.c | 19 ++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ffc2f5ed09a..61a70748213 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -490,7 +490,11 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 + c->x86_clflush_size = 64; +#else c->x86_clflush_size = 32; +#endif c->x86_cache_alignment = c->x86_clflush_size; if (!have_cpuid_p()) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 75bb7ff99ee..6475548d64e 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -482,15 +482,28 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) #endif } -/* Do some early cpuid on the boot CPU to get some parameter that are - needed before check_bugs. Everything advanced is in identify_cpu - below. */ +/* + * Do minimum CPU detection early. + * Fields really needed: vendor, cpuid_level, family, model, mask, + * cache alignment. + * The others are not touched to avoid unwanted side effects. + * + * WARNING: this function is only called on the BP. Don't add code here + * that is supposed to run on all CPUs. + */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { +#ifdef CONFIG_X86_64 c->x86_clflush_size = 64; +#else + c->x86_clflush_size = 32; +#endif c->x86_cache_alignment = c->x86_clflush_size; + if (!have_cpuid_p()) + return; + memset(&c->x86_capability, 0, sizeof c->x86_capability); c->extended_cpuid_level = 0; -- cgit v1.2.3 From 56f0d033be2ebb983993e5d7f24ae232c9a1e7f9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:11 -0700 Subject: x86: cpu/common*.c: merge print_cpu_info() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6475548d64e..68b06a39dca 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -730,8 +730,20 @@ __setup("noclflush", setup_noclflush); void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { + char *vendor = NULL; + + if (c->x86_vendor < X86_VENDOR_NUM) + vendor = this_cpu->c_vendor; + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + + if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + printk(KERN_CONT "%s ", vendor); + if (c->x86_model_id[0]) printk(KERN_CONT "%s", c->x86_model_id); + else + printk(KERN_CONT "%d86", c->x86); if (c->x86_mask || c->cpuid_level >= 0) printk(KERN_CONT " stepping %02x\n", c->x86_mask); -- cgit v1.2.3 From b89d3b3e2caeab3d895301d1aee3cd2a91eddb79 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:12 -0700 Subject: x86: cpu/common*.c, merge generic_identify() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 15 ++++++++++++--- arch/x86/kernel/cpu/common_64.c | 19 ++++++++++++++++--- 2 files changed, 28 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 61a70748213..f35baa7f303 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -548,6 +548,10 @@ void __init early_cpu_init(void) * of early VIA chips and (more importantly) broken virtualizers that * are not easy to detect. Hence, probe for it based on first * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. */ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) { @@ -586,11 +590,16 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) if (c->cpuid_level >= 0x00000001) { c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_HT +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_HT c->apicid = phys_pkg_id(c->initial_apicid, 0); - c->phys_proc_id = c->initial_apicid; -#else +# else c->apicid = c->initial_apicid; +# endif +#endif + +#ifdef CONFIG_X86_HT + c->phys_proc_id = c->initial_apicid; #endif } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 68b06a39dca..869a6ff9f7d 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -581,6 +581,9 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { + if (!have_cpuid_p()) + return; + c->extended_cpuid_level = 0; cpu_detect(c); @@ -589,11 +592,21 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) get_cpu_cap(c); - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; -#ifdef CONFIG_SMP - c->phys_proc_id = c->initial_apicid; + if (c->cpuid_level >= 0x00000001) { + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; +#ifdef CONFIG_X86_32 +# ifdef CONFIG_X86_HT + c->apicid = phys_pkg_id(c->initial_apicid, 0); +# else + c->apicid = c->initial_apicid; +# endif #endif +#ifdef CONFIG_X86_HT + c->phys_proc_id = c->initial_apicid; +#endif + } + if (c->extended_cpuid_level >= 0x80000004) get_model_name(c); /* Default name */ -- cgit v1.2.3 From 102bbe3ab83c7ac04461d277df23cd5acdb49892 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:13 -0700 Subject: x86: cpu/common*.c, merge identify_cpu() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 89 ++++++++++++++++++++---------- arch/x86/kernel/cpu/common_64.c | 116 ++++++++++++++++++++++++++++++---------- 2 files changed, 147 insertions(+), 58 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f35baa7f303..eef868c97b8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -104,34 +104,6 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - static int __init x86_fxsr_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_FXSR); @@ -197,13 +169,48 @@ static int __init x86_serial_nr_setup(char *s) } __setup("serialnumber", x86_serial_nr_setup); #else +static inline int flag_is_changeable_p(u32 flag) +{ + return 1; +} /* Probe for the CPUID instruction */ static inline int have_cpuid_p(void) { return 1; } +static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ +} #endif +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -620,12 +627,18 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->loops_per_jiffy = loops_per_jiffy; c->x86_cache_size = -1; c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; +#ifdef CONFIG_X86_64 + c->x86_coreid_bits = 0; + c->x86_clflush_size = 64; +#else + c->cpuid_level = -1; /* CPUID not detected */ c->x86_clflush_size = 32; +#endif + c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -644,6 +657,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_identify) this_cpu->c_identify(c); +#ifdef CONFIG_X86_64 + c->apicid = phys_pkg_id(0); +#endif + /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are @@ -677,6 +694,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86, c->x86_model); } +#ifdef CONFIG_X86_64 + detect_ht(c); +#endif + /* * On SMP, boot_cpu_data holds the common feature set between * all CPUs; so make sure that we indicate which features are @@ -693,24 +714,34 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) for (i = 0; i < NCAPINTS; i++) c->x86_capability[i] &= ~cleared_cpu_caps[i]; +#ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ mcheck_init(c); +#endif select_idle_routine(c); + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + numa_add_cpu(smp_processor_id()); +#endif } void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); +#ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); +#endif } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) { BUG_ON(c == &boot_cpu_data); identify_cpu(c); +#ifdef CONFIG_X86_32 enable_sep_cpu(); +#endif mtrr_ap_init(); } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 869a6ff9f7d..6a42371a609 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -103,34 +103,6 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - static int __init x86_fxsr_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_FXSR); @@ -196,13 +168,48 @@ static int __init x86_serial_nr_setup(char *s) } __setup("serialnumber", x86_serial_nr_setup); #else +static inline int flag_is_changeable_p(u32 flag) +{ + return 1; +} /* Probe for the CPUID instruction */ static inline int have_cpuid_p(void) { return 1; } +static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ +} #endif +/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; + * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used + * + */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -628,14 +635,35 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; +#ifdef CONFIG_X86_64 c->x86_coreid_bits = 0; c->x86_clflush_size = 64; +#else + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_clflush_size = 32; +#endif c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); + if (!have_cpuid_p()) { + /* + * First of all, decide if this is a 486 or higher + * It's a 486 if we can modify the AC flag + */ + if (flag_is_changeable_p(X86_EFLAGS_AC)) + c->x86 = 4; + else + c->x86 = 3; + } + generic_identify(c); + if (this_cpu->c_identify) + this_cpu->c_identify(c); + +#ifdef CONFIG_X86_64 c->apicid = phys_pkg_id(0); +#endif /* * Vendor-specific initialization. In this section we @@ -650,7 +678,29 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_init) this_cpu->c_init(c); + /* Disable the PN if appropriate */ + squash_the_stupid_serial_number(c); + + /* + * The vendor-specific functions might have changed features. Now + * we do "generic changes." + */ + + /* If the model name is still unset, do table lookup. */ + if (!c->x86_model_id[0]) { + char *p; + p = table_lookup_model(c); + if (p) + strcpy(c->x86_model_id, p); + else + /* Last resort... */ + sprintf(c->x86_model_id, "%02x/%02x", + c->x86, c->x86_model); + } + +#ifdef CONFIG_X86_64 detect_ht(c); +#endif /* * On SMP, boot_cpu_data holds the common feature set between @@ -669,11 +719,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[i] &= ~cleared_cpu_caps[i]; #ifdef CONFIG_X86_MCE + /* Init Machine Check Exception if available. */ mcheck_init(c); #endif select_idle_routine(c); -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) numa_add_cpu(smp_processor_id()); #endif @@ -682,12 +733,19 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); +#ifdef CONFIG_X86_32 + sysenter_setup(); + enable_sep_cpu(); +#endif } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) { BUG_ON(c == &boot_cpu_data); identify_cpu(c); +#ifdef CONFIG_X86_32 + enable_sep_cpu(); +#endif mtrr_ap_init(); } -- cgit v1.2.3 From 143b604a2d07ff69cc2bc02ecd9395b28e0b5292 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 5 Sep 2008 09:37:15 +0200 Subject: x86: cpu/common*.c, merge whitespaces Merge leftover whitespaces, to make arch/x86/kernel/cpu/common_64.c exactly identical to arch/x86/kernel/cpu/common.c. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 6a42371a609..eef868c97b8 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -26,6 +26,7 @@ #include #include #endif + #include #include #include @@ -280,7 +281,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) return 1; } - void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -307,7 +307,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); #else - /* do processor-specific cache resizing */ if (this_cpu->c_size_cache) l2size = this_cpu->c_size_cache(c, l2size); @@ -334,6 +333,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_HT)) return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) goto out; @@ -443,7 +443,6 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) } } - static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; @@ -452,7 +451,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) /* Intel-defined flags: level 0x00000001 */ if (c->cpuid_level >= 0x00000001) { u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; @@ -488,7 +486,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } #endif } - /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -500,7 +497,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) */ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { - #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; #else @@ -722,12 +718,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) /* Init Machine Check Exception if available. */ mcheck_init(c); #endif + select_idle_routine(c); #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) numa_add_cpu(smp_processor_id()); #endif - } void __init identify_boot_cpu(void) -- cgit v1.2.3 From f5017cfa3591d8eaf5c792e40ff30f18e498b68c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:09:14 -0700 Subject: x86: use cpu/common.c on 64 bit Use cpu/common.c on both 64-bit and 32-bit and remove cpu/common_64.c. We started out with this linecount: 816 arch/x86/kernel/cpu/common_64.c 805 arch/x86/kernel/cpu/common.c and the resulting common.c is 1197 lines long, so there's already 424 lines of code eliminated in this phase of the unification. Signed-off-by: Yinghai Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 6 +- arch/x86/kernel/cpu/common_64.c | 1197 --------------------------------------- 2 files changed, 3 insertions(+), 1200 deletions(-) delete mode 100644 arch/x86/kernel/cpu/common_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 403e689df0b..d031f248dfc 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -3,10 +3,10 @@ # obj-y := intel_cacheinfo.o addon_cpuid_features.o -obj-y += proc.o capflags.o powerflags.o +obj-y += proc.o capflags.o powerflags.o common.o -obj-$(CONFIG_X86_32) += common.o bugs.o cmpxchg.o -obj-$(CONFIG_X86_64) += common_64.o bugs_64.o +obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o +obj-$(CONFIG_X86_64) += bugs_64.o obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c deleted file mode 100644 index eef868c97b8..00000000000 --- a/arch/x86/kernel/cpu/common_64.c +++ /dev/null @@ -1,1197 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cpu.h" - -static struct cpu_dev *this_cpu __cpuinitdata; - -#ifdef CONFIG_X86_64 -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, - [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, -} }; -#else -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, - [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, - [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, - [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, - /* - * Segments used for calling PnP BIOS have byte granularity. - * They code segments and data segments have fixed 64k limits, - * the transfer segment sizes are set at run time. - */ - /* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, - /* 16-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, - /* - * The APM segments have byte granularity and their bases - * are set at run time. All have 64k limits. - */ - /* 32-bit code */ - [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, - /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, - /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, - - [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, -} }; -#endif -EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); - -#ifdef CONFIG_X86_32 -static int cachesize_override __cpuinitdata = -1; -static int disable_x86_serial_nr __cpuinitdata = 1; - -static int __init cachesize_setup(char *str) -{ - get_option(&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); - -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - -static int __init x86_sep_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_SEP); - return 1; -} -__setup("nosep", x86_sep_setup); - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { - /* Disable processor serial number */ - unsigned long lo, hi; - rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_cpu_cap(c, X86_FEATURE_PN); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } -} - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); -#else -static inline int flag_is_changeable_p(u32 flag) -{ - return 1; -} -/* Probe for the CPUID instruction */ -static inline int have_cpuid_p(void) -{ - return 1; -} -static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ -} -#endif - -/* - * Naming convention should be: [()] - * This table only is used unless init_() below doesn't set it; - * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used - * - */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if (c->x86_model >= 16) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - -__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; - -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct desc_ptr gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); -#ifdef CONFIG_X86_32 - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -#endif -} - -static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; - -static void __cpuinit default_init(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - display_cacheinfo(c); -#else - /* Not much we can do here... */ - /* Check if at least it has cpuid */ - if (c->cpuid_level == -1) { - /* No cpuid. It must be an ancient CPU */ - if (c->x86 == 4) - strcpy(c->x86_model_id, "486"); - else if (c->x86 == 3) - strcpy(c->x86_model_id, "386"); - } -#endif -} - -static struct cpu_dev __cpuinitdata default_cpu = { - .c_init = default_init, - .c_vendor = "Unknown", - .c_x86_vendor = X86_VENDOR_UNKNOWN, -}; - -int __cpuinit get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - char *p, *q; - - if (c->extended_cpuid_level < 0x80000004) - return 0; - - v = (unsigned int *) c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - - /* Intel chips right-justify this string for some dumb reason; - undo that brain damage */ - p = q = &c->x86_model_id[0]; - while (*p == ' ') - p++; - if (p != q) { - while (*p) - *q++ = *p++; - while (q <= &c->x86_model_id[48]) - *q++ = '\0'; /* Zero-pad the rest */ - } - - return 1; -} - -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, ebx, ecx, edx, l2size; - - n = c->extended_cpuid_level; - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size = (ecx>>24) + (edx>>24); -#ifdef CONFIG_X86_64 - /* On K8 L1 TLB is inclusive, so don't count it */ - c->x86_tlbsize = 0; -#endif - } - - if (n < 0x80000006) /* Some chips just has a large L1. */ - return; - - cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); - l2size = ecx >> 16; - -#ifdef CONFIG_X86_64 - c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); -#else - /* do processor-specific cache resizing */ - if (this_cpu->c_size_cache) - l2size = this_cpu->c_size_cache(c, l2size); - - /* Allow user to override all this if necessary. */ - if (cachesize_override != -1) - l2size = cachesize_override; - - if (l2size == 0) - return; /* Again, no L2 cache is possible */ -#endif - - c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - if (!cpu_has(c, X86_FEATURE_HT)) - return; - - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - goto out; - - if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) - return; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of siblings %d", - smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); -#ifdef CONFIG_X86_64 - c->phys_proc_id = phys_pkg_id(index_msb); -#else - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); -#endif - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings); - - core_bits = get_count_order(c->x86_max_cores); - -#ifdef CONFIG_X86_64 - c->cpu_core_id = phys_pkg_id(index_msb) & - ((1 << core_bits) - 1); -#else - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & - ((1 << core_bits) - 1); -#endif - } - -out: - if ((c->x86_max_cores * smp_num_siblings) > 1) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } -#endif -} - -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) -{ - char *v = c->x86_vendor_id; - int i; - static int printed; - - for (i = 0; i < X86_VENDOR_NUM; i++) { - if (!cpu_devs[i]) - break; - - if (!strcmp(v, cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v, cpu_devs[i]->c_ident[1]))) { - this_cpu = cpu_devs[i]; - c->x86_vendor = this_cpu->c_x86_vendor; - return; - } - } - - if (!printed) { - printed++; - printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); - printk(KERN_ERR "CPU: Your system may be unstable.\n"); - } - - c->x86_vendor = X86_VENDOR_UNKNOWN; - this_cpu = &default_cpu; -} - -void __cpuinit cpu_detect(struct cpuinfo_x86 *c) -{ - /* Get vendor name */ - cpuid(0x00000000, (unsigned int *)&c->cpuid_level, - (unsigned int *)&c->x86_vendor_id[0], - (unsigned int *)&c->x86_vendor_id[8], - (unsigned int *)&c->x86_vendor_id[4]); - - c->x86 = 4; - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 junk, tfms, cap0, misc; - cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 0xf; - c->x86_model = (tfms >> 4) & 0xf; - c->x86_mask = tfms & 0xf; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xf) << 4; - if (cap0 & (1<<19)) { - c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - c->x86_cache_alignment = c->x86_clflush_size; - } - } -} - -static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) -{ - u32 tfms, xlvl; - u32 ebx; - - /* Intel-defined flags: level 0x00000001 */ - if (c->cpuid_level >= 0x00000001) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - c->extended_cpuid_level = xlvl; - if ((xlvl & 0xffff0000) == 0x80000000) { - if (xlvl >= 0x80000001) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - } - -#ifdef CONFIG_X86_64 - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - /* Don't set x86_cpuid_level here for now to not confuse. */ - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); - - if (c->extended_cpuid_level >= 0x80000008) { - u32 eax = cpuid_eax(0x80000008); - - c->x86_virt_bits = (eax >> 8) & 0xff; - c->x86_phys_bits = eax & 0xff; - } -#endif -} -/* - * Do minimum CPU detection early. - * Fields really needed: vendor, cpuid_level, family, model, mask, - * cache alignment. - * The others are not touched to avoid unwanted side effects. - * - * WARNING: this function is only called on the BP. Don't add code here - * that is supposed to run on all CPUs. - */ -static void __init early_identify_cpu(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - c->x86_clflush_size = 64; -#else - c->x86_clflush_size = 32; -#endif - c->x86_cache_alignment = c->x86_clflush_size; - - if (!have_cpuid_p()) - return; - - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - c->extended_cpuid_level = 0; - - cpu_detect(c); - - get_cpu_vendor(c); - - get_cpu_cap(c); - - if (this_cpu->c_early_init) - this_cpu->c_early_init(c); - - validate_pat_support(c); -} - -void __init early_cpu_init(void) -{ - struct cpu_dev **cdev; - int count = 0; - - printk("KERNEL supported cpus:\n"); - for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { - struct cpu_dev *cpudev = *cdev; - unsigned int j; - - if (count >= X86_VENDOR_NUM) - break; - cpu_devs[count] = cpudev; - count++; - - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - printk(" %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); - } - } - - early_identify_cpu(&boot_cpu_data); -} - -/* - * The NOPL instruction is supposed to exist on all CPUs with - * family >= 6, unfortunately, that's not true in practice because - * of early VIA chips and (more importantly) broken virtualizers that - * are not easy to detect. Hence, probe for it based on first - * principles. - * - * Note: no 64-bit chip is known to lack these, but put the code here - * for consistency with 32 bits, and to make it utterly trivial to - * diagnose the problem should it ever surface. - */ -static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) -{ - const u32 nopl_signature = 0x888c53b1; /* Random number */ - u32 has_nopl = nopl_signature; - - clear_cpu_cap(c, X86_FEATURE_NOPL); - if (c->x86 >= 6) { - asm volatile("\n" - "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ - "2:\n" - " .section .fixup,\"ax\"\n" - "3: xor %0,%0\n" - " jmp 2b\n" - " .previous\n" - _ASM_EXTABLE(1b,3b) - : "+a" (has_nopl)); - - if (has_nopl == nopl_signature) - set_cpu_cap(c, X86_FEATURE_NOPL); - } -} - -static void __cpuinit generic_identify(struct cpuinfo_x86 *c) -{ - if (!have_cpuid_p()) - return; - - c->extended_cpuid_level = 0; - - cpu_detect(c); - - get_cpu_vendor(c); - - get_cpu_cap(c); - - if (c->cpuid_level >= 0x00000001) { - c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_32 -# ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); -# else - c->apicid = c->initial_apicid; -# endif -#endif - -#ifdef CONFIG_X86_HT - c->phys_proc_id = c->initial_apicid; -#endif - } - - if (c->extended_cpuid_level >= 0x80000004) - get_model_name(c); /* Default name */ - - init_scattered_cpuid_features(c); - detect_nopl(c); -} - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) -{ - int i; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; -#ifdef CONFIG_X86_64 - c->x86_coreid_bits = 0; - c->x86_clflush_size = 64; -#else - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_clflush_size = 32; -#endif - c->x86_cache_alignment = c->x86_clflush_size; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - if (!have_cpuid_p()) { - /* - * First of all, decide if this is a 486 or higher - * It's a 486 if we can modify the AC flag - */ - if (flag_is_changeable_p(X86_EFLAGS_AC)) - c->x86 = 4; - else - c->x86 = 3; - } - - generic_identify(c); - - if (this_cpu->c_identify) - this_cpu->c_identify(c); - -#ifdef CONFIG_X86_64 - c->apicid = phys_pkg_id(0); -#endif - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. - * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! - */ - if (this_cpu->c_init) - this_cpu->c_init(c); - - /* Disable the PN if appropriate */ - squash_the_stupid_serial_number(c); - - /* - * The vendor-specific functions might have changed features. Now - * we do "generic changes." - */ - - /* If the model name is still unset, do table lookup. */ - if (!c->x86_model_id[0]) { - char *p; - p = table_lookup_model(c); - if (p) - strcpy(c->x86_model_id, p); - else - /* Last resort... */ - sprintf(c->x86_model_id, "%02x/%02x", - c->x86, c->x86_model); - } - -#ifdef CONFIG_X86_64 - detect_ht(c); -#endif - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. - */ - if (c != &boot_cpu_data) { - /* AND the already accumulated flags with these */ - for (i = 0; i < NCAPINTS; i++) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - /* Clear all flags overriden by options */ - for (i = 0; i < NCAPINTS; i++) - c->x86_capability[i] &= ~cleared_cpu_caps[i]; - -#ifdef CONFIG_X86_MCE - /* Init Machine Check Exception if available. */ - mcheck_init(c); -#endif - - select_idle_routine(c); - -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - numa_add_cpu(smp_processor_id()); -#endif -} - -void __init identify_boot_cpu(void) -{ - identify_cpu(&boot_cpu_data); -#ifdef CONFIG_X86_32 - sysenter_setup(); - enable_sep_cpu(); -#endif -} - -void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) -{ - BUG_ON(c == &boot_cpu_data); - identify_cpu(c); -#ifdef CONFIG_X86_32 - enable_sep_cpu(); -#endif - mtrr_ap_init(); -} - -struct msr_range { - unsigned min; - unsigned max; -}; - -static struct msr_range msr_range_array[] __cpuinitdata = { - { 0x00000000, 0x00000418}, - { 0xc0000000, 0xc000040b}, - { 0xc0010000, 0xc0010142}, - { 0xc0011000, 0xc001103b}, -}; - -static void __cpuinit print_cpu_msr(void) -{ - unsigned index; - u64 val; - int i; - unsigned index_min, index_max; - - for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { - index_min = msr_range_array[i].min; - index_max = msr_range_array[i].max; - for (index = index_min; index < index_max; index++) { - if (rdmsrl_amd_safe(index, &val)) - continue; - printk(KERN_INFO " MSR%08x: %016llx\n", index, val); - } - } -} - -static int show_msr __cpuinitdata; -static __init int setup_show_msr(char *arg) -{ - int num; - - get_option(&arg, &num); - - if (num > 0) - show_msr = num; - return 1; -} -__setup("show_msr=", setup_show_msr); - -static __init int setup_noclflush(char *arg) -{ - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); - return 1; -} -__setup("noclflush", setup_noclflush); - -void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) -{ - char *vendor = NULL; - - if (c->x86_vendor < X86_VENDOR_NUM) - vendor = this_cpu->c_vendor; - else if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; - - if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk(KERN_CONT "%s ", vendor); - - if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); - else - printk(KERN_CONT "%d86", c->x86); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); - else - printk(KERN_CONT "\n"); - -#ifdef CONFIG_SMP - if (c->cpu_index < show_msr) - print_cpu_msr(); -#else - if (show_msr) - print_cpu_msr(); -#endif -} - -static __init int setup_disablecpuid(char *arg) -{ - int bit; - if (get_option(&arg, &bit) && bit < NCAPINTS*32) - setup_clear_cpu_cap(bit); - else - return 0; - return 1; -} -__setup("clearcpuid=", setup_disablecpuid); - -cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; - -#ifdef CONFIG_X86_64 -struct x8664_pda **_cpu_pda __read_mostly; -EXPORT_SYMBOL(_cpu_pda); - -struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; - -char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; - -unsigned long __supported_pte_mask __read_mostly = ~0UL; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -static int do_not_nx __cpuinitdata; - -/* noexec=on|off -Control non executable mappings for 64bit processes. - -on Enable(default) -off Disable -*/ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; - } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -int force_personality32; - -/* noexec32=on|off -Control non executable heap for 32bit processes. -To control the stack too use noexec=off - -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) -off PROT_READ implies PROT_EXEC -*/ -static int __init nonx32_setup(char *str) -{ - if (!strcmp(str, "on")) - force_personality32 &= ~READ_IMPLIES_EXEC; - else if (!strcmp(str, "off")) - force_personality32 |= READ_IMPLIES_EXEC; - return 1; -} -__setup("noexec32=", nonx32_setup); - -void pda_init(int cpu) -{ - struct x8664_pda *pda = cpu_pda(cpu); - - /* Setup up data that may be needed in __get_free_pages early */ - loadsegment(fs, 0); - loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); - - pda->cpunumber = cpu; - pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; - - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - pda->irqstackptr += IRQSTACKSIZE - 64; - } else { - if (!pda->irqstackptr) { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", - cpu); - pda->irqstackptr += IRQSTACKSIZE - 64; - } - - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) - pda->nodenumber = cpu_to_node(cpu); - } -} - -char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] __page_aligned_bss; - -extern asmlinkage void ignore_sysret(void); - -/* May not be marked __init: used by software suspend */ -void syscall_init(void) -{ - /* - * LSTAR and STAR live in a bit strange symbiosis. - * They both write to the same internal register. STAR allows to - * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. - */ - wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, system_call); - wrmsrl(MSR_CSTAR, ignore_sysret); - -#ifdef CONFIG_IA32_EMULATION - syscall32_cpu_init(); -#endif - - /* Flags to clear on syscall */ - wrmsrl(MSR_SYSCALL_MASK, - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); -} - -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) - __supported_pte_mask &= ~_PAGE_NX; -} - -unsigned long kernel_eflags; - -/* - * Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - -#else - -/* Make sure %fs is initialized properly in idle threads */ -struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) -{ - memset(regs, 0, sizeof(struct pt_regs)); - regs->fs = __KERNEL_PERCPU; - return regs; -} -#endif - -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - * A lot of state is already set up in PDA init for 64 bit - */ -#ifdef CONFIG_X86_64 -void __cpuinit cpu_init(void) -{ - int cpu = stack_smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); - unsigned long v; - char *estacks = NULL; - struct task_struct *me; - int i; - - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) - pda_init(cpu); - else - estacks = boot_exception_stacks; - - me = current; - - if (cpu_test_and_set(cpu, cpu_initialized)) - panic("CPU#%d already initialized!\n", cpu); - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: - */ - - switch_to_new_gdt(); - load_idt((const struct desc_ptr *)&idt_descr); - - memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); - syscall_init(); - - wrmsrl(MSR_FS_BASE, 0); - wrmsrl(MSR_KERNEL_GS_BASE, 0); - barrier(); - - check_efer(); - if (cpu != 0 && x2apic) - enable_x2apic(); - - /* - * set up and load the per-CPU TSS - */ - if (!orig_ist->ist[0]) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER - }; - for (v = 0; v < N_EXCEPTION_STACKS; v++) { - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception " - "stack %ld %d\n", v, cpu); - } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = - (unsigned long)estacks; - } - } - - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); - /* - * <= is required because the CPU will access up to - * 8 bits beyond the end of the IO permission bitmap. - */ - for (i = 0; i <= IO_BITMAP_LONGS; i++) - t->io_bitmap[i] = ~0UL; - - atomic_inc(&init_mm.mm_count); - me->active_mm = &init_mm; - if (me->mm) - BUG(); - enter_lazy_tlb(&init_mm, me); - - load_sp0(t, ¤t->thread); - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_KGDB - /* - * If the kgdb is connected no debug regs should be altered. This - * is only applicable when KGDB and a KGDB I/O module are built - * into the kernel and you are using early debugging with - * kgdbwait. KGDB will control the kernel HW breakpoint registers. - */ - if (kgdb_connected && arch_kgdb_ops.correct_hw_break) - arch_kgdb_ops.correct_hw_break(); - else { -#endif - /* - * Clear all 6 debug registers: - */ - - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); -#ifdef CONFIG_KGDB - /* If the kgdb is connected no debug regs should be altered. */ - } -#endif - - fpu_init(); - - raw_local_save_flags(kernel_eflags); - - if (is_uv_system()) - uv_cpu_init(); -} - -#else - -void __cpuinit cpu_init(void) -{ - int cpu = smp_processor_id(); - struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(init_tss, cpu); - struct thread_struct *thread = &curr->thread; - - if (cpu_test_and_set(cpu, cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) local_irq_enable(); - } - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - - load_idt(&idt_descr); - switch_to_new_gdt(); - - /* - * Set up and load the per-CPU TSS and LDT - */ - atomic_inc(&init_mm.mm_count); - curr->active_mm = &init_mm; - if (curr->mm) - BUG(); - enter_lazy_tlb(&init_mm, curr); - - load_sp0(t, thread); - set_tss_desc(cpu, t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_DOUBLEFAULT - /* Set up doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); -#endif - - /* Clear %gs. */ - asm volatile ("mov %0, %%gs" : : "r" (0)); - - /* Clear all 6 debug registers: */ - set_debugreg(0, 0); - set_debugreg(0, 1); - set_debugreg(0, 2); - set_debugreg(0, 3); - set_debugreg(0, 6); - set_debugreg(0, 7); - - /* - * Force FPU initialization: - */ - if (cpu_has_xsave) - current_thread_info()->status = TS_XSAVE; - else - current_thread_info()->status = 0; - clear_used_math(); - mxcsr_feature_mask_init(); - - /* - * Boot processor to setup the FP and extended state context info. - */ - if (!smp_processor_id()) - init_thread_xstate(); - - xsave_init(); -} - -#ifdef CONFIG_HOTPLUG_CPU -void __cpuinit cpu_uninit(void) -{ - int cpu = raw_smp_processor_id(); - cpu_clear(cpu, cpu_initialized); - - /* lazy TLB state */ - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} -#endif - -#endif -- cgit v1.2.3 From bd220a24a9263eaa70d5e6821383085950b5a13c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Sep 2008 00:58:28 -0700 Subject: x86: move nonx_setup etc from common.c to init_64.c like 32 bit put it in init_32.c Signed-off-by: Yinghai Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 54 -------------------------------------------- 1 file changed, 54 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index eef868c97b8..286c89a991b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -847,51 +847,6 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; -unsigned long __supported_pte_mask __read_mostly = ~0UL; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -static int do_not_nx __cpuinitdata; - -/* noexec=on|off -Control non executable mappings for 64bit processes. - -on Enable(default) -off Disable -*/ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; - } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -int force_personality32; - -/* noexec32=on|off -Control non executable heap for 32bit processes. -To control the stack too use noexec=off - -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) -off PROT_READ implies PROT_EXEC -*/ -static int __init nonx32_setup(char *str) -{ - if (!strcmp(str, "on")) - force_personality32 &= ~READ_IMPLIES_EXEC; - else if (!strcmp(str, "off")) - force_personality32 |= READ_IMPLIES_EXEC; - return 1; -} -__setup("noexec32=", nonx32_setup); - void pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -957,15 +912,6 @@ void syscall_init(void) X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); } -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) - __supported_pte_mask &= ~_PAGE_NX; -} - unsigned long kernel_eflags; /* -- cgit v1.2.3 From 7cfb0435330364f90f274a26ecdc5f47f738498c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 3 Sep 2008 21:37:24 +0000 Subject: HPET: make minimum reprogramming delta useful The minimum reprogramming delta was hardcoded in HPET ticks, which is stupid as it does not work with faster running HPETs. The C1E idle patches made this prominent on AMD/RS690 chipsets, where the HPET runs with 25MHz. Set it to 5us which seems to be a reasonable value and fixes the problems on the bug reporters machines. We have a further sanity check now in the clock events, which increases the delta when it is not sufficient. Signed-off-by: Thomas Gleixner Tested-by: Luiz Fernando N. Capitulino Tested-by: Dmitry Nezhevenko Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 59fd3b6b130..2256315416d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -210,8 +210,8 @@ static void hpet_legacy_clockevent_register(void) /* Calculate the min / max delta */ hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &hpet_clockevent); - hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, - &hpet_clockevent); + /* 5 usec minimum reprogramming delta. */ + hpet_clockevent.min_delta_ns = 5000; /* * Start hpet with the boot cpu mask and make it -- cgit v1.2.3 From 551b4545bf9658dc5ae5a1277dcd4d7bf0028b28 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 5 Sep 2008 17:58:49 +0900 Subject: x86: gart alloc_coherent doesn't need to check NULL device argument asm/dma-mapping.h guarantees that gart alloc_coherent doesn't get NULL device argument. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 4d0864900b8..0b99d4a06f7 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -506,9 +506,6 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, align_mask = (1UL << get_order(size)) - 1; - if (!dev) - dev = &x86_dma_fallback_dev; - *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, align_mask); flush_gart(); -- cgit v1.2.3 From cf169702ba6928cee9d4f4adf3e932b643b8db7a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Sep 2008 13:13:40 +0200 Subject: x86, gart: add detection of AMD family 0x11 northbridges This patch adds the detection of the northbridges in the AMD family 0x11 processors. It also fixes the magic numbers there while changing this code. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/k8.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 7377ccb2133..304d8bad655 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c @@ -16,8 +16,9 @@ EXPORT_SYMBOL(num_k8_northbridges); static u32 *flush_words; struct pci_device_id k8_nb_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) }, {} }; EXPORT_SYMBOL(k8_nb_ids); -- cgit v1.2.3 From e51af6630848406fc97adbd71443818cdcda297b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 4 Sep 2008 09:54:37 +0100 Subject: x86: blacklist DMAR on Intel G31/G33 chipsets Some BIOSes (the Intel DG33BU, for example) wrongly claim to have DMAR when they don't. Avoid the resulting crashes when it doesn't work as expected. I'd still be grateful if someone could test it on a DG33BU with the old BIOS though, since I've killed mine. I tested the DMI version, but not this one. Signed-off-by: David Woodhouse Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 4353cf5e6fa..24bb5faf5ef 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -95,6 +95,20 @@ static void __init nvidia_bugs(int num, int slot, int func) } +#ifdef CONFIG_DMAR +static void __init intel_g33_dmar(int num, int slot, int func) +{ + struct acpi_table_header *dmar_tbl; + acpi_status status; + + status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); + if (ACPI_SUCCESS(status)) { + printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); + dmar_disabled = 1; + } +} +#endif + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -114,6 +128,10 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs }, { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config }, +#ifdef CONFIG_DMAR + { PCI_VENDOR_ID_INTEL, 0x29c0, + PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, +#endif {} }; -- cgit v1.2.3 From b6734c35af028f06772c0b2c836c7d579e6d4dad Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 17:39:32 -0700 Subject: x86: add NOPL as a synthetic CPU feature bit The long noops ("NOPL") are supposed to be detected by family >= 6. Unfortunately, several non-Intel x86 implementations, both hardware and software, don't obey this dictum. Instead, probe for NOPL directly by executing a NOPL instruction and see if we get #UD. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 32 +++++++++++++++++++++++++++++++- arch/x86/kernel/cpu/common_64.c | 36 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/feature_names.c | 3 ++- 3 files changed, 69 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa3..0785b3c8d04 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include @@ -341,6 +342,35 @@ static void __init early_cpu_detect(void) early_get_cap(c); } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { u32 tfms, xlvl; @@ -395,8 +425,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) } init_scattered_cpuid_features(c); + detect_nopl(c); } - } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index dd6e3f15017..c3afba5a81a 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -215,6 +216,39 @@ static void __init early_cpu_support_print(void) } } +/* + * The NOPL instruction is supposed to exist on all CPUs with + * family >= 6, unfortunately, that's not true in practice because + * of early VIA chips and (more importantly) broken virtualizers that + * are not easy to detect. Hence, probe for it based on first + * principles. + * + * Note: no 64-bit chip is known to lack these, but put the code here + * for consistency with 32 bits, and to make it utterly trivial to + * diagnose the problem should it ever surface. + */ +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) +{ + const u32 nopl_signature = 0x888c53b1; /* Random number */ + u32 has_nopl = nopl_signature; + + clear_cpu_cap(c, X86_FEATURE_NOPL); + if (c->x86 >= 6) { + asm volatile("\n" + "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ + "2:\n" + " .section .fixup,\"ax\"\n" + "3: xor %0,%0\n" + " jmp 2b\n" + " .previous\n" + _ASM_EXTABLE(1b,3b) + : "+a" (has_nopl)); + + if (has_nopl == nopl_signature) + set_cpu_cap(c, X86_FEATURE_NOPL); + } +} + static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); void __init early_cpu_init(void) @@ -313,6 +347,8 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_phys_bits = eax & 0xff; } + detect_nopl(c); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index e43ad4ad4cb..c9017799497 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -39,7 +39,8 @@ const char * const x86_cap_flags[NCAPINTS*32] = { NULL, NULL, NULL, NULL, "constant_tsc", "up", NULL, "arch_perfmon", "pebs", "bts", NULL, NULL, - "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "rep_good", NULL, NULL, NULL, + "nopl", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ -- cgit v1.2.3 From f31d731e4467e61de51d7f6d7115f3b712d9354c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Aug 2008 17:50:33 -0700 Subject: x86: use X86_FEATURE_NOPL in alternatives Use X86_FEATURE_NOPL to determine if it is safe to use P6 NOPs in alternatives. Also, replace table and loop with simple if statement. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/alternative.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb37b55..65a0c1b4869 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { extern char __vsyscall_0; const unsigned char *const *find_nop_table(void) { - return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return k8_nops; } #else /* CONFIG_X86_64 */ -static const struct nop { - int cpuid; - const unsigned char *const *noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { X86_FEATURE_P4, p6_nops }, - { X86_FEATURE_P3, p6_nops }, - { -1, NULL } -}; - const unsigned char *const *find_nop_table(void) { - const unsigned char *const *noptable = intel_nops; - int i; - - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - return noptable; + if (boot_cpu_has(X86_FEATURE_K8)) + return k8_nops; + else if (boot_cpu_has(X86_FEATURE_K7)) + return k7_nops; + else if (boot_cpu_has(X86_FEATURE_NOPL)) + return p6_nops; + else + return intel_nops; } #endif /* CONFIG_X86_64 */ -- cgit v1.2.3 From f7676254f179eac6b5244a80195ec8ae0e9d4606 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 6 Sep 2008 03:03:32 +0200 Subject: x86: HPET fix moronic 32/64bit thinko We use the HPET only in 32bit mode because: 1) some HPETs are 32bit only 2) on i386 there is no way to read/write the HPET atomic 64bit wide The HPET code unification done by the "moron of the year" did not take into account that unsigned long is different on 32 and 64 bit. This thinko results in a possible endless loop in the clockevents code, when the return comparison fails due to the 64bit/332bit unawareness. unsigned long cnt = (u32) hpet_read() + delta can wrap over 32bit. but the final compare will fail and return -ETIME causing endless loops. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 2256315416d..801497a16e0 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -270,15 +270,15 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode, } static int hpet_legacy_next_event(unsigned long delta, - struct clock_event_device *evt) + struct clock_event_device *evt) { - unsigned long cnt; + u32 cnt; cnt = hpet_readl(HPET_COUNTER); - cnt += delta; + cnt += (u32) delta; hpet_writel(cnt, HPET_T0_CMP); - return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0; + return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } /* -- cgit v1.2.3 From 72d43d9bc9210d24d09202eaf219eac09e17b339 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 6 Sep 2008 03:06:08 +0200 Subject: x86: HPET: read back compare register before reading counter After fixing the u32 thinko I sill had occasional hickups on ATI chipsets with small deltas. There seems to be a delay between writing the compare register and the transffer to the internal register which triggers the interrupt. Reading back the value makes sure, that it hit the internal match register befor we compare against the counter value. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 801497a16e0..73deaffadd0 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -278,6 +278,13 @@ static int hpet_legacy_next_event(unsigned long delta, cnt += (u32) delta; hpet_writel(cnt, HPET_T0_CMP); + /* + * We need to read back the CMP register to make sure that + * what we wrote hit the chip before we compare it to the + * counter. + */ + WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt); + return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } -- cgit v1.2.3 From 1b05d60d60e81c6594da8298107a05b506f01797 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 6 Sep 2008 01:52:27 -0700 Subject: x86: remove duplicated get_model_name() calling Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 3 --- arch/x86/kernel/cpu/amd_64.c | 3 +-- arch/x86/kernel/cpu/centaur.c | 1 - arch/x86/kernel/cpu/common.c | 9 +++------ arch/x86/kernel/cpu/cpu.h | 1 - arch/x86/kernel/cpu/cyrix.c | 1 - arch/x86/kernel/cpu/transmeta.c | 1 - 7 files changed, 4 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d64ea6097ca..e0ba2c7c5a1 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -42,7 +42,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) { u32 l, h; int mbytes = num_physpages >> (20-PAGE_SHIFT); - int r; #ifdef CONFIG_SMP unsigned long long value; @@ -75,8 +74,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) */ clear_cpu_cap(c, 0*32+31); - r = get_model_name(c); - switch (c->x86) { case 4: /* diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index d1c721c0c49..c5fbf7477ea 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -157,8 +157,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - level = get_model_name(c); - if (!level) { + if (!c->x86_model_id[0]) { switch (c->x86) { case 0xf: /* Should distinguish Models here, but this is only diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e5f6d89521b..89bfdd9cacc 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -289,7 +289,6 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) if (c->x86_model >= 6 && c->x86_model < 9) set_cpu_cap(c, X86_FEATURE_3DNOW); - get_model_name(c); display_cacheinfo(c); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 286c89a991b..a8b9b724242 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -252,13 +252,13 @@ static struct cpu_dev __cpuinitdata default_cpu = { .c_x86_vendor = X86_VENDOR_UNKNOWN, }; -int __cpuinit get_model_name(struct cpuinfo_x86 *c) +static void __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; if (c->extended_cpuid_level < 0x80000004) - return 0; + return; v = (unsigned int *) c->x86_model_id; cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); @@ -277,8 +277,6 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c) while (q <= &c->x86_model_id[48]) *q++ = '\0'; /* Zero-pad the rest */ } - - return 1; } void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) @@ -610,8 +608,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) #endif } - if (c->extended_cpuid_level >= 0x80000004) - get_model_name(c); /* Default name */ + get_model_name(c); /* Default name */ init_scattered_cpuid_features(c); detect_nopl(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 3cc9d92afd8..de4094a3921 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -31,7 +31,6 @@ struct cpu_dev { extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; -extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3f8c7283d81..ffd0f5ed071 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -301,7 +301,6 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) */ if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) geode_configure(); - get_model_name(c); /* get CPU marketing name */ return; } else { /* MediaGX */ Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 7c46e6ecedc..738e03244f9 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -12,7 +12,6 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; char cpu_info[65]; - get_model_name(c); /* Same as AMD/Cyrix */ display_cacheinfo(c); /* Print CMS and CPU revision */ -- cgit v1.2.3 From e3224234717b4228c235cee401af89212f17a3a4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 6 Sep 2008 01:52:28 -0700 Subject: x86, cpu init: call early_init_xxx in init_xxx so we: 1. could set some cap to ap 2. restore some cap after memset in identify_cpu for boot cpu esp for CONSTANT_TSC this matters, as: before this patch: flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm 3dnowext 3dnow rep_good nopl pni monitor cx16 lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs after this patch: flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm 3dnowext 3dnow constant_tsc rep_good nopl pni monitor cx16 lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs so constant_tsc is back... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 7 ++----- arch/x86/kernel/cpu/amd_64.c | 2 ++ arch/x86/kernel/cpu/centaur_64.c | 2 ++ arch/x86/kernel/cpu/common.c | 7 ++++--- arch/x86/kernel/cpu/intel_64.c | 2 ++ 5 files changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e0ba2c7c5a1..c3175da7bc6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -26,11 +26,8 @@ __asm__(".align 4\nvide: ret"); static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { - if (cpuid_eax(0x80000000) >= 0x80000007) { - c->x86_power = cpuid_edx(0x80000007); - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - } + if (c->x86_power & (1<<8)) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); /* Set MTRR capability flag if appropriate */ if (c->x86_model == 13 || c->x86_model == 9 || diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index c5fbf7477ea..8c2d07f06f1 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -140,6 +140,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } #endif + early_init_amd(c); + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_cpu_cap(c, 0*32+31); diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 49cfc6d2f2f..0e5cf17a3c8 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -16,6 +16,8 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { + early_init_centaur(c); + if (c->x86 == 0x6 && c->x86_model >= 0xf) { c->x86_cache_alignment = c->x86_clflush_size * 2; set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a8b9b724242..e8045c4ef1c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -473,9 +473,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[2] = cpuid_edx(0x80860001); } - if (c->extended_cpuid_level >= 0x80000007) - c->x86_power = cpuid_edx(0x80000007); - if (c->extended_cpuid_level >= 0x80000008) { u32 eax = cpuid_eax(0x80000008); @@ -483,6 +480,10 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_phys_bits = eax & 0xff; } #endif + + if (c->extended_cpuid_level >= 0x80000007) + c->x86_power = cpuid_edx(0x80000007); + } /* * Do minimum CPU detection early. diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 0c0a58dfe09..14a2cd98d48 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -54,6 +54,8 @@ static void __cpuinit srat_detect_node(void) static void __cpuinit init_intel(struct cpuinfo_x86 *c) { + early_init_intel(c); + init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); -- cgit v1.2.3 From 12cf105cd66d95cf32c73cfa847a50bd1b700f23 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 4 Sep 2008 21:09:43 +0200 Subject: x86: delay early cpu initialization until cpuid is done Move early cpu initialization after cpu early get cap so the early cpu initialization can fix up cpu caps. Signed-off-by: Krzysztof Helt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0785b3c8d04..8aab8517642 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -335,11 +335,11 @@ static void __init early_cpu_detect(void) get_cpu_vendor(c, 1); + early_get_cap(c); + if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); - - early_get_cap(c); } /* -- cgit v1.2.3 From dd786dd12c99634055a9066f25ea957f29991c22 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 21:09:43 +0200 Subject: x86: move mtrr cpu cap setting early in early_init_xxxx Krzysztof Helt found MTRR is not detected on k6-2 root cause: we moved mtrr_bp_init() early for mtrr trimming, and in early_detect we only read the CPU capability from cpuid, so some cpu doesn't have that bit in cpuid. So we need to add early_init_xxxx to preset those bit before mtrr_bp_init for those earlier cpus. this patch is for v2.6.27 Reported-by: Krzysztof Helt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 9 +++++---- arch/x86/kernel/cpu/centaur.c | 11 +++++++++++ arch/x86/kernel/cpu/cyrix.c | 32 ++++++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index cae9cabc303..18514ed2610 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -31,6 +31,11 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } + + /* Set MTRR capability flag if appropriate */ + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); } static void __cpuinit init_amd(struct cpuinfo_x86 *c) @@ -166,10 +171,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) mbytes); } - /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); break; } diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e0f45edd6a5..a0534c04d38 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -314,6 +314,16 @@ enum { EAMD3D = 1<<20, }; +static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 5: + /* Emulate MTRRs using Centaur's MCR. */ + set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); + break; + } +} + static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { @@ -462,6 +472,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, + .c_early_init = early_init_centaur, .c_init = init_centaur, .c_size_cache = centaur_size_cache, }; diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index e710a21bb6e..898a5a2002e 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -15,13 +15,11 @@ /* * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU */ -static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) { unsigned char ccr2, ccr3; - unsigned long flags; /* we test for DEVID by checking whether CCR3 is writable */ - local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, ccr3 ^ 0x80); getCx86(0xc0); /* dummy to change bus */ @@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) *dir0 = getCx86(CX86_DIR0); *dir1 = getCx86(CX86_DIR1); } - local_irq_restore(flags); } +static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +{ + unsigned long flags; + + local_irq_save(flags); + __do_cyrix_devid(dir0, dir1); + local_irq_restore(flags); +} /* * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in * order to identify the Cyrix CPU model after we're out of setup.c @@ -161,6 +166,24 @@ static void __cpuinit geode_configure(void) local_irq_restore(flags); } +static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c) +{ + unsigned char dir0, dir0_msn, dir1 = 0; + + __do_cyrix_devid(&dir0, &dir1); + dir0_msn = dir0 >> 4; /* identifies CPU "family" */ + + switch (dir0_msn) { + case 3: /* 6x86/6x86L */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + case 5: /* 6x86MX/M II */ + /* Emulate MTRRs using Cyrix's ARRs. */ + set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); + break; + } +} static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) { @@ -416,6 +439,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_vendor = "Cyrix", .c_ident = { "CyrixInstead" }, + .c_early_init = early_init_cyrix, .c_init = init_cyrix, .c_identify = cyrix_identify, }; -- cgit v1.2.3 From 2d9cd6c27f00958a31622b8e9909d5e35f069b28 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 29 Aug 2008 13:15:04 +0100 Subject: x86-64: add two __cpuinit annotations Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e8045c4ef1c..5cde4b18448 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -845,7 +845,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; -void pda_init(int cpu) +void __cpuinit pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 726a5fcdf34..4b031a4ac85 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -860,7 +860,7 @@ error: return err; } -static void mce_remove_device(unsigned int cpu) +static __cpuinit void mce_remove_device(unsigned int cpu) { int i; -- cgit v1.2.3 From d04ec773d7ca1bbc05a2768be95c1cebe2b07757 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:27:30 +0200 Subject: x86: pda_init(): fix memory leak when using CPU hotplug pda->irqstackptr is allocated whenever a CPU is set online. But it is never freed. This results in a memory leak of 16K for each CPU offline/online cycle. Fix is to allocate pda->irqstackptr only once. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index c3afba5a81a..4f2eeb5652e 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -529,17 +529,20 @@ void pda_init(int cpu) /* others are initialized in smpboot.c */ pda->pcurrent = &init_task; pda->irqstackptr = boot_cpu_stack; + pda->irqstackptr += IRQSTACKSIZE - 64; } else { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", cpu); + if (!pda->irqstackptr) { + pda->irqstackptr = (char *) + __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); + if (!pda->irqstackptr) + panic("cannot allocate irqstack for cpu %d", + cpu); + pda->irqstackptr += IRQSTACKSIZE - 64; + } if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } - - pda->irqstackptr += IRQSTACKSIZE-64; } char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + -- cgit v1.2.3 From 23952a96ae738277f3139b63d622e22984589031 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 6 Aug 2008 10:29:37 +0200 Subject: x86: cpu_init(): fix memory leak when using CPU hotplug Exception stacks are allocated each time a CPU is set online. But the allocated space is never freed. Thus with one CPU hotplug offline/online cycle there is a memory leak of 24K (6 pages) for a CPU. Fix is to allocate exception stacks only once -- when the CPU is set online for the first time. Signed-off-by: Andreas Herrmann Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 4f2eeb5652e..a11f5d4477c 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -640,19 +640,22 @@ void __cpuinit cpu_init(void) /* * set up and load the per-CPU TSS */ - for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (!orig_ist->ist[0]) { static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER }; - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception stack %ld %d\n", - v, cpu); + for (v = 0; v < N_EXCEPTION_STACKS; v++) { + if (cpu) { + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception " + "stack %ld %d\n", v, cpu); + } + estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; } - estacks += PAGE_SIZE << order[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); -- cgit v1.2.3 From 5df45515512436a808d3476a90e83f2efb022422 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Sep 2008 23:55:40 +0200 Subject: x86, tsc calibration: fix my brown paperbag day ... Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6dab90f6851..4847a928050 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -319,7 +319,7 @@ static unsigned long quick_pit_calibrate(void) /* * Make sure we can rely on the second TSC timestamp: */ - if (!pit_expect_msb(--expect)) + if (!pit_expect_msb(expect)) goto failed; /* -- cgit v1.2.3 From 11fdd252bb5b461289fc5c21dc8fc87dc66f3284 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:50 -0700 Subject: x86: cpu make amd.c more like amd_64.c v2 1. make 32bit have early_init_amd_mc and amd_detect_cmp 2. seperate init_amd_k5/k6/k7 ... v2: fix compiling for !CONFIG_SMP Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 396 ++++++++++++++++++++++++------------------- arch/x86/kernel/cpu/amd_64.c | 17 +- arch/x86/kernel/cpu/common.c | 2 +- 3 files changed, 235 insertions(+), 180 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c3175da7bc6..a3a9e3cbdea 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -24,8 +24,200 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); +static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) +{ +/* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. + */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model == 9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); + } +} + + +static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) +{ + u32 l, h; + int mbytes = num_physpages >> (20-PAGE_SHIFT); + + if (c->x86_model < 6) { + /* Based on AMD doc 20734R - June 2000 */ + if (c->x86_model == 0) { + clear_cpu_cap(c, X86_FEATURE_APIC); + set_cpu_cap(c, X86_FEATURE_PGE); + } + return; + } + + if (c->x86_model == 6 && c->x86_mask == 1) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model == 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if (mbytes > 508) + mbytes = 508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF) == 0) { + unsigned long flags; + l = (1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + return; + } + + if ((c->x86_model == 8 && c->x86_mask > 7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if (mbytes > 4092) + mbytes = 4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000) == 0) { + unsigned long flags; + l = ((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + return; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + return; + } +} + +static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + + set_cpu_cap(c, X86_FEATURE_K7); +} + +/* + * On a AMD dual core setup the lower bits of the APIC id distingush the cores. + * Assumes number of cores is a power of two. + */ +static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_HT + unsigned bits; + + bits = c->x86_coreid_bits; + + /* Low order bits define the core id (index of core in socket) */ + c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); + /* Convert the initial APIC ID into the socket ID */ + c->phys_proc_id = c->initial_apicid >> bits; +#endif +} + +static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_HT + unsigned bits, ecx; + + /* Multi core CPU? */ + if (c->extended_cpuid_level < 0x80000008) + return; + + ecx = cpuid_ecx(0x80000008); + + c->x86_max_cores = (ecx & 0xff) + 1; + + /* CPU telling us the core id bits shift? */ + bits = (ecx >> 12) & 0xF; + + /* Otherwise recompute */ + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + + c->x86_coreid_bits = bits; +#endif +} + static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { + early_init_amd_mc(c); + if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); @@ -37,9 +229,6 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) static void __cpuinit init_amd(struct cpuinfo_x86 *c) { - u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); - #ifdef CONFIG_SMP unsigned long long value; @@ -50,7 +239,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Errata 63 for SH-B3 steppings * Errata 122 for all steppings (F+ have it disabled by default) */ - if (c->x86 == 15) { + if (c->x86 == 0xf) { rdmsrl(MSR_K7_HWCR, value); value |= 1 << 6; wrmsrl(MSR_K7_HWCR, value); @@ -73,192 +262,55 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) switch (c->x86) { case 4: - /* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model == 9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); - } - break; + init_amd_k5(c); + break; case 5: - if (c->x86_model < 6) { - /* Based on AMD doc 20734R - June 2000 */ - if (c->x86_model == 0) { - clear_cpu_cap(c, X86_FEATURE_APIC); - set_cpu_cap(c, X86_FEATURE_PGE); - } - break; - } - - if (c->x86_model == 6 && c->x86_mask == 1) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. - */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if (mbytes > 508) - mbytes = 508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF) == 0) { - unsigned long flags; - l = (1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - break; - } - - if ((c->x86_model == 8 && c->x86_mask > 7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if (mbytes > 4092) - mbytes = 4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000) == 0) { - unsigned long flags; - l = ((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - break; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - break; - } - break; - case 6: /* An Athlon/Duron */ - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - break; - } - - switch (c->x86) { - case 15: - /* Use K8 tuning for Fam10h and Fam11h */ - case 0x10: - case 0x11: - set_cpu_cap(c, X86_FEATURE_K8); + init_amd_k6(c); break; - case 6: - set_cpu_cap(c, X86_FEATURE_K7); + case 6: /* An Athlon/Duron */ + init_amd_k7(c); break; } + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_cpu_cap(c, X86_FEATURE_MCE); + if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - display_cacheinfo(c); + if (!c->x86_model_id[0]) { + switch (c->x86) { + case 0xf: + /* Should distinguish Models here, but this is only + a fallback anyways. */ + strcpy(c->x86_model_id, "Hammer"); + break; + } + } - if (cpuid_eax(0x80000000) >= 0x80000008) - c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + display_cacheinfo(c); -#ifdef CONFIG_X86_HT - /* - * On a AMD multi core setup the lower bits of the APIC id - * distinguish the cores. - */ - if (c->x86_max_cores > 1) { - int cpu = smp_processor_id(); - unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; + /* Multi core CPU? */ + if (c->extended_cpuid_level >= 0x80000008) + amd_detect_cmp(c); - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - c->cpu_core_id = c->phys_proc_id & ((1<phys_proc_id >>= bits; - printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_max_cores, c->cpu_core_id); - } -#endif + detect_ht(c); - if (cpuid_eax(0x80000000) >= 0x80000006) { - if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) + if (c->extended_cpuid_level >= 0x80000006) { + if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) num_cache_leaves = 4; else num_cache_leaves = 3; } - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); + if (c->x86 >= 0xf && c->x86 <= 0x11) + set_cpu_cap(c, X86_FEATURE_K8); - if (cpu_has_xmm2) + if (cpu_has_xmm2) { + /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } } static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 8c2d07f06f1..7913e48f673 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -174,17 +174,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000008) amd_detect_cmp(c); - if (c->extended_cpuid_level >= 0x80000006 && - (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; + if (c->extended_cpuid_level >= 0x80000006) { + if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + } if (c->x86 >= 0xf && c->x86 <= 0x11) set_cpu_cap(c, X86_FEATURE_K8); - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + if (cpu_has_xmm2) { + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } if (c->x86 == 0x10) { /* do this for boot cpu */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5cde4b18448..9a57106c199 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -629,8 +629,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; -#ifdef CONFIG_X86_64 c->x86_coreid_bits = 0; +#ifdef CONFIG_X86_64 c->x86_clflush_size = 64; #else c->cpuid_level = -1; /* CPUID not detected */ -- cgit v1.2.3 From c58606ad551e9a1c85a9bd4f1698ca41ec279b2c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:51 -0700 Subject: x86: remove duplicated force_mwait Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 2 -- arch/x86/kernel/process.c | 1 - 2 files changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 7913e48f673..466a1eaa81e 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -9,8 +9,6 @@ #include "cpu.h" -int force_mwait __cpuinitdata; - #ifdef CONFIG_NUMA static int __cpuinit nearby_node(int apicid) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a..9f94bb1c811 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -15,7 +15,6 @@ unsigned long idle_nomwait; EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; -static int force_mwait __cpuinitdata; int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { -- cgit v1.2.3 From 2b86473604f6e9aef0b5e89c56798a90ccddcdbe Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:52 -0700 Subject: x86: add srat_detect_node for amd64 separate that from amd_detect_cmp() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 466a1eaa81e..20c3f12dfe7 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -36,19 +36,23 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned bits; -#ifdef CONFIG_NUMA - int cpu = smp_processor_id(); - int node = 0; - unsigned apicid = hard_smp_processor_id(); -#endif + bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); /* Convert the initial APIC ID into the socket ID */ c->phys_proc_id = c->initial_apicid >> bits; +#endif +} +static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) +{ #ifdef CONFIG_NUMA + int cpu = smp_processor_id(); + int node; + unsigned apicid = hard_smp_processor_id(); + node = c->phys_proc_id; if (apicid_to_node[apicid] != NUMA_NO_NODE) node = apicid_to_node[apicid]; @@ -76,7 +80,6 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); #endif -#endif } static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) @@ -169,8 +172,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) + if (c->extended_cpuid_level >= 0x80000008) { amd_detect_cmp(c); + srat_detect_node(c); + } if (c->extended_cpuid_level >= 0x80000006) { if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) -- cgit v1.2.3 From 8d71a2ea0ad4ef9b9076ffd44726bad1f0ccf59b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:53 -0700 Subject: x86: merge header in amd_64.c Singed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 8 ++++++++ arch/x86/kernel/cpu/amd_64.c | 13 ++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a3a9e3cbdea..3c8090d1005 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1,11 +1,19 @@ #include #include #include + #include #include #include +#ifdef CONFIG_X86_64 +# include +# include +# include +#endif + #include + #include "cpu.h" /* diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 20c3f12dfe7..1d83601e85e 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -1,9 +1,16 @@ #include +#include #include -#include -#include -#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +# include +# include +# include +#endif #include -- cgit v1.2.3 From 6c62aa4a3c12989a1f1fcbbe6f1ee5d4de4b2300 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:54 -0700 Subject: x86: make amd.c have 64bit support code Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 137 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 126 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3c8090d1005..32e73520adf 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -16,6 +16,7 @@ #include "cpu.h" +#ifdef CONFIG_X86_32 /* * B step AMD K6 before B 9730xxxx have hardware bugs that can cause * misexecution of code under Linux. Owners of such processors should @@ -177,6 +178,26 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K7); } +#endif + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +static int __cpuinit nearby_node(int apicid) +{ + int i, node; + + for (i = apicid - 1; i >= 0; i--) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif /* * On a AMD dual core setup the lower bits of the APIC id distingush the cores. @@ -196,6 +217,42 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) #endif } +static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) +{ +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + int cpu = smp_processor_id(); + int node; + unsigned apicid = hard_smp_processor_id(); + + node = c->phys_proc_id; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + + int ht_nodeid = c->initial_apicid; + + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_HT @@ -226,13 +283,19 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { early_init_amd_mc(c); + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSCALL32); +#else /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); + if (c->x86 == 5) + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); +#endif } static void __cpuinit init_amd(struct cpuinfo_x86 *c) @@ -256,18 +319,31 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) early_init_amd(c); - /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - /* * Bit 31 in normal CPUID used for nonstandard 3DNow ID; * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_cpu_cap(c, 0*32+31); +#ifdef CONFIG_X86_64 + /* On C+ stepping K8 rep microcode works well for copy/memset */ + if (c->x86 == 0xf) { + u32 level; + + level = cpuid_eax(1); + if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + } + if (c->x86 == 0x10 || c->x86 == 0x11) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + switch (c->x86) { case 4: init_amd_k5(c); @@ -283,7 +359,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_cpu_cap(c, X86_FEATURE_MCE); +#endif + /* Enable workaround for FXSAVE leak */ if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); @@ -300,10 +378,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) + if (c->extended_cpuid_level >= 0x80000008) { amd_detect_cmp(c); + srat_detect_node(c); + } +#ifdef CONFIG_X86_32 detect_ht(c); +#endif if (c->extended_cpuid_level >= 0x80000006) { if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) @@ -319,8 +401,38 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } + +#ifdef CONFIG_X86_64 + if (c->x86 == 0x10) { + /* do this for boot cpu */ + if (c == &boot_cpu_data) + check_enable_amd_mmconf_dmi(); + + fam10h_check_enable_mmcfg(); + } + + if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { + unsigned long long tseg; + + /* + * Split up direct mapping around the TSEG SMM area. + * Don't do it for gbpages because there seems very little + * benefit in doing so. + */ + if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { + printk(KERN_DEBUG "tseg: %010llx\n", tseg); + if ((tseg>>PMD_SHIFT) < + (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || + ((tseg>>PMD_SHIFT) < + (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && + (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) + set_memory_4k((unsigned long)__va(tseg), 1); + } + } +#endif } +#ifdef CONFIG_X86_32 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ @@ -333,10 +445,12 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int } return size; } +#endif static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, +#ifdef CONFIG_X86_32 .c_models = { { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = { @@ -349,9 +463,10 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { } }, }, + .c_size_cache = amd_size_cache, +#endif .c_early_init = early_init_amd, .c_init = init_amd, - .c_size_cache = amd_size_cache, .c_x86_vendor = X86_VENDOR_AMD, }; -- cgit v1.2.3 From 2a02505055fdd44958efd0e140dd87cb9fdecaf1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:55 -0700 Subject: x86: make amd_64 have 32 bit code Signed-off-by: Yinghai Lu Cc: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd_64.c | 259 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 247 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 1d83601e85e..32e73520adf 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -16,7 +16,171 @@ #include "cpu.h" -#ifdef CONFIG_NUMA +#ifdef CONFIG_X86_32 +/* + * B step AMD K6 before B 9730xxxx have hardware bugs that can cause + * misexecution of code under Linux. Owners of such processors should + * contact AMD for precise details and a CPU swap. + * + * See http://www.multimania.com/poulot/k6bug.html + * http://www.amd.com/K6/k6docs/revgd.html + * + * The following test is erm.. interesting. AMD neglected to up + * the chip setting when fixing the bug but they also tweaked some + * performance at the same time.. + */ + +extern void vide(void); +__asm__(".align 4\nvide: ret"); + +static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) +{ +/* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. + */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model == 9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); + } +} + + +static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) +{ + u32 l, h; + int mbytes = num_physpages >> (20-PAGE_SHIFT); + + if (c->x86_model < 6) { + /* Based on AMD doc 20734R - June 2000 */ + if (c->x86_model == 0) { + clear_cpu_cap(c, X86_FEATURE_APIC); + set_cpu_cap(c, X86_FEATURE_PGE); + } + return; + } + + if (c->x86_model == 6 && c->x86_mask == 1) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model == 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if (mbytes > 508) + mbytes = 508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF) == 0) { + unsigned long flags; + l = (1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + return; + } + + if ((c->x86_model == 8 && c->x86_mask > 7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if (mbytes > 4092) + mbytes = 4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000) == 0) { + unsigned long flags; + l = ((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + return; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + return; + } +} + +static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /* + * Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_cpu_cap(c, X86_FEATURE_XMM); + } + } + + /* + * It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + + set_cpu_cap(c, X86_FEATURE_K7); +} +#endif + +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) static int __cpuinit nearby_node(int apicid) { int i, node; @@ -41,7 +205,7 @@ static int __cpuinit nearby_node(int apicid) */ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT unsigned bits; bits = c->x86_coreid_bits; @@ -55,7 +219,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) { -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) int cpu = smp_processor_id(); int node; unsigned apicid = hard_smp_processor_id(); @@ -91,7 +255,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT unsigned bits, ecx; /* Multi core CPU? */ @@ -112,7 +276,6 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) } c->x86_coreid_bits = bits; - #endif } @@ -124,15 +287,21 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSCALL32); +#else + /* Set MTRR capability flag if appropriate */ + if (c->x86 == 5) + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_cpu_cap(c, X86_FEATURE_K6_MTRR); +#endif } static void __cpuinit init_amd(struct cpuinfo_x86 *c) { - unsigned level; - #ifdef CONFIG_SMP - unsigned long value; + unsigned long long value; /* * Disable TLB flush filter by setting HWCR.FFDIS on K8 @@ -142,26 +311,55 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * Errata 122 for all steppings (F+ have it disabled by default) */ if (c->x86 == 0xf) { - rdmsrl(MSR_K8_HWCR, value); + rdmsrl(MSR_K7_HWCR, value); value |= 1 << 6; - wrmsrl(MSR_K8_HWCR, value); + wrmsrl(MSR_K7_HWCR, value); } #endif early_init_amd(c); - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + /* + * Bit 31 in normal CPUID used for nonstandard 3DNow ID; + * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + */ clear_cpu_cap(c, 0*32+31); +#ifdef CONFIG_X86_64 /* On C+ stepping K8 rep microcode works well for copy/memset */ if (c->x86 == 0xf) { + u32 level; + level = cpuid_eax(1); if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) set_cpu_cap(c, X86_FEATURE_REP_GOOD); } if (c->x86 == 0x10 || c->x86 == 0x11) set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + + switch (c->x86) { + case 4: + init_amd_k5(c); + break; + case 5: + init_amd_k6(c); + break; + case 6: /* An Athlon/Duron */ + init_amd_k7(c); + break; + } + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_cpu_cap(c, X86_FEATURE_MCE); +#endif /* Enable workaround for FXSAVE leak */ if (c->x86 >= 6) @@ -176,6 +374,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) break; } } + display_cacheinfo(c); /* Multi core CPU? */ @@ -184,6 +383,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) srat_detect_node(c); } +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + if (c->extended_cpuid_level >= 0x80000006) { if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) num_cache_leaves = 4; @@ -199,6 +402,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } +#ifdef CONFIG_X86_64 if (c->x86 == 0x10) { /* do this for boot cpu */ if (c == &boot_cpu_data) @@ -225,11 +429,42 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) set_memory_4k((unsigned long)__va(tseg), 1); } } +#endif +} + +#ifdef CONFIG_X86_32 +static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) +{ + /* AMD errata T13 (order #21922) */ + if ((c->x86 == 6)) { + if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + size = 64; + if (c->x86_model == 4 && + (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ + size = 256; + } + return size; } +#endif static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, +#ifdef CONFIG_X86_32 + .c_models = { + { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = + { + [3] = "486 DX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB", + [14] = "Am5x86-WT", + [15] = "Am5x86-WB" + } + }, + }, + .c_size_cache = amd_size_cache, +#endif .c_early_init = early_init_amd, .c_init = init_amd, .c_x86_vendor = X86_VENDOR_AMD, -- cgit v1.2.3 From ff73152ced60871f7d5fb7dee52fa499902a3c6d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:56 -0700 Subject: x86: make 64 bit to use amd.c arch/x86/kernel/cpu/amd.c is now 100% identical to arch/x86/kernel/cpu/amd_64.c, so use amd.c on 64-bit too and fix up the namespace impact. Simplify the Kconfig glue as well. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 3 +- arch/x86/kernel/cpu/amd_64.c | 473 ------------------------------------------- 2 files changed, 1 insertion(+), 475 deletions(-) delete mode 100644 arch/x86/kernel/cpu/amd_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d031f248dfc..510d1bcb058 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -10,8 +10,7 @@ obj-$(CONFIG_X86_64) += bugs_64.o obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o -obj-$(CONFIG_CPU_SUP_AMD_32) += amd.o -obj-$(CONFIG_CPU_SUP_AMD_64) += amd_64.o +obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c deleted file mode 100644 index 32e73520adf..00000000000 --- a/arch/x86/kernel/cpu/amd_64.c +++ /dev/null @@ -1,473 +0,0 @@ -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_X86_64 -# include -# include -# include -#endif - -#include - -#include "cpu.h" - -#ifdef CONFIG_X86_32 -/* - * B step AMD K6 before B 9730xxxx have hardware bugs that can cause - * misexecution of code under Linux. Owners of such processors should - * contact AMD for precise details and a CPU swap. - * - * See http://www.multimania.com/poulot/k6bug.html - * http://www.amd.com/K6/k6docs/revgd.html - * - * The following test is erm.. interesting. AMD neglected to up - * the chip setting when fixing the bug but they also tweaked some - * performance at the same time.. - */ - -extern void vide(void); -__asm__(".align 4\nvide: ret"); - -static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) -{ -/* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model == 9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); - } -} - - -static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) -{ - u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); - - if (c->x86_model < 6) { - /* Based on AMD doc 20734R - June 2000 */ - if (c->x86_model == 0) { - clear_cpu_cap(c, X86_FEATURE_APIC); - set_cpu_cap(c, X86_FEATURE_PGE); - } - return; - } - - if (c->x86_model == 6 && c->x86_mask == 1) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. - */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model == 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if (mbytes > 508) - mbytes = 508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF) == 0) { - unsigned long flags; - l = (1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - return; - } - - if ((c->x86_model == 8 && c->x86_mask > 7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if (mbytes > 4092) - mbytes = 4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000) == 0) { - unsigned long flags; - l = ((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - return; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - return; - } -} - -static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) -{ - u32 l, h; - - /* - * Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_cpu_cap(c, X86_FEATURE_XMM); - } - } - - /* - * It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - - set_cpu_cap(c, X86_FEATURE_K7); -} -#endif - -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -static int __cpuinit nearby_node(int apicid) -{ - int i, node; - - for (i = apicid - 1; i >= 0; i--) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { - node = apicid_to_node[i]; - if (node != NUMA_NO_NODE && node_online(node)) - return node; - } - return first_node(node_online_map); /* Shouldn't happen */ -} -#endif - -/* - * On a AMD dual core setup the lower bits of the APIC id distingush the cores. - * Assumes number of cores is a power of two. - */ -static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits; - - bits = c->x86_coreid_bits; - - /* Low order bits define the core id (index of core in socket) */ - c->cpu_core_id = c->initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->phys_proc_id = c->initial_apicid >> bits; -#endif -} - -static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) -{ -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - int cpu = smp_processor_id(); - int node; - unsigned apicid = hard_smp_processor_id(); - - node = c->phys_proc_id; - if (apicid_to_node[apicid] != NUMA_NO_NODE) - node = apicid_to_node[apicid]; - if (!node_online(node)) { - /* Two possibilities here: - - The CPU is missing memory and no node was created. - In that case try picking one from a nearby CPU - - The APIC IDs differ from the HyperTransport node IDs - which the K8 northbridge parsing fills in. - Assume they are all increased by a constant offset, - but in the same order as the HT nodeids. - If that doesn't result in a usable node fall back to the - path for the previous case. */ - - int ht_nodeid = c->initial_apicid; - - if (ht_nodeid >= 0 && - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) - node = apicid_to_node[ht_nodeid]; - /* Pick a nearby node */ - if (!node_online(node)) - node = nearby_node(apicid); - } - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} - -static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_HT - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; -#endif -} - -static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) -{ - early_init_amd_mc(c); - - /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ - if (c->x86_power & (1<<8)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSCALL32); -#else - /* Set MTRR capability flag if appropriate */ - if (c->x86 == 5) - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_cpu_cap(c, X86_FEATURE_K6_MTRR); -#endif -} - -static void __cpuinit init_amd(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned long long value; - - /* - * Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 0xf) { - rdmsrl(MSR_K7_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K7_HWCR, value); - } -#endif - - early_init_amd(c); - - /* - * Bit 31 in normal CPUID used for nonstandard 3DNow ID; - * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway - */ - clear_cpu_cap(c, 0*32+31); - -#ifdef CONFIG_X86_64 - /* On C+ stepping K8 rep microcode works well for copy/memset */ - if (c->x86 == 0xf) { - u32 level; - - level = cpuid_eax(1); - if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); - } - if (c->x86 == 0x10 || c->x86 == 0x11) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else - - /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - - switch (c->x86) { - case 4: - init_amd_k5(c); - break; - case 5: - init_amd_k6(c); - break; - case 6: /* An Athlon/Duron */ - init_amd_k7(c); - break; - } - - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_cpu_cap(c, X86_FEATURE_MCE); -#endif - - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) - set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - - if (!c->x86_model_id[0]) { - switch (c->x86) { - case 0xf: - /* Should distinguish Models here, but this is only - a fallback anyways. */ - strcpy(c->x86_model_id, "Hammer"); - break; - } - } - - display_cacheinfo(c); - - /* Multi core CPU? */ - if (c->extended_cpuid_level >= 0x80000008) { - amd_detect_cmp(c); - srat_detect_node(c); - } - -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - - if (c->extended_cpuid_level >= 0x80000006) { - if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; - } - - if (c->x86 >= 0xf && c->x86 <= 0x11) - set_cpu_cap(c, X86_FEATURE_K8); - - if (cpu_has_xmm2) { - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 0x10) { - /* do this for boot cpu */ - if (c == &boot_cpu_data) - check_enable_amd_mmconf_dmi(); - - fam10h_check_enable_mmcfg(); - } - - if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if ((tseg>>PMD_SHIFT) < - (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || - ((tseg>>PMD_SHIFT) < - (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && - (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) - set_memory_4k((unsigned long)__va(tseg), 1); - } - } -#endif -} - -#ifdef CONFIG_X86_32 -static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) -{ - /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { - if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ - size = 64; - if (c->x86_model == 4 && - (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ - size = 256; - } - return size; -} -#endif - -static struct cpu_dev amd_cpu_dev __cpuinitdata = { - .c_vendor = "AMD", - .c_ident = { "AuthenticAMD" }, -#ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = - { - [3] = "486 DX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB", - [14] = "Am5x86-WT", - [15] = "Am5x86-WB" - } - }, - }, - .c_size_cache = amd_size_cache, -#endif - .c_early_init = early_init_amd, - .c_init = init_amd, - .c_x86_vendor = X86_VENDOR_AMD, -}; - -cpu_dev_register(amd_cpu_dev); -- cgit v1.2.3 From f69feff720497237ae9dd2f4604921bd3080c421 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:58 -0700 Subject: x86: little clean up of intel.c/intel_64.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 7 +++---- arch/x86/kernel/cpu/intel_64.c | 8 +++----- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 959417b8cd6..4a8ac9d4ff5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -76,7 +76,7 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax, ebx, ecx, edx; @@ -183,7 +183,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) * let's use the legacy cpuid vector 0x1 and 0x4 for topology * detection. */ - c->x86_max_cores = num_cpu_cores(c); + c->x86_max_cores = intel_num_cpu_cores(c); detect_ht(c); } @@ -210,9 +210,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (c->x86 == 15) { + if (c->x86 == 15) set_cpu_cap(c, X86_FEATURE_P4); - } if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_P3); if (cpu_has_ds) { diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index 14a2cd98d48..aef4f282631 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -71,17 +71,15 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); - } - - - if (cpu_has_bts) ds_init_intel(c); + } if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); detect_extended_topology(c); if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) -- cgit v1.2.3 From de9f521fb72dd091aa4989fe2e004ecf4785a850 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 8 Sep 2008 18:10:11 +0900 Subject: x86: move pci-nommu's dma_mask check to common code The check to see if dev->dma_mask is NULL in pci-nommu is more appropriate for dma_alloc_coherent(). Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 73853d3fdca..0f51883cc6a 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -80,9 +80,6 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, int node; struct page *page; - if (hwdev->dma_mask == NULL) - return NULL; - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); gfp |= __GFP_ZERO; -- cgit v1.2.3 From 8a53ad675f86ee003482b557da944e070d3c4859 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 8 Sep 2008 18:10:12 +0900 Subject: x86: fix nommu_alloc_coherent allocation with NULL device argument We need to use __GFP_DMA for NULL device argument (fallback_dev) with pci-nommu. It's a hack for ISA (and some old code) so we need to use GFP_DMA. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 0f51883cc6a..ada1c87cafc 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -80,7 +80,6 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, int node; struct page *page; - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); gfp |= __GFP_ZERO; dma_mask = hwdev->coherent_dma_mask; @@ -93,7 +92,7 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, node = dev_to_node(hwdev); #ifdef CONFIG_X86_64 - if (dma_mask <= DMA_32BIT_MASK) + if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) gfp |= GFP_DMA32; #endif -- cgit v1.2.3 From 823e7e8c6ef12cd1943dc42fe7595ca74e8cc3d7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 8 Sep 2008 18:10:13 +0900 Subject: x86: dma_alloc_coherent sets gfp flags properly Non real IOMMU implemenations (which doesn't do virtual mappings, e.g. swiotlb, pci-nommu, etc) need to use proper gfp flags and dma_mask to allocate pages in their own dma_alloc_coherent() (allocated page need to be suitable for device's coherent_dma_mask). This patch makes dma_alloc_coherent do this job so that IOMMUs don't need to take care of it any more. Real IOMMU implemenataions can simply ignore the gfp flags. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index ada1c87cafc..8e398b56f50 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -80,26 +80,11 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, int node; struct page *page; - gfp |= __GFP_ZERO; - - dma_mask = hwdev->coherent_dma_mask; - if (!dma_mask) - dma_mask = *(hwdev->dma_mask); + dma_mask = dma_alloc_coherent_mask(hwdev, gfp); - if (dma_mask < DMA_24BIT_MASK) - return NULL; + gfp |= __GFP_ZERO; node = dev_to_node(hwdev); - -#ifdef CONFIG_X86_64 - if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; -#endif - - /* No alloc-free penalty for ISA devices */ - if (dma_mask == DMA_24BIT_MASK) - gfp |= GFP_DMA; - again: page = alloc_pages_node(node, gfp, get_order(size)); if (!page) -- cgit v1.2.3 From e545a6140b698b2494daf0b32107bdcc5e901390 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Sun, 7 Sep 2008 16:57:22 +0200 Subject: kernel/cpu.c: create a CPU_STARTING cpu_chain notifier Right now, there is no notifier that is called on a new cpu, before the new cpu begins processing interrupts/softirqs. Various kernel function would need that notification, e.g. kvm works around by calling smp_call_function_single(), rcu polls cpu_online_map. The patch adds a CPU_STARTING notification. It also adds a helper function that sends the message to all cpu_chain handlers. Tested on x86-64. All other archs are untested. Especially on sparc, I'm not sure if I got it right. Signed-off-by: Manfred Spraul Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7985c5b3f91..0b8261c3cac 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -257,6 +257,7 @@ static void __cpuinit smp_callin(void) end_local_APIC_setup(); map_cpu_to_logical_apicid(); + notify_cpu_starting(cpuid); /* * Get our bogomips. * -- cgit v1.2.3 From d6be118a97ce51ca84035270f91c2bccecbfac5f Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 9 Sep 2008 09:56:08 -0400 Subject: x86: fix memmap=exactmap boot argument When using kdump modifying the e820 map is yielding strange results. For example starting with BIOS-provided physical RAM map: BIOS-e820: 0000000000000100 - 0000000000093400 (usable) BIOS-e820: 0000000000093400 - 00000000000a0000 (reserved) BIOS-e820: 0000000000100000 - 000000003fee0000 (usable) BIOS-e820: 000000003fee0000 - 000000003fef3000 (ACPI data) BIOS-e820: 000000003fef3000 - 000000003ff80000 (ACPI NVS) BIOS-e820: 000000003ff80000 - 0000000040000000 (reserved) BIOS-e820: 00000000e0000000 - 00000000f0000000 (reserved) BIOS-e820: 00000000fec00000 - 00000000fec10000 (reserved) BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved) BIOS-e820: 00000000ff000000 - 0000000100000000 (reserved) and booting with args memmap=exactmap memmap=640K@0K memmap=5228K@16384K memmap=125188K@22252K memmap=76K#1047424K memmap=564K#1047500K resulted in: user-defined physical RAM map: user: 0000000000000000 - 0000000000093400 (usable) user: 0000000000093400 - 00000000000a0000 (reserved) user: 0000000000100000 - 000000003fee0000 (usable) user: 000000003fee0000 - 000000003fef3000 (ACPI data) user: 000000003fef3000 - 000000003ff80000 (ACPI NVS) user: 000000003ff80000 - 0000000040000000 (reserved) user: 00000000e0000000 - 00000000f0000000 (reserved) user: 00000000fec00000 - 00000000fec10000 (reserved) user: 00000000fee00000 - 00000000fee01000 (reserved) user: 00000000ff000000 - 0000000100000000 (reserved) But should have resulted in: user-defined physical RAM map: user: 0000000000000000 - 00000000000a0000 (usable) user: 0000000001000000 - 000000000151b000 (usable) user: 00000000015bb000 - 0000000008ffc000 (usable) user: 000000003fee0000 - 000000003ff80000 (ACPI data) This is happening because of an improper usage of strcmp() in the e820 parsing code. The strcmp() always returns !0 and never resets the value for e820.nr_map and returns an incorrect user-defined map. This patch fixes the problem. Signed-off-by: Prarit Bhargava Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 9af89078f7b..66e48aa2dd1 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1203,7 +1203,7 @@ static int __init parse_memmap_opt(char *p) if (!p) return -EINVAL; - if (!strcmp(p, "exactmap")) { + if (!strncmp(p, "exactmap", 8)) { #ifdef CONFIG_CRASH_DUMP /* * If we are doing a crash dump, we still need to know -- cgit v1.2.3 From 185f3b9da24c09c26b784591ed354fe57998a7b1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:35 -0700 Subject: x86: make intel.c have 64-bit support code prepare for unification. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 119 ++++++++++++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 36 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a66989586a8..365a008080c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -15,6 +15,11 @@ #include #include +#ifdef CONFIG_X86_64 +#include +#include +#endif + #include "cpu.h" #ifdef CONFIG_X86_LOCAL_APIC @@ -25,14 +30,20 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { - /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ - if (c->x86 == 15 && c->x86_cache_alignment == 64) - c->x86_cache_alignment = 128; if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#else + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +#endif } +#ifdef CONFIG_X86_32 /* * Early probe support logic for ppro memory erratum #50 * @@ -73,6 +84,40 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) } + +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); +} +#endif +#endif + +static void __cpuinit srat_detect_node(void) +{ +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) + unsigned node; + int cpu = smp_processor_id(); + int apicid = hard_smp_processor_id(); + + /* Don't do the funky fallback heuristics the AMD version employs + for now. */ + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE || !node_online(node)) + node = first_node(node_online_map); + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +} + /* * find out the number of processor cores on the die */ @@ -91,20 +136,6 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif - static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; @@ -139,6 +170,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } +#ifdef CONFIG_X86_32 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) clear_cpu_cap(c, X86_FEATURE_SEP); @@ -176,18 +208,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (p) strcpy(c->x86_model_id, p); - detect_extended_topology(c); - - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); - detect_ht(c); - } - - /* Work around errata */ Intel_errata_workarounds(c); #ifdef CONFIG_X86_INTEL_USERCOPY @@ -206,14 +226,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) movsl_mask.mask = 7; break; } +#endif + #endif if (cpu_has_xmm2) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (c->x86 == 15) - set_cpu_cap(c, X86_FEATURE_P4); - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_P3); if (cpu_has_ds) { unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); @@ -224,6 +242,17 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } +#ifdef CONFIG_X86_64 + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else + if (c->x86 == 15) + set_cpu_cap(c, X86_FEATURE_P4); + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_P3); + if (cpu_has_bts) ptrace_bts_init_intel(c); @@ -240,8 +269,25 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_NUMAQ numaq_tsc_disable(); #endif +#endif + + detect_extended_topology(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ + c->x86_max_cores = intel_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + } + + /* Work around errata */ + srat_detect_node(); } +#ifdef CONFIG_X86_32 static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* @@ -254,10 +300,12 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i size = 256; return size; } +#endif static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, +#ifdef CONFIG_X86_32 .c_models = { { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = { @@ -307,13 +355,12 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { } }, }, + .c_size_cache = intel_size_cache, +#endif .c_early_init = early_init_intel, .c_init = init_intel, - .c_size_cache = intel_size_cache, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); -/* arch_initcall(intel_cpu_init); */ - -- cgit v1.2.3 From 58602c1681bdfa1a0deaa5574b8a72d6e30c0e97 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:36 -0700 Subject: x86: make intel_64.c the same as intel.c No change in functionality intended - this only adds the 32-bit side. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_64.c | 301 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 284 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index aef4f282631..365a008080c 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c @@ -1,42 +1,108 @@ #include +#include + +#include +#include #include +#include +#include + #include +#include +#include +#include #include +#include +#include + +#ifdef CONFIG_X86_64 #include #include +#endif #include "cpu.h" +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#endif + static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) + (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#else + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +#endif } +#ifdef CONFIG_X86_32 /* - * find out the number of processor cores on the die + * Early probe support logic for ppro memory erratum #50 + * + * This is called before we do cpu ident work */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, t; - if (c->cpuid_level < 4) +int __cpuinit ppro_with_ram_bug(void) +{ + /* Uses data from early_cpu_detect now */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 1 && + boot_cpu_data.x86_mask < 8) { + printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); return 1; + } + return 0; +} - cpuid_count(4, 0, &eax, &t, &t, &t); - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; +/* + * P4 Xeon errata 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. + */ +static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) +{ + unsigned long lo, hi; + + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); + if ((lo & (1<<9)) == 0) { + printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); + printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); + lo |= (1<<9); /* Disable hw prefetching */ + wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); + } + } +} + + + +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); } +#endif +#endif static void __cpuinit srat_detect_node(void) { -#ifdef CONFIG_NUMA +#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) unsigned node; int cpu = smp_processor_id(); int apicid = hard_smp_processor_id(); @@ -52,11 +118,51 @@ static void __cpuinit srat_detect_node(void) #endif } +/* + * find out the number of processor cores on the die + */ +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx; + + if (c->cpuid_level < 4) + return 1; + + /* Intel has a non-standard dependency on %ecx for this CPUID level. */ + cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { + unsigned int l2 = 0; + char *p = NULL; + early_init_intel(c); - init_intel_cacheinfo(c); +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonprivileged users lock up the system. + * Note that the workaround only should be initialized once... + */ + c->f00f_bug = 0; + if (!paravirt_enabled() && c->x86 == 5) { + static int f00f_workaround_enabled; + + c->f00f_bug = 1; + if (!f00f_workaround_enabled) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif + + l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ @@ -64,8 +170,70 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } +#ifdef CONFIG_X86_32 + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_cpu_cap(c, X86_FEATURE_SEP); + + /* + * Names for the Pentium II/Celeron processors + * detectable only by also checking the cache size. + * Dixon is NOT a Celeron. + */ + if (c->x86 == 6) { + switch (c->x86_model) { + case 5: + if (c->x86_mask == 0) { + if (l2 == 0) + p = "Celeron (Covington)"; + else if (l2 == 256) + p = "Mobile Pentium II (Dixon)"; + } + break; + + case 6: + if (l2 == 128) + p = "Celeron (Mendocino)"; + else if (c->x86_mask == 0 || c->x86_mask == 5) + p = "Celeron-A"; + break; + + case 8: + if (l2 == 128) + p = "Celeron (Coppermine)"; + break; + } + } + + if (p) + strcpy(c->x86_model_id, p); + + Intel_errata_workarounds(c); + +#ifdef CONFIG_X86_INTEL_USERCOPY + /* + * Set up the preferred alignment for movsl bulk memory moves + */ + switch (c->x86) { + case 4: /* 486: untested */ + break; + case 5: /* Old Pentia: untested */ + break; + case 6: /* PII/PIII only like movsl with 8-byte alignment */ + movsl_mask.mask = 7; + break; + case 15: /* P4 is OK down to 8-byte alignment */ + movsl_mask.mask = 7; + break; + } +#endif + +#endif + + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); if (cpu_has_ds) { - unsigned int l1, l2; + unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); if (!(l1 & (1<<11))) set_cpu_cap(c, X86_FEATURE_BTS); @@ -74,26 +242,125 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } +#ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +#else + if (c->x86 == 15) + set_cpu_cap(c, X86_FEATURE_P4); + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_P3); + + if (cpu_has_bts) + ptrace_bts_init_intel(c); + + /* + * See if we have a good local APIC by checking for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ + if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && + (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + set_cpu_cap(c, X86_FEATURE_11AP); + +#ifdef CONFIG_X86_NUMAQ + numaq_tsc_disable(); +#endif +#endif detect_extended_topology(c); - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ c->x86_max_cores = intel_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + } + /* Work around errata */ srat_detect_node(); } +#ifdef CONFIG_X86_32 +static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) +{ + /* + * Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. + */ + if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) + size = 256; + return size; +} +#endif + static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, +#ifdef CONFIG_X86_32 + .c_models = { + { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = + { + [0] = "486 DX-25/33", + [1] = "486 DX-50", + [2] = "486 SX", + [3] = "486 DX/2", + [4] = "486 SL", + [5] = "486 SX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = + { + [0] = "Pentium 60/66 A-step", + [1] = "Pentium 60/66", + [2] = "Pentium 75 - 200", + [3] = "OverDrive PODP5V83", + [4] = "Pentium MMX", + [7] = "Mobile Pentium 75 - 200", + [8] = "Mobile Pentium MMX" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = + { + [0] = "Pentium Pro A-step", + [1] = "Pentium Pro", + [3] = "Pentium II (Klamath)", + [4] = "Pentium II (Deschutes)", + [5] = "Pentium II (Deschutes)", + [6] = "Mobile Pentium II", + [7] = "Pentium III (Katmai)", + [8] = "Pentium III (Coppermine)", + [10] = "Pentium III (Cascades)", + [11] = "Pentium III (Tualatin)", + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = + { + [0] = "Pentium 4 (Unknown)", + [1] = "Pentium 4 (Willamette)", + [2] = "Pentium 4 (Northwood)", + [4] = "Pentium 4 (Foster)", + [5] = "Pentium 4 (Foster)", + } + }, + }, + .c_size_cache = intel_size_cache, +#endif .c_early_init = early_init_intel, .c_init = init_intel, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); + -- cgit v1.2.3 From 879d792b66d633bbe466974f61d1acc9aa8c78eb Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:37 -0700 Subject: x86: let intel 64-bit use intel.c now that arch/x86/kernel/cpu/intel_64.c and arch/x86/kernel/cpu/intel.c are equal, drop arch/x86/kernel/cpu/intel_64.c and fix up the glue. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 3 +- arch/x86/kernel/cpu/intel_64.c | 366 ----------------------------------------- 2 files changed, 1 insertion(+), 368 deletions(-) delete mode 100644 arch/x86/kernel/cpu/intel_64.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 510d1bcb058..7f0b45a5d78 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -8,8 +8,7 @@ obj-y += proc.o capflags.o powerflags.o common.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o -obj-$(CONFIG_CPU_SUP_INTEL_32) += intel.o -obj-$(CONFIG_CPU_SUP_INTEL_64) += intel_64.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c deleted file mode 100644 index 365a008080c..00000000000 --- a/arch/x86/kernel/cpu/intel_64.c +++ /dev/null @@ -1,366 +0,0 @@ -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_X86_64 -#include -#include -#endif - -#include "cpu.h" - -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#endif - -static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) -{ - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); - -#ifdef CONFIG_X86_64 - set_cpu_cap(c, X86_FEATURE_SYSENTER32); -#else - /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ - if (c->x86 == 15 && c->x86_cache_alignment == 64) - c->x86_cache_alignment = 128; -#endif -} - -#ifdef CONFIG_X86_32 -/* - * Early probe support logic for ppro memory erratum #50 - * - * This is called before we do cpu ident work - */ - -int __cpuinit ppro_with_ram_bug(void) -{ - /* Uses data from early_cpu_detect now */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask < 8) { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); - return 1; - } - return 0; -} - - -/* - * P4 Xeon errata 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ -static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) -{ - unsigned long lo, hi; - - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { - rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); - if ((lo & (1<<9)) == 0) { - printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); - printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); - lo |= (1<<9); /* Disable hw prefetching */ - wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); - } - } -} - - - -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif -#endif - -static void __cpuinit srat_detect_node(void) -{ -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) - unsigned node; - int cpu = smp_processor_id(); - int apicid = hard_smp_processor_id(); - - /* Don't do the funky fallback heuristics the AMD version employs - for now. */ - node = apicid_to_node[apicid]; - if (node == NUMA_NO_NODE || !node_online(node)) - node = first_node(node_online_map); - numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); -#endif -} - -/* - * find out the number of processor cores on the die - */ -static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, ebx, ecx, edx; - - if (c->cpuid_level < 4) - return 1; - - /* Intel has a non-standard dependency on %ecx for this CPUID level. */ - cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; -} - -static void __cpuinit init_intel(struct cpuinfo_x86 *c) -{ - unsigned int l2 = 0; - char *p = NULL; - - early_init_intel(c); - -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled; - - c->f00f_bug = 1; - if (!f00f_workaround_enabled) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif - - l2 = init_intel_cacheinfo(c); - if (c->cpuid_level > 9) { - unsigned eax = cpuid_eax(10); - /* Check for version and the number of counters */ - if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) - set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); - } - -#ifdef CONFIG_X86_32 - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_cpu_cap(c, X86_FEATURE_SEP); - - /* - * Names for the Pentium II/Celeron processors - * detectable only by also checking the cache size. - * Dixon is NOT a Celeron. - */ - if (c->x86 == 6) { - switch (c->x86_model) { - case 5: - if (c->x86_mask == 0) { - if (l2 == 0) - p = "Celeron (Covington)"; - else if (l2 == 256) - p = "Mobile Pentium II (Dixon)"; - } - break; - - case 6: - if (l2 == 128) - p = "Celeron (Mendocino)"; - else if (c->x86_mask == 0 || c->x86_mask == 5) - p = "Celeron-A"; - break; - - case 8: - if (l2 == 128) - p = "Celeron (Coppermine)"; - break; - } - } - - if (p) - strcpy(c->x86_model_id, p); - - Intel_errata_workarounds(c); - -#ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Set up the preferred alignment for movsl bulk memory moves - */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ - movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; - } -#endif - -#endif - - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (cpu_has_ds) { - unsigned int l1; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); - ds_init_intel(c); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else - if (c->x86 == 15) - set_cpu_cap(c, X86_FEATURE_P4); - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_P3); - - if (cpu_has_bts) - ptrace_bts_init_intel(c); - - /* - * See if we have a good local APIC by checking for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ - if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) - set_cpu_cap(c, X86_FEATURE_11AP); - -#ifdef CONFIG_X86_NUMAQ - numaq_tsc_disable(); -#endif -#endif - - detect_extended_topology(c); - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } - - /* Work around errata */ - srat_detect_node(); -} - -#ifdef CONFIG_X86_32 -static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) -{ - /* - * Intel PIII Tualatin. This comes in two flavours. - * One has 256kb of cache, the other 512. We have no way - * to determine which, so we use a boottime override - * for the 512kb model, and assume 256 otherwise. - */ - if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) - size = 256; - return size; -} -#endif - -static struct cpu_dev intel_cpu_dev __cpuinitdata = { - .c_vendor = "Intel", - .c_ident = { "GenuineIntel" }, -#ifdef CONFIG_X86_32 - .c_models = { - { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = - { - [0] = "486 DX-25/33", - [1] = "486 DX-50", - [2] = "486 SX", - [3] = "486 DX/2", - [4] = "486 SL", - [5] = "486 SX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = - { - [0] = "Pentium 60/66 A-step", - [1] = "Pentium 60/66", - [2] = "Pentium 75 - 200", - [3] = "OverDrive PODP5V83", - [4] = "Pentium MMX", - [7] = "Mobile Pentium 75 - 200", - [8] = "Mobile Pentium MMX" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = - { - [0] = "Pentium Pro A-step", - [1] = "Pentium Pro", - [3] = "Pentium II (Klamath)", - [4] = "Pentium II (Deschutes)", - [5] = "Pentium II (Deschutes)", - [6] = "Mobile Pentium II", - [7] = "Pentium III (Katmai)", - [8] = "Pentium III (Coppermine)", - [10] = "Pentium III (Cascades)", - [11] = "Pentium III (Tualatin)", - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = - { - [0] = "Pentium 4 (Unknown)", - [1] = "Pentium 4 (Willamette)", - [2] = "Pentium 4 (Northwood)", - [4] = "Pentium 4 (Foster)", - [5] = "Pentium 4 (Foster)", - } - }, - }, - .c_size_cache = intel_size_cache, -#endif - .c_early_init = early_init_intel, - .c_init = init_intel, - .c_x86_vendor = X86_VENDOR_INTEL, -}; - -cpu_dev_register(intel_cpu_dev); - -- cgit v1.2.3 From 4052704d92c3172ebc13cc0cc8df8ba2bd446a5c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:38 -0700 Subject: x86: intel.c put workaround for old cpus together consolidate the code some more. No change in functionality intended. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 192 ++++++++++++++++++++++---------------------- 1 file changed, 98 insertions(+), 94 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 365a008080c..5f76bf139fd 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -63,15 +63,54 @@ int __cpuinit ppro_with_ram_bug(void) return 0; } +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); -/* - * P4 Xeon errata 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ -static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); +} +#endif + +static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonprivileged users lock up the system. + * Note that the workaround only should be initialized once... + */ + c->f00f_bug = 0; + if (!paravirt_enabled() && c->x86 == 5) { + static int f00f_workaround_enabled; + + c->f00f_bug = 1; + if (!f00f_workaround_enabled) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif + + /* + * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until + * model 3 mask 3 + */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_cpu_cap(c, X86_FEATURE_SEP); + + /* + * P4 Xeon errata 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. + */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); if ((lo & (1<<9)) == 0) { @@ -81,23 +120,44 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); } } -} - + /* + * See if we have a good local APIC by checking for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ + if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && + (c->x86_mask < 0x6 || c->x86_mask == 0xb)) + set_cpu_cap(c, X86_FEATURE_11AP); -#ifdef CONFIG_X86_F00F_BUG -static void __cpuinit trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); +#ifdef CONFIG_X86_INTEL_USERCOPY /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. + * Set up the preferred alignment for movsl bulk memory moves */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} + switch (c->x86) { + case 4: /* 486: untested */ + break; + case 5: /* Old Pentia: untested */ + break; + case 6: /* PII/PIII only like movsl with 8-byte alignment */ + movsl_mask.mask = 7; + break; + case 15: /* P4 is OK down to 8-byte alignment */ + movsl_mask.mask = 7; + break; + } #endif + +#ifdef CONFIG_X86_NUMAQ + numaq_tsc_disable(); +#endif +} +#else +static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) +{ +} #endif static void __cpuinit srat_detect_node(void) @@ -139,28 +199,10 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; - char *p = NULL; early_init_intel(c); -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled; - - c->f00f_bug = 1; - if (!f00f_workaround_enabled) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif + intel_workarounds(c); l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9) { @@ -170,17 +212,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } -#ifdef CONFIG_X86_32 - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_cpu_cap(c, X86_FEATURE_SEP); + if (cpu_has_xmm2) + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + if (cpu_has_ds) { + unsigned int l1; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_cpu_cap(c, X86_FEATURE_BTS); + if (!(l1 & (1<<12))) + set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); + } +#ifdef CONFIG_X86_64 + if (c->x86 == 15) + c->x86_cache_alignment = c->x86_clflush_size * 2; + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); +#else /* * Names for the Pentium II/Celeron processors * detectable only by also checking the cache size. * Dixon is NOT a Celeron. */ if (c->x86 == 6) { + char *p = NULL; + switch (c->x86_model) { case 5: if (c->x86_mask == 0) { @@ -203,51 +260,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) p = "Celeron (Coppermine)"; break; } - } - if (p) - strcpy(c->x86_model_id, p); - - Intel_errata_workarounds(c); - -#ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Set up the preferred alignment for movsl bulk memory moves - */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ - movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; + if (p) + strcpy(c->x86_model_id, p); } -#endif - -#endif - if (cpu_has_xmm2) - set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); - if (cpu_has_ds) { - unsigned int l1; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_cpu_cap(c, X86_FEATURE_BTS); - if (!(l1 & (1<<12))) - set_cpu_cap(c, X86_FEATURE_PEBS); - ds_init_intel(c); - } - -#ifdef CONFIG_X86_64 - if (c->x86 == 15) - c->x86_cache_alignment = c->x86_clflush_size * 2; - if (c->x86 == 6) - set_cpu_cap(c, X86_FEATURE_REP_GOOD); -#else if (c->x86 == 15) set_cpu_cap(c, X86_FEATURE_P4); if (c->x86 == 6) @@ -256,19 +273,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_bts) ptrace_bts_init_intel(c); - /* - * See if we have a good local APIC by checking for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ - if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && - (c->x86_mask < 0x6 || c->x86_mask == 0xb)) - set_cpu_cap(c, X86_FEATURE_11AP); - -#ifdef CONFIG_X86_NUMAQ - numaq_tsc_disable(); -#endif #endif detect_extended_topology(c); -- cgit v1.2.3 From ec70cae8698632917f06110fdb70c6364281ecc6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 9 Sep 2008 16:40:39 -0700 Subject: x86: centaur_64.c remove duplicated setting of CONSTANT_TSC Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/centaur_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c index 0e5cf17a3c8..a1625f5a1e7 100644 --- a/arch/x86/kernel/cpu/centaur_64.c +++ b/arch/x86/kernel/cpu/centaur_64.c @@ -20,7 +20,6 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) if (c->x86 == 0x6 && c->x86_model >= 0xf) { c->x86_cache_alignment = c->x86_clflush_size * 2; - set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_REP_GOOD); } set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); -- cgit v1.2.3 From ac4ff656c07ada78316307b0c0ce8a8eb48aa6dd Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 01:06:47 +0900 Subject: x86: convert gart to use is_buffer_dma_capable helper function Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 0b99d4a06f7..1b0c412566e 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -214,24 +214,14 @@ static void iommu_full(struct device *dev, size_t size, int dir) static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) { - u64 mask = *dev->dma_mask; - int high = addr + size > mask; - int mmu = high; - - if (force_iommu) - mmu = 1; - - return mmu; + return force_iommu || + !is_buffer_dma_capable(*dev->dma_mask, addr, size); } static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) { - u64 mask = *dev->dma_mask; - int high = addr + size > mask; - int mmu = high; - - return mmu; + return !is_buffer_dma_capable(*dev->dma_mask, addr, size); } /* Map a single continuous physical area into the IOMMU. -- cgit v1.2.3 From 49fbf4e9f982c704dc365698c5b5efa780aadcb5 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 01:06:48 +0900 Subject: x86: convert pci-nommu to use is_buffer_dma_capable helper function Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 8e398b56f50..1c1c98a31d5 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -14,7 +14,7 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { - if (hwdev && bus + size > *hwdev->dma_mask) { + if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { if (*hwdev->dma_mask >= DMA_32BIT_MASK) printk(KERN_ERR "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", @@ -79,6 +79,7 @@ nommu_alloc_coherent(struct device *hwdev, size_t size, unsigned long dma_mask; int node; struct page *page; + dma_addr_t addr; dma_mask = dma_alloc_coherent_mask(hwdev, gfp); @@ -90,14 +91,15 @@ again: if (!page) return NULL; - if ((page_to_phys(page) + size > dma_mask) && !(gfp & GFP_DMA)) { + addr = page_to_phys(page); + if (!is_buffer_dma_capable(dma_mask, addr, size) && !(gfp & GFP_DMA)) { free_pages((unsigned long)page_address(page), get_order(size)); gfp |= GFP_DMA; goto again; } - *dma_addr = page_to_phys(page); - if (check_addr("alloc_coherent", hwdev, *dma_addr, size)) { + if (check_addr("alloc_coherent", hwdev, addr, size)) { + *dma_addr = addr; flush_write_buffers(); return page_address(page); } -- cgit v1.2.3 From e38e05a85828dac23540cd007df5f20985388afc Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 10 Sep 2008 18:53:34 +0800 Subject: x86: extended "flags" to show virtualization HW feature in /proc/cpuinfo The hardware virtualization technology evolves very fast. But currently it's hard to tell if your CPU support a certain kind of HW technology without digging into the source code. The patch add a new catagory in "flags" under /proc/cpuinfo. Now "flags" can indicate the (important) HW virtulization features the CPU supported as well. Current implementation just cover Intel VMX side. Signed-off-by: Sheng Yang Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5f76bf139fd..99468dbd08d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -196,6 +196,44 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) return 1; } +static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c) +{ + /* Intel VMX MSR indicated features */ +#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 +#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 +#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 +#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 +#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 +#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 + + u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; + + clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + clear_cpu_cap(c, X86_FEATURE_VNMI); + clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + clear_cpu_cap(c, X86_FEATURE_EPT); + clear_cpu_cap(c, X86_FEATURE_VPID); + + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); + msr_ctl = vmx_msr_high | vmx_msr_low; + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) + set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) + set_cpu_cap(c, X86_FEATURE_VNMI); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, + vmx_msr_low, vmx_msr_high); + msr_ctl2 = vmx_msr_high | vmx_msr_low; + if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && + (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) + set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) + set_cpu_cap(c, X86_FEATURE_EPT); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) + set_cpu_cap(c, X86_FEATURE_VPID); + } +} + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; @@ -289,6 +327,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) /* Work around errata */ srat_detect_node(); + + if (cpu_has(c, X86_FEATURE_VMX)) + detect_vmx_virtcap(c); } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From f461a1d80c865e5ec4d24107adbab8b010b60e32 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 10 Sep 2008 13:57:43 +0200 Subject: arch/x86/kernel/kdebugfs.c: introduce missing kfree Error handling code following a kmalloc should free the allocated data. Note that at the point of the change, node has not yet been stored in d, so it is not affected by the existing cleanup code. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: Ingo Molnar --- arch/x86/kernel/kdebugfs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index f2d43bc7551..ff7d3b0124f 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -139,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) if (PageHighMem(pg)) { data = ioremap_cache(pa_data, sizeof(*data)); if (!data) { + kfree(node); error = -ENXIO; goto err_dir; } -- cgit v1.2.3 From 0ad5bce7409d681a5655c66e64bb0eb740b89c1f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Sep 2008 16:42:00 -0700 Subject: x86: fix possible x86_64 and EFI regression Russ Anderson reported a boot crash with EFI and latest mainline: BIOS-e820: 00000000fffa0000 - 00000000fffac000 (reserved) Pid: 0, comm: swapper Not tainted 2.6.27-rc5-00100-gec0c15a-dirty #5 Call Trace: [] early_idt_handler+0x55/0x69 [] __memcpy+0x12/0xa4 [] efi_init+0xce/0x932 [] setup_early_serial8250_console+0x2d/0x36a [] __insert_resource+0x18/0xc8 [] setup_arch+0x3a7/0x632 [] start_kernel+0x91/0x367 [] x86_64_start_kernel+0xe3/0xe7 [] x86_64_start_kernel+0x0/0xe7 RIP 0x10 Such a crash is possible if the CPU in this system is a 64-bit processor which doesn't support NX (ie, old Intel P4 -based64-bit processors). Certainly, if we support such processors, then we should start with _PAGE_NX initially clear in __supported_pte_flags, and then set it once we've established that the processor does indeed support NX. That will prevent early_ioremap - or anything else - from trying to set it. The simple fix is to simply call check_efer() earlier. Reported-by: Russ Anderson Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 362d4e7f2d3..9838f2539df 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -670,6 +670,10 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); +#ifdef CONFIG_X86_64 + check_efer(); +#endif + #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) /* * Must be before kernel pagetables are setup @@ -738,7 +742,6 @@ void __init setup_arch(char **cmdline_p) #else num_physpages = max_pfn; - check_efer(); /* How many end-of-memory variables you have, grandma! */ /* need this before calling reserve_initrd */ -- cgit v1.2.3 From aef93c8bd506a78983d91cd576a97fee6e934ac1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 14 Sep 2008 02:33:15 -0700 Subject: x86: identify_cpu_without_cpuid v2 Krzysztof found some old cyrix cpu where an mtrr-alike cpu feature was not detected properly. this one is based on Krzysztof' patch, and we call ->c_identify() in early_identify_cpu. need to call c_identify() for cpus without cpuid even earlier ... v2: Krzysztof point out need to give cyrix another chance about cpuid checking again, after ->c_identify() enables cpuid for it Signed-off-by: Yinghai Lu Cc: Krzysztof Helt Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 57 +++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9a57106c199..f5f25b85be9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -485,6 +485,33 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_power = cpuid_edx(0x80000007); } + +static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_32 + int i; + + /* + * First of all, decide if this is a 486 or higher + * It's a 486 if we can modify the AC flag + */ + if (flag_is_changeable_p(X86_EFLAGS_AC)) + c->x86 = 4; + else + c->x86 = 3; + + for (i = 0; i < X86_VENDOR_NUM; i++) + if (cpu_devs[i] && cpu_devs[i]->c_identify) { + c->x86_vendor_id[0] = 0; + cpu_devs[i]->c_identify(c); + if (c->x86_vendor_id[0]) { + get_cpu_vendor(c); + break; + } + } +#endif +} + /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -503,13 +530,16 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) #endif c->x86_cache_alignment = c->x86_clflush_size; - if (!have_cpuid_p()) - return; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - c->extended_cpuid_level = 0; + if (!have_cpuid_p()) + identify_cpu_without_cpuid(c); + + /* cyrix could have cpuid enabled via c_identify()*/ + if (!have_cpuid()) + return; + cpu_detect(c); get_cpu_vendor(c); @@ -583,10 +613,14 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) static void __cpuinit generic_identify(struct cpuinfo_x86 *c) { + c->extended_cpuid_level = 0; + if (!have_cpuid_p()) - return; + identify_cpu_without_cpuid(c); - c->extended_cpuid_level = 0; + /* cyrix could have cpuid enabled via c_identify()*/ + if (!have_cpuid()) + return; cpu_detect(c); @@ -639,17 +673,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof c->x86_capability); - if (!have_cpuid_p()) { - /* - * First of all, decide if this is a 486 or higher - * It's a 486 if we can modify the AC flag - */ - if (flag_is_changeable_p(X86_EFLAGS_AC)) - c->x86 = 4; - else - c->x86 = 3; - } - generic_identify(c); if (this_cpu->c_identify) -- cgit v1.2.3 From afae865613c8292e8bdcce4f0b161988d761f033 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 14 Sep 2008 02:33:16 -0700 Subject: x86: move transmeta cap read to early_init_transmeta() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 -------- arch/x86/kernel/cpu/transmeta.c | 28 +++++++++++++++------------- 2 files changed, 15 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f5f25b85be9..3e2ce4d33d3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -465,14 +465,6 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) } #ifdef CONFIG_X86_64 - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - /* Don't set x86_cpuid_level here for now to not confuse. */ - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } - if (c->extended_cpuid_level >= 0x80000008) { u32 eax = cpuid_eax(0x80000008); diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 738e03244f9..52b3fefbd5a 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -5,6 +5,18 @@ #include #include "cpu.h" +static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c) +{ + u32 xlvl; + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ((xlvl & 0xffff0000) == 0x80860000) { + if (xlvl >= 0x80860001) + c->x86_capability[2] = cpuid_edx(0x80860001); + } +} + static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) { unsigned int cap_mask, uk, max, dummy; @@ -12,6 +24,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; char cpu_info[65]; + early_init_transmeta(c); + display_cacheinfo(c); /* Print CMS and CPU revision */ @@ -84,23 +98,11 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) #endif } -static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c) -{ - u32 xlvl; - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ((xlvl & 0xffff0000) == 0x80860000) { - if (xlvl >= 0x80860001) - c->x86_capability[2] = cpuid_edx(0x80860001); - } -} - static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_vendor = "Transmeta", .c_ident = { "GenuineTMx86", "TransmetaCPU" }, + .c_early_init = early_init_transmeta, .c_init = init_transmeta, - .c_identify = transmeta_identify, .c_x86_vendor = X86_VENDOR_TRANSMETA, }; -- cgit v1.2.3 From a9853dd6d285c30a3ddeb3cce8c05e1678400bef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 14:46:58 +0200 Subject: x86: cpuid, fix typo Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3e2ce4d33d3..cef3519b3bb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -529,7 +529,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid()) + if (!have_cpuid_p()) return; cpu_detect(c); @@ -611,7 +611,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) identify_cpu_without_cpuid(c); /* cyrix could have cpuid enabled via c_identify()*/ - if (!have_cpuid()) + if (!have_cpuid_p()) return; cpu_detect(c); -- cgit v1.2.3 From bee44f294efd8417f5e68553778a6cc957af1547 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 12 Sep 2008 19:42:35 +0900 Subject: x86: make GART to respect device's dma_mask about virtual mappings Currently, GART IOMMU ingores device's dma_mask when it does virtual mappings. So it could give a device a virtual address that the device can't access to. This patch fixes the above problem. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 1b0c412566e..9972c42ac92 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -83,23 +83,34 @@ static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ static unsigned long alloc_iommu(struct device *dev, int size, - unsigned long align_mask) + unsigned long align_mask, u64 dma_mask) { unsigned long offset, flags; unsigned long boundary_size; unsigned long base_index; + unsigned long limit; base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; + limit = iommu_device_max_index(iommu_pages, + DIV_ROUND_UP(iommu_bus_base, PAGE_SIZE), + dma_mask >> PAGE_SHIFT); + spin_lock_irqsave(&iommu_bitmap_lock, flags); - offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, + + if (limit <= next_bit) { + need_flush = 1; + next_bit = 0; + } + + offset = iommu_area_alloc(iommu_gart_bitmap, limit, next_bit, size, base_index, boundary_size, align_mask); - if (offset == -1) { + if (offset == -1 && next_bit) { need_flush = 1; - offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, + offset = iommu_area_alloc(iommu_gart_bitmap, limit, 0, size, base_index, boundary_size, align_mask); } @@ -228,12 +239,14 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir, unsigned long align_mask) + size_t size, int dir, unsigned long align_mask, + u64 dma_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); + unsigned long iommu_page; int i; + iommu_page = alloc_iommu(dev, npages, align_mask, dma_mask); if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) return phys_mem; @@ -263,7 +276,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = dma_map_area(dev, paddr, size, dir, 0); + bus = dma_map_area(dev, paddr, size, dir, 0, dma_get_mask(dev)); flush_gart(); return bus; @@ -314,6 +327,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, { struct scatterlist *s; int i; + u64 dma_mask = dma_get_mask(dev); #ifdef CONFIG_IOMMU_DEBUG printk(KERN_DEBUG "dma_map_sg overflow\n"); @@ -323,7 +337,8 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir, 0); + addr = dma_map_area(dev, addr, s->length, dir, 0, + dma_mask); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -345,14 +360,16 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start = alloc_iommu(dev, pages, 0); - unsigned long iommu_page = iommu_start; + unsigned long iommu_start; + unsigned long iommu_page; struct scatterlist *s; int i; + iommu_start = alloc_iommu(dev, pages, 0, dma_get_mask(dev)); if (iommu_start == -1) return -1; + iommu_page = iommu_start; for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; @@ -497,7 +514,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, align_mask = (1UL << get_order(size)) - 1; *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, - align_mask); + align_mask, dma_mask); flush_gart(); if (*dma_addr != bad_dma_address) -- cgit v1.2.3 From f10ac8a232496bf9271cfc67c6eea432891f04a6 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 11 Sep 2008 23:08:47 +0900 Subject: x86: avoid unnecessary low zone allocation in Calgary's alloc_coherent x86's common alloc_coherent (dma_alloc_coherent in dma-mapping.h) sets up the gfp flag according to the device dma_mask but Calgary doesn't need it because of virtual mappings. This patch avoids unnecessary low zone allocation. Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-calgary_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 8415d92853c..fe7695e4caa 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -491,6 +491,8 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, npages = size >> PAGE_SHIFT; order = get_order(size); + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + /* alloc enough pages (and possibly more) */ ret = (void *)__get_free_pages(flag, order); if (!ret) -- cgit v1.2.3 From f6a32a36ab96016675cd414802904feb288d7899 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 11 Sep 2008 23:08:48 +0900 Subject: x86: gart alloc_coherent does virtual mapppings only when necessary gart alloc_coherent need to do virtual mapppings only when an allocated buffer is not DMA-capable for a device. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9972c42ac92..9739d568209 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -505,15 +505,23 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { void *vaddr; + dma_addr_t paddr; unsigned long align_mask; + u64 dma_mask = dma_alloc_coherent_mask(dev, flag); vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); if (!vaddr) return NULL; + paddr = virt_to_phys(vaddr); + if (is_buffer_dma_capable(dma_mask, paddr, size)) { + *dma_addr = paddr; + return vaddr; + } + align_mask = (1UL << get_order(size)) - 1; - *dma_addr = dma_map_area(dev, __pa(vaddr), size, DMA_BIDIRECTIONAL, + *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, align_mask, dma_mask); flush_gart(); -- cgit v1.2.3 From ba0593bf553c450a03dbc5f8c1f0ff58b778a0c8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 16 Sep 2008 09:29:40 -0700 Subject: x86: completely disable NOPL on 32 bits Completely disable NOPL on 32 bits. It turns out that Microsoft Virtual PC is so broken it can't even reliably *fail* in the presence of NOPL. This leaves the infrastructure in place but disables it unconditionally. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8aab8517642..4e456bd955b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -344,31 +344,15 @@ static void __init early_cpu_detect(void) /* * The NOPL instruction is supposed to exist on all CPUs with - * family >= 6, unfortunately, that's not true in practice because + * family >= 6; unfortunately, that's not true in practice because * of early VIA chips and (more importantly) broken virtualizers that - * are not easy to detect. Hence, probe for it based on first - * principles. + * are not easy to detect. In the latter case it doesn't even *fail* + * reliably, so probing for it doesn't even work. Disable it completely + * unless we can find a reliable way to detect all the broken cases. */ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) { - const u32 nopl_signature = 0x888c53b1; /* Random number */ - u32 has_nopl = nopl_signature; - clear_cpu_cap(c, X86_FEATURE_NOPL); - if (c->x86 >= 6) { - asm volatile("\n" - "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ - "2:\n" - " .section .fixup,\"ax\"\n" - "3: xor %0,%0\n" - " jmp 2b\n" - " .previous\n" - _ASM_EXTABLE(1b,3b) - : "+a" (has_nopl)); - - if (has_nopl == nopl_signature) - set_cpu_cap(c, X86_FEATURE_NOPL); - } } static void __cpuinit generic_identify(struct cpuinfo_x86 *c) -- cgit v1.2.3 From 90f7d25c6b672137344f447a30a9159945ffea72 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 16 Sep 2008 11:27:30 -0700 Subject: x86: print DMI information in the oops trace in order to diagnose hard system specific issues, it's useful to have the system name in the oops (as provided by DMI) Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0c3927accb0..7b9ee9f0963 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -160,6 +161,7 @@ void __show_registers(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned long sp; unsigned short ss, gs; + const char *board; if (user_mode_vm(regs)) { sp = regs->sp; @@ -172,11 +174,15 @@ void __show_registers(struct pt_regs *regs, int all) } printk("\n"); - printk("Pid: %d, comm: %s %s (%s %.*s)\n", + + board = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!board) + board = ""; + printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", task_pid_nr(current), current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + init_utsname()->version, board); printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, -- cgit v1.2.3 From ee2fa7435b6dddf1ca119f298ad0100cf50c0397 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 13:47:25 +0200 Subject: AMD IOMMU: set iommu sunc flag after command queuing The iommu->need_sync flag must be set after the command is queued to avoid race conditions. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 69b4d060b21..a96d8c049a8 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -140,6 +140,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) { struct iommu_cmd cmd; + int ret; BUG_ON(iommu == NULL); @@ -147,9 +148,11 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); cmd.data[0] = devid; + ret = iommu_queue_command(iommu, &cmd); + iommu->need_sync = 1; - return iommu_queue_command(iommu, &cmd); + return ret; } /* @@ -159,6 +162,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, u64 address, u16 domid, int pde, int s) { struct iommu_cmd cmd; + int ret; memset(&cmd, 0, sizeof(cmd)); address &= PAGE_MASK; @@ -171,9 +175,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; + ret = iommu_queue_command(iommu, &cmd); + iommu->need_sync = 1; - return iommu_queue_command(iommu, &cmd); + return ret; } /* -- cgit v1.2.3 From 7e4f88da7bf1887563f70bd5edbbd0479e31dc12 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 14:19:15 +0200 Subject: AMD IOMMU: protect completion wait loop with iommu lock The unlocked polling of the ComWaitInt bit in the IOMMU completion wait path is racy. Protect it with the iommu lock. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a96d8c049a8..042fdc27bc9 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -101,10 +101,10 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) */ static int iommu_completion_wait(struct amd_iommu *iommu) { - int ret, ready = 0; + int ret = 0, ready = 0; unsigned status = 0; struct iommu_cmd cmd; - unsigned long i = 0; + unsigned long flags, i = 0; memset(&cmd, 0, sizeof(cmd)); cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; @@ -112,10 +112,12 @@ static int iommu_completion_wait(struct amd_iommu *iommu) iommu->need_sync = 0; - ret = iommu_queue_command(iommu, &cmd); + spin_lock_irqsave(&iommu->lock, flags); + + ret = __iommu_queue_command(iommu, &cmd); if (ret) - return ret; + goto out; while (!ready && (i < EXIT_LOOP_COUNT)) { ++i; @@ -130,6 +132,8 @@ static int iommu_completion_wait(struct amd_iommu *iommu) if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); +out: + spin_unlock_irqrestore(&iommu->lock, flags); return 0; } -- cgit v1.2.3 From 279b0bbba2bb647348ad90e183b3960aa99eccfd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 18 Sep 2008 23:55:27 -0700 Subject: x86: fix arch/x86/kernel/cpu/mtrr/main.c warning fix this warning reported by Andrew Morton: > arch/x86/kernel/cpu/mtrr/main.c: In function 'mtrr_bp_init': > arch/x86/kernel/cpu/mtrr/main.c:1170: warning: 'extra_remove_base' may be used uninitialized in this function the warning is bogus but the logic that prevents uninitialized use is a bit convoluted so simplify it all. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b117d7f8a56..8a7c79234be 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1218,11 +1218,10 @@ static int __init mtrr_cleanup(unsigned address_bits) memset(range, 0, sizeof(range)); extra_remove_size = 0; - if (mtrr_tom2) { - extra_remove_base = 1 << (32 - PAGE_SHIFT); + extra_remove_base = 1 << (32 - PAGE_SHIFT); + if (mtrr_tom2) extra_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; - } nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size); range_sums = sum_ranges(range, nr_range); -- cgit v1.2.3 From dbcc112e3b5367e81a845b082933506b0ff1d1e2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 15:04:26 +0200 Subject: AMD IOMMU: check for invalid device pointers Currently AMD IOMMU code triggers a BUG_ON if NULL is passed as the device. This is inconsistent with other IOMMU implementations. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 01c68c38840..695e0fc41b1 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -645,6 +645,18 @@ static void set_device_domain(struct amd_iommu *iommu, * *****************************************************************************/ +/* + * This function checks if the driver got a valid device from the caller to + * avoid dereferencing invalid pointers. + */ +static bool check_device(struct device *dev) +{ + if (!dev || !dev->dma_mask) + return false; + + return true; +} + /* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the @@ -661,18 +673,19 @@ static int get_device_resources(struct device *dev, struct pci_dev *pcidev; u16 _bdf; - BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); + *iommu = NULL; + *domain = NULL; + *bdf = 0xffff; + + if (dev->bus != &pci_bus_type) + return 0; pcidev = to_pci_dev(dev); _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); /* device not translated by any IOMMU in the system? */ - if (_bdf > amd_iommu_last_bdf) { - *iommu = NULL; - *domain = NULL; - *bdf = 0xffff; + if (_bdf > amd_iommu_last_bdf) return 0; - } *bdf = amd_iommu_alias_table[_bdf]; @@ -826,6 +839,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, u16 devid; dma_addr_t addr; + if (!check_device(dev)) + return bad_dma_address; + get_device_resources(dev, &iommu, &domain, &devid); if (iommu == NULL || domain == NULL) @@ -860,7 +876,8 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, struct protection_domain *domain; u16 devid; - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!check_device(dev) || + !get_device_resources(dev, &iommu, &domain, &devid)) /* device not handled by any AMD IOMMU */ return; @@ -910,6 +927,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, phys_addr_t paddr; int mapped_elems = 0; + if (!check_device(dev)) + return 0; + get_device_resources(dev, &iommu, &domain, &devid); if (!iommu || !domain) @@ -967,7 +987,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, u16 devid; int i; - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!check_device(dev) || + !get_device_resources(dev, &iommu, &domain, &devid)) return; spin_lock_irqsave(&domain->lock, flags); @@ -999,6 +1020,9 @@ static void *alloc_coherent(struct device *dev, size_t size, u16 devid; phys_addr_t paddr; + if (!check_device(dev)) + return NULL; + virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return 0; @@ -1047,6 +1071,9 @@ static void free_coherent(struct device *dev, size_t size, struct protection_domain *domain; u16 devid; + if (!check_device(dev)) + return; + get_device_resources(dev, &iommu, &domain, &devid); if (!iommu || !domain) -- cgit v1.2.3 From 270cab2426cdc6307725e4f1f46ecf8ab8e69193 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 15:49:46 +0200 Subject: AMD IOMMU: move TLB flushing to the map/unmap helper functions This patch moves the invocation of the flushing functions to the map/unmap helpers because its common code in all dma_ops relevant mapping/unmapping code. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 695e0fc41b1..691e023695a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -795,6 +795,9 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; + if (unlikely(iommu_has_npcache(iommu))) + iommu_flush_pages(iommu, dma_dom->domain.id, address, size); + out: return address; } @@ -825,6 +828,8 @@ static void __unmap_single(struct amd_iommu *iommu, } dma_ops_free_addresses(dma_dom, dma_addr, pages); + + iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); } /* @@ -853,9 +858,6 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, if (addr == bad_dma_address) goto out; - if (iommu_has_npcache(iommu)) - iommu_flush_pages(iommu, domain->id, addr, size); - if (iommu->need_sync) iommu_completion_wait(iommu); @@ -885,8 +887,6 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - iommu_flush_pages(iommu, domain->id, dma_addr, size); - if (iommu->need_sync) iommu_completion_wait(iommu); @@ -948,9 +948,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, mapped_elems++; } else goto unmap; - if (iommu_has_npcache(iommu)) - iommu_flush_pages(iommu, domain->id, s->dma_address, - s->dma_length); } if (iommu->need_sync) @@ -996,8 +993,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, for_each_sg(sglist, s, nelems, i) { __unmap_single(iommu, domain->priv, s->dma_address, s->dma_length, dir); - iommu_flush_pages(iommu, domain->id, s->dma_address, - s->dma_length); s->dma_address = s->dma_length = 0; } @@ -1048,9 +1043,6 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out; } - if (iommu_has_npcache(iommu)) - iommu_flush_pages(iommu, domain->id, *dma_addr, size); - if (iommu->need_sync) iommu_completion_wait(iommu); @@ -1082,7 +1074,6 @@ static void free_coherent(struct device *dev, size_t size, spin_lock_irqsave(&domain->lock, flags); __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - iommu_flush_pages(iommu, domain->id, dma_addr, size); if (iommu->need_sync) iommu_completion_wait(iommu); -- cgit v1.2.3 From 2842e5bf3115193f05dc9dac20f940e7abf44c1a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 18 Sep 2008 15:23:43 +0200 Subject: x86: move GART TLB flushing options to generic code The GART currently implements the iommu=[no]fullflush command line parameters which influence its IO/TLB flushing strategy. This patch makes these parameters generic so that they can be used by the AMD IOMMU too. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 13 +++++++++++++ arch/x86/kernel/pci-gart_64.c | 13 ------------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0a1408abcc6..d2f2c0158dc 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -16,6 +16,15 @@ EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ +int iommu_fullflush; + #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; @@ -171,6 +180,10 @@ static __init int iommu_setup(char *p) } if (!strncmp(p, "nomerge", 7)) iommu_merge = 0; + if (!strncmp(p, "fullflush", 8)) + iommu_fullflush = 1; + if (!strncmp(p, "nofullflush", 11)) + iommu_fullflush = 0; if (!strncmp(p, "forcesac", 8)) iommu_sac_force = 1; if (!strncmp(p, "allowdac", 8)) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9739d568209..508ef470b27 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -45,15 +45,6 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ -/* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when an mapping is reused. With it true the GART is - * flushed for every mapping. Problem is that doing the lazy flush seems - * to trigger bugs with some popular PCI cards, in particular 3ware (but - * has been also also seen with Qlogic at least). - */ -int iommu_fullflush = 1; - /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); /* Guarded by iommu_bitmap_lock: */ @@ -901,10 +892,6 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush", 8)) - iommu_fullflush = 1; - if (!strncmp(p, "nofullflush", 11)) - iommu_fullflush = 0; if (!strncmp(p, "noagp", 5)) no_agp = 1; if (!strncmp(p, "noaperture", 10)) -- cgit v1.2.3 From 1c65577398589bb44ab0980f9b9d30804b48a5db Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 18:40:05 +0200 Subject: AMD IOMMU: implement lazy IO/TLB flushing The IO/TLB flushing on every unmaping operation is the most expensive part in AMD IOMMU code and not strictly necessary. It is sufficient to do the flush before any entries are reused. This is patch implements lazy IO/TLB flushing which does exactly this. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 26 ++++++++++++++++++++++---- arch/x86/kernel/amd_iommu_init.c | 7 ++++++- 2 files changed, 28 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 691e023695a..679f2a8e22e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -203,6 +203,14 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, return 0; } +/* Flush the whole IO/TLB for a given protection domain */ +static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) +{ + u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + + iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); +} + /**************************************************************************** * * The functions below are used the create the page table mappings for @@ -386,14 +394,18 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, PAGE_SIZE) >> PAGE_SHIFT; limit = limit < size ? limit : size; - if (dom->next_bit >= limit) + if (dom->next_bit >= limit) { dom->next_bit = 0; + dom->need_flush = true; + } address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, 0 , boundary_size, 0); - if (address == -1) + if (address == -1) { address = iommu_area_alloc(dom->bitmap, limit, 0, pages, 0, boundary_size, 0); + dom->need_flush = true; + } if (likely(address != -1)) { dom->next_bit = address + pages; @@ -553,6 +565,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->bitmap[0] = 1; dma_dom->next_bit = 0; + dma_dom->need_flush = false; + /* Intialize the exclusion range if necessary */ if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { @@ -795,7 +809,10 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; - if (unlikely(iommu_has_npcache(iommu))) + if (unlikely(dma_dom->need_flush && !iommu_fullflush)) { + iommu_flush_tlb(iommu, dma_dom->domain.id); + dma_dom->need_flush = false; + } else if (unlikely(iommu_has_npcache(iommu))) iommu_flush_pages(iommu, dma_dom->domain.id, address, size); out: @@ -829,7 +846,8 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); - iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); + if (iommu_fullflush) + iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); } /* diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index a69cc0f5204..f2fa8dc81be 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -995,6 +995,11 @@ int __init amd_iommu_init(void) else printk("disabled\n"); + if (iommu_fullflush) + printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); + else + printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); + out: return ret; @@ -1057,7 +1062,7 @@ void __init amd_iommu_detect(void) static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { - if (strcmp(str, "isolate") == 0) + if (strncmp(str, "isolate", 7) == 0) amd_iommu_isolate = 1; } -- cgit v1.2.3 From 5507eef835c9c941e69d6d96e4b43af23eeb4ac9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 19:01:02 +0200 Subject: AMD IOMMU: add branch hints to completion wait checks This patch adds branch hints to the cecks if a completion_wait is necessary. The completion_waits in the mapping paths are unlikly because they will only happen on software implementations of AMD IOMMU which don't exists today or with lazy IO/TLB flushing when the allocator wraps around the address space. With lazy IO/TLB flushing the completion_wait in the unmapping path is unlikely too. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 679f2a8e22e..d743aa0adcc 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -876,7 +876,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, if (addr == bad_dma_address) goto out; - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); out: @@ -905,7 +905,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); @@ -968,7 +968,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); out: @@ -1014,7 +1014,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); @@ -1061,7 +1061,7 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out; } - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); out: @@ -1093,7 +1093,7 @@ static void free_coherent(struct device *dev, size_t size, __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - if (iommu->need_sync) + if (unlikely(iommu->need_sync)) iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); -- cgit v1.2.3 From 6d4f343f84993eb0d5864c0823dc9babd171a33a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 4 Sep 2008 19:18:02 +0200 Subject: AMD IOMMU: align alloc_coherent addresses properly The API definition for dma_alloc_coherent states that the bus address has to be aligned to the next power of 2 boundary greater than the allocation size. This is violated by AMD IOMMU so far and this patch fixes it. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d743aa0adcc..15792ed082e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -383,7 +383,8 @@ static unsigned long dma_mask_to_pages(unsigned long mask) */ static unsigned long dma_ops_alloc_addresses(struct device *dev, struct dma_ops_domain *dom, - unsigned int pages) + unsigned int pages, + unsigned long align_mask) { unsigned long limit = dma_mask_to_pages(*dev->dma_mask); unsigned long address; @@ -400,10 +401,10 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, } address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, - 0 , boundary_size, 0); + 0 , boundary_size, align_mask); if (address == -1) { address = iommu_area_alloc(dom->bitmap, limit, 0, pages, - 0, boundary_size, 0); + 0, boundary_size, align_mask); dom->need_flush = true; } @@ -787,17 +788,22 @@ static dma_addr_t __map_single(struct device *dev, struct dma_ops_domain *dma_dom, phys_addr_t paddr, size_t size, - int dir) + int dir, + bool align) { dma_addr_t offset = paddr & ~PAGE_MASK; dma_addr_t address, start; unsigned int pages; + unsigned long align_mask = 0; int i; pages = iommu_num_pages(paddr, size); paddr &= PAGE_MASK; - address = dma_ops_alloc_addresses(dev, dma_dom, pages); + if (align) + align_mask = (1UL << get_order(size)) - 1; + + address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask); if (unlikely(address == bad_dma_address)) goto out; @@ -872,7 +878,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, return (dma_addr_t)paddr; spin_lock_irqsave(&domain->lock, flags); - addr = __map_single(dev, iommu, domain->priv, paddr, size, dir); + addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false); if (addr == bad_dma_address) goto out; @@ -959,7 +965,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, paddr = sg_phys(s); s->dma_address = __map_single(dev, iommu, domain->priv, - paddr, s->length, dir); + paddr, s->length, dir, false); if (s->dma_address) { s->dma_length = s->length; @@ -1053,7 +1059,7 @@ static void *alloc_coherent(struct device *dev, size_t size, spin_lock_irqsave(&domain->lock, flags); *dma_addr = __map_single(dev, iommu, domain->priv, paddr, - size, DMA_BIDIRECTIONAL); + size, DMA_BIDIRECTIONAL, true); if (*dma_addr == bad_dma_address) { free_pages((unsigned long)virt_addr, get_order(size)); -- cgit v1.2.3 From 335503e57b6b8de04cec5d27eb2c3d09ff98905b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2008 14:29:07 +0200 Subject: AMD IOMMU: add event buffer allocation This patch adds the allocation of a event buffer for each AMD IOMMU in the system. The hardware will log events like device page faults or other errors to this buffer once this is enabled. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index f2fa8dc81be..41ce8d5d626 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -417,6 +417,30 @@ static void __init free_command_buffer(struct amd_iommu *iommu) free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } +/* allocates the memory where the IOMMU will log its events to */ +static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) +{ + u64 entry; + iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(EVT_BUFFER_SIZE)); + + if (iommu->evt_buf == NULL) + return NULL; + + entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; + memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, + &entry, sizeof(entry)); + + iommu->evt_buf_size = EVT_BUFFER_SIZE; + + return iommu->evt_buf; +} + +static void __init free_event_buffer(struct amd_iommu *iommu) +{ + free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); +} + /* sets a specific bit in the device table entry. */ static void set_dev_entry_bit(u16 devid, u8 bit) { @@ -622,6 +646,7 @@ static int __init init_iommu_devices(struct amd_iommu *iommu) static void __init free_iommu_one(struct amd_iommu *iommu) { free_command_buffer(iommu); + free_event_buffer(iommu); iommu_unmap_mmio_space(iommu); } @@ -661,6 +686,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->cmd_buf) return -ENOMEM; + iommu->evt_buf = alloc_event_buffer(iommu); + if (!iommu->evt_buf) + return -ENOMEM; + init_iommu_from_pci(iommu); init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); -- cgit v1.2.3 From ee893c24edb8ebab9a3fb66566855572579ad616 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Sep 2008 14:48:04 +0200 Subject: AMD IOMMU: save pci segment from ACPI tables This patch adds the pci_seg field to the amd_iommu structure and fills it with the corresponding value from the ACPI table. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 41ce8d5d626..b50234ef91e 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -676,6 +676,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) */ iommu->devid = h->devid; iommu->cap_ptr = h->cap_ptr; + iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); if (!iommu->mmio_base) -- cgit v1.2.3 From 3eaf28a1cd2686aaa185b54d5a5e18e91b41f7f2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Sep 2008 15:55:10 +0200 Subject: AMD IOMMU: save pci_dev instead of devid We need the pci_dev later anyways to enable MSI for the IOMMU hardware. So remove the devid pointing to the BDF and replace it with the pci_dev structure where the IOMMU is implemented. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index b50234ef91e..a7eb89d8923 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -242,9 +242,12 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) /* Function to enable the hardware */ void __init iommu_enable(struct amd_iommu *iommu) { - printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); - print_devid(iommu->devid, 0); - printk(" cap 0x%hx\n", iommu->cap_ptr); + printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " + "at %02x:%02x.%x cap 0x%hx\n", + iommu->dev->bus->number, + PCI_SLOT(iommu->dev->devfn), + PCI_FUNC(iommu->dev->devfn), + iommu->cap_ptr); iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } @@ -511,15 +514,14 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) */ static void __init init_iommu_from_pci(struct amd_iommu *iommu) { - int bus = PCI_BUS(iommu->devid); - int dev = PCI_SLOT(iommu->devid); - int fn = PCI_FUNC(iommu->devid); int cap_ptr = iommu->cap_ptr; u32 range; - iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, + &iommu->cap); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, + &range); - range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); iommu->first_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_FD(range)); iommu->last_device = calc_devid(MMIO_GET_BUS(range), @@ -674,7 +676,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) /* * Copy data from ACPI table entry to the iommu struct */ - iommu->devid = h->devid; + iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff); + if (!iommu->dev) + return 1; + iommu->cap_ptr = h->cap_ptr; iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; @@ -695,6 +700,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); + pci_enable_device(iommu->dev); + return 0; } -- cgit v1.2.3 From a80dc3e0e0dc8393158de317d66ae0f345dc58f9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 16:51:41 +0200 Subject: AMD IOMMU: add MSI interrupt support The AMD IOMMU can generate interrupts for various reasons. This patch adds the basic interrupt enabling infrastructure to the driver. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 11 +++++ arch/x86/kernel/amd_iommu_init.c | 99 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 109 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 15792ed082e..0e494b9d5f2 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -49,6 +49,17 @@ static int iommu_has_npcache(struct amd_iommu *iommu) return iommu->cap & IOMMU_CAP_NPCACHE; } +/**************************************************************************** + * + * Interrupt handling functions + * + ****************************************************************************/ + +irqreturn_t amd_iommu_int_handler(int irq, void *data) +{ + return IRQ_NONE; +} + /**************************************************************************** * * IOMMU command queuing functions diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index a7eb89d8923..14a06464a69 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -515,17 +517,20 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) static void __init init_iommu_from_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; - u32 range; + u32 range, misc; pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, &iommu->cap); pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET, &range); + pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET, + &misc); iommu->first_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_FD(range)); iommu->last_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_LD(range)); + iommu->evt_msi_num = MMIO_MSI_NUM(misc); } /* @@ -696,6 +701,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->evt_buf) return -ENOMEM; + iommu->int_enabled = false; + init_iommu_from_pci(iommu); init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); @@ -741,6 +748,95 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } +/**************************************************************************** + * + * The following functions initialize the MSI interrupts for all IOMMUs + * in the system. Its a bit challenging because there could be multiple + * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per + * pci_dev. + * + ****************************************************************************/ + +static int __init iommu_setup_msix(struct amd_iommu *iommu) +{ + struct amd_iommu *curr; + struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */ + int nvec = 0, i; + + list_for_each_entry(curr, &amd_iommu_list, list) { + if (curr->dev == iommu->dev) { + entries[nvec].entry = curr->evt_msi_num; + entries[nvec].vector = 0; + curr->int_enabled = true; + nvec++; + } + } + + if (pci_enable_msix(iommu->dev, entries, nvec)) { + pci_disable_msix(iommu->dev); + return 1; + } + + for (i = 0; i < nvec; ++i) { + int r = request_irq(entries->vector, amd_iommu_int_handler, + IRQF_SAMPLE_RANDOM, + "AMD IOMMU", + NULL); + if (r) + goto out_free; + } + + return 0; + +out_free: + for (i -= 1; i >= 0; --i) + free_irq(entries->vector, NULL); + + pci_disable_msix(iommu->dev); + + return 1; +} + +static int __init iommu_setup_msi(struct amd_iommu *iommu) +{ + int r; + struct amd_iommu *curr; + + list_for_each_entry(curr, &amd_iommu_list, list) { + if (curr->dev == iommu->dev) + curr->int_enabled = true; + } + + + if (pci_enable_msi(iommu->dev)) + return 1; + + r = request_irq(iommu->dev->irq, amd_iommu_int_handler, + IRQF_SAMPLE_RANDOM, + "AMD IOMMU", + NULL); + + if (r) { + pci_disable_msi(iommu->dev); + return 1; + } + + return 0; +} + +static int __init iommu_init_msi(struct amd_iommu *iommu) +{ + if (iommu->int_enabled) + return 0; + + if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) + return iommu_setup_msix(iommu); + else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) + return iommu_setup_msi(iommu); + + return 1; +} + /**************************************************************************** * * The next functions belong to the third pass of parsing the ACPI @@ -862,6 +958,7 @@ static void __init enable_iommus(void) list_for_each_entry(iommu, &amd_iommu_list, list) { iommu_set_exclusion_range(iommu); + iommu_init_msi(iommu); iommu_enable(iommu); } } -- cgit v1.2.3 From 90008ee4b811c944455752dcb72b291a5ba81b53 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 16:41:05 +0200 Subject: AMD IOMMU: add event handling code This patch adds code for polling and printing out events generated by the AMD IOMMU. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 87 +++++++++++++++++++++++++++++++++++++++- arch/x86/kernel/amd_iommu_init.c | 1 - 2 files changed, 86 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0e494b9d5f2..0cb8fd2359f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -55,9 +55,94 @@ static int iommu_has_npcache(struct amd_iommu *iommu) * ****************************************************************************/ +static void iommu_print_event(void *__evt) +{ + u32 *event = __evt; + int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; + int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; + int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; + int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; + u64 address = (u64)(((u64)event[3]) << 32) | event[2]; + + printk(KERN_ERR "AMD IOMMU: Event logged ["); + + switch (type) { + case EVENT_TYPE_ILL_DEV: + printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + case EVENT_TYPE_IO_FAULT: + printk("IO_PAGE_FAULT device=%02x:%02x.%x " + "domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domid, address, flags); + break; + case EVENT_TYPE_DEV_TAB_ERR: + printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + case EVENT_TYPE_PAGE_TAB_ERR: + printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " + "domain=0x%04x address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + domid, address, flags); + break; + case EVENT_TYPE_ILL_CMD: + printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + break; + case EVENT_TYPE_CMD_HARD_ERR: + printk("COMMAND_HARDWARE_ERROR address=0x%016llx " + "flags=0x%04x]\n", address, flags); + break; + case EVENT_TYPE_IOTLB_INV_TO: + printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x " + "address=0x%016llx]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address); + break; + case EVENT_TYPE_INV_DEV_REQ: + printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x " + "address=0x%016llx flags=0x%04x]\n", + PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), + address, flags); + break; + default: + printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); + } +} + +static void iommu_poll_events(struct amd_iommu *iommu) +{ + u32 head, tail; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + + head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); + + while (head != tail) { + iommu_print_event(iommu->evt_buf + head); + head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; + } + + writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); + + spin_unlock_irqrestore(&iommu->lock, flags); +} + irqreturn_t amd_iommu_int_handler(int irq, void *data) { - return IRQ_NONE; + struct amd_iommu *iommu; + + list_for_each_entry(iommu, &amd_iommu_list, list) + iommu_poll_events(iommu); + + return IRQ_HANDLED; } /**************************************************************************** diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 14a06464a69..eed488892c0 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -32,7 +32,6 @@ /* * definitions for the ACPI scanning code */ -#define PCI_BUS(x) (((x) >> 8) & 0xff) #define IVRS_HEADER_LENGTH 48 #define ACPI_IVHD_TYPE 0x10 -- cgit v1.2.3 From 126c52be4b1d2eb667a1d140f0ceaff9d353f700 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 16:47:35 +0200 Subject: AMD IOMMU: enable event logging The code to log IOMMU events is in place now. So enable event logging with this patch. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index eed488892c0..1974b73fece 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -253,6 +253,13 @@ void __init iommu_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } +/* Function to enable IOMMU event logging and event interrupts */ +void __init iommu_enable_event_logging(struct amd_iommu *iommu) +{ + iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); + iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); +} + /* * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in * the system has one. @@ -958,6 +965,7 @@ static void __init enable_iommus(void) list_for_each_entry(iommu, &amd_iommu_list, list) { iommu_set_exclusion_range(iommu); iommu_init_msi(iommu); + iommu_enable_event_logging(iommu); iommu_enable(iommu); } } -- cgit v1.2.3 From a22131a223147016041b5e5cd0ae5ab61ef4177e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 17:55:28 +0200 Subject: AMD IOMMU: allow IO page faults from devices There is a bit in the device entry to suppress all IO page faults generated by a device. This bit was set until now because there was no event logging. Now that there is event logging this patch allows IO page faults from devices to see them in the kernel log. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 1974b73fece..8c137598555 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -950,7 +950,6 @@ static void init_device_table(void) for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { set_dev_entry_bit(devid, DEV_ENTRY_VALID); set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); - set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); } } -- cgit v1.2.3 From b39ba6ad004a31bf2a08ba2b08c1e0f9b3530bb7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2008 18:40:46 +0200 Subject: AMD IOMMU: add dma_supported callback This function determines if the AMD IOMMU implementation is responsible for a given device. So the DMA layer can get this information from the driver. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0cb8fd2359f..a6a6f8ed1cf 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1204,6 +1204,30 @@ free_mem: free_pages((unsigned long)virt_addr, get_order(size)); } +/* + * This function is called by the DMA layer to find out if we can handle a + * particular device. It is part of the dma_ops. + */ +static int amd_iommu_dma_supported(struct device *dev, u64 mask) +{ + u16 bdf; + struct pci_dev *pcidev; + + /* No device or no PCI device */ + if (!dev || dev->bus != &pci_bus_type) + return 0; + + pcidev = to_pci_dev(dev); + + bdf = calc_devid(pcidev->bus->number, pcidev->devfn); + + /* Out of our scope? */ + if (bdf > amd_iommu_last_bdf) + return 0; + + return 1; +} + /* * The function for pre-allocating protection domains. * @@ -1247,6 +1271,7 @@ static struct dma_mapping_ops amd_iommu_dma_ops = { .unmap_single = unmap_single, .map_sg = map_sg, .unmap_sg = unmap_sg, + .dma_supported = amd_iommu_dma_supported, }; /* -- cgit v1.2.3 From bd60b735c658e6e8c656e89771d281bcfcf51279 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 10:24:48 +0200 Subject: AMD IOMMU: don't assign preallocated protection domains to devices In isolation mode the protection domains for the devices are preallocated and preassigned. This is bad if a device should be passed to a virtualization guest because the IOMMU code does not know if it is in use by a driver. This patch changes the code to assign the device to the preallocated domain only if there are dma mapping requests for it. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a6a6f8ed1cf..7c179144745 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -33,6 +33,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); +/* A list of preallocated protection domains */ +static LIST_HEAD(iommu_pd_list); +static DEFINE_SPINLOCK(iommu_pd_list_lock); + /* * general struct to manage commands send to an IOMMU */ @@ -663,6 +667,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->next_bit = 0; dma_dom->need_flush = false; + dma_dom->target_dev = 0xffff; /* Intialize the exclusion range if necessary */ if (iommu->exclusion_start && @@ -768,6 +773,33 @@ static bool check_device(struct device *dev) return true; } +/* + * In this function the list of preallocated protection domains is traversed to + * find the domain for a specific device + */ +static struct dma_ops_domain *find_protection_domain(u16 devid) +{ + struct dma_ops_domain *entry, *ret = NULL; + unsigned long flags; + + if (list_empty(&iommu_pd_list)) + return NULL; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + + list_for_each_entry(entry, &iommu_pd_list, list) { + if (entry->target_dev == devid) { + ret = entry; + list_del(&ret->list); + break; + } + } + + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + + return ret; +} + /* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the @@ -803,9 +835,11 @@ static int get_device_resources(struct device *dev, *iommu = amd_iommu_rlookup_table[*bdf]; if (*iommu == NULL) return 0; - dma_dom = (*iommu)->default_dom; *domain = domain_for_device(*bdf); if (*domain == NULL) { + dma_dom = find_protection_domain(*bdf); + if (!dma_dom) + dma_dom = (*iommu)->default_dom; *domain = &dma_dom->domain; set_device_domain(*iommu, *domain, *bdf); printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " @@ -1257,10 +1291,9 @@ void prealloc_protection_domains(void) if (!dma_dom) continue; init_unity_mappings_for_device(dma_dom, devid); - set_device_domain(iommu, &dma_dom->domain, devid); - printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ", - dma_dom->domain.id); - print_devid(devid, 1); + dma_dom->target_dev = devid; + + list_add_tail(&dma_dom->list, &iommu_pd_list); } } -- cgit v1.2.3 From 38ddf41b198e21d3ecbe5752e875857b7ce7589e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 10:38:32 +0200 Subject: AMD IOMMU: some set_device_domain cleanups Remove some magic numbers and split the pte_root using standard functions. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 7c179144745..a34d8e915e3 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -739,12 +739,13 @@ static void set_device_domain(struct amd_iommu *iommu, u64 pte_root = virt_to_phys(domain->pt_root); - pte_root |= (domain->mode & 0x07) << 9; - pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2; + pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) + << DEV_ENTRY_MODE_SHIFT; + pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - amd_iommu_dev_table[devid].data[0] = pte_root; - amd_iommu_dev_table[devid].data[1] = pte_root >> 32; + amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); + amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); amd_iommu_dev_table[devid].data[2] = domain->id; amd_iommu_pd_table[devid] = domain; -- cgit v1.2.3 From 13d9fead3daa0efa1b8bb6ae59650e4453b39128 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 10 Sep 2008 20:19:40 +0900 Subject: AMD IOMMU: avoid unnecessary low zone allocation in alloc_coherent x86's common alloc_coherent (dma_alloc_coherent in dma-mapping.h) sets up the gfp flag according to the device dma_mask but AMD IOMMU doesn't need it for devices that the IOMMU can do virtual mappings for. This patch avoids unnecessary low zone allocation. Signed-off-by: FUJITA Tomonori Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a34d8e915e3..e4866660463 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1173,6 +1173,9 @@ static void *alloc_coherent(struct device *dev, size_t size, if (!check_device(dev)) return NULL; + if (!get_device_resources(dev, &iommu, &domain, &devid)) + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return 0; @@ -1180,8 +1183,6 @@ static void *alloc_coherent(struct device *dev, size_t size, memset(virt_addr, 0, size); paddr = virt_to_phys(virt_addr); - get_device_resources(dev, &iommu, &domain, &devid); - if (!iommu || !domain) { *dma_addr = (dma_addr_t)paddr; return virt_addr; -- cgit v1.2.3 From c97ac5359e6897abe22770740294dda185bac30d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Sep 2008 10:59:15 +0200 Subject: AMD IOMMU: replace memset with __GFP_ZERO in alloc_coherent Remove the memset and use __GFP_ZERO at allocation time instead. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e4866660463..f405a61f61f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1176,11 +1176,11 @@ static void *alloc_coherent(struct device *dev, size_t size, if (!get_device_resources(dev, &iommu, &domain, &devid)) flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + flag |= __GFP_ZERO; virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return 0; - memset(virt_addr, 0, size); paddr = virt_to_phys(virt_addr); if (!iommu || !domain) { -- cgit v1.2.3 From 6754086ce67c0a1f5d7eac612102368781e14588 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 12:17:00 +0200 Subject: AMD IOMMU: simplify dma_mask_to_pages The current calculation is very complicated. This patch replaces it with a much simpler version. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f405a61f61f..db64482b179 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -472,8 +472,7 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, ****************************************************************************/ static unsigned long dma_mask_to_pages(unsigned long mask) { - return (mask >> PAGE_SHIFT) + - (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); + return PAGE_ALIGN(mask) >> PAGE_SHIFT; } /* -- cgit v1.2.3 From d58befd3a0110c93d70756537b4d01d05a9e6e12 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 12:19:58 +0200 Subject: AMD IOMMU: free domain bitmap with its allocation order The amd_iommu_pd_alloc_bitmap is allocated with a calculated order and freed with order 1. This is not a bug since the calculated order always evaluates to 1, but its unclean code. So replace the 1 with the calculation in the release path. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8c137598555..e60f4cd29eb 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1144,7 +1144,8 @@ out: return ret; free: - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, + get_order(MAX_DOMAIN_ID/8)); free_pages((unsigned long)amd_iommu_pd_table, get_order(rlookup_table_size)); -- cgit v1.2.3 From 199d0d501202f077fe647a5c14fe046b17abc46b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 16:45:59 +0200 Subject: AMD IOMMU: remove unnecessary cast to u64 in the init code The ctrl variable is only u32 and readl also returns a 32 bit value. So the cast to u64 is pointless. Remove it with this patch. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index e60f4cd29eb..505fc04e896 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -235,7 +235,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) { u32 ctrl; - ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); + ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET); ctrl &= ~(1 << bit); writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } -- cgit v1.2.3 From b514e55569855bbaab782a8ec073630ed4e99c68 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 17:14:27 +0200 Subject: AMD IOMMU: calculate IVHD size with a function The current calculation of the IVHD entry size is hard to read. So move this code to a seperate function to make it more clear what this calculation does. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 505fc04e896..80250e63bd0 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -296,6 +296,14 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) * ****************************************************************************/ +/* + * This function calculates the length of a given IVHD entry + */ +static inline int ivhd_entry_length(u8 *ivhd) +{ + return 0x04 << (*ivhd >> 6); +} + /* * This function reads the last device id the IOMMU has to handle from the PCI * capability header for this IOMMU @@ -340,7 +348,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) default: break; } - p += 0x04 << (*p >> 6); + p += ivhd_entry_length(p); } WARN_ON(p != end); @@ -641,7 +649,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } - p += 0x04 << (e->type >> 6); + p += ivhd_entry_length(p); } } -- cgit v1.2.3 From 23c1713fe9e6ac886a4d44415298d0cbb2e83df2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 17 Sep 2008 17:18:17 +0200 Subject: AMD IOMMU: use cmd_buf_size when freeing the command buffer The command buffer release function uses the CMD_BUF_SIZE macro for get_order. Replace this with iommu->cmd_buf_size which is more reliable about the actual size of the buffer. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 80250e63bd0..db0c83af44d 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -433,7 +433,8 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) static void __init free_command_buffer(struct amd_iommu *iommu) { - free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); + free_pages((unsigned long)iommu->cmd_buf, + get_order(iommu->cmd_buf_size)); } /* allocates the memory where the IOMMU will log its events to */ -- cgit v1.2.3 From 832a90c30485117d65180cc9a8d9869c1b158570 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 18 Sep 2008 15:54:23 +0200 Subject: AMD IOMMU: use coherent_dma_mask in alloc_coherent The alloc_coherent implementation for AMD IOMMU currently uses *dev->dma_mask per default. This patch changes it to prefer dev->coherent_dma_mask if it is set. Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index db64482b179..6f7b9744573 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -483,9 +483,10 @@ static unsigned long dma_mask_to_pages(unsigned long mask) static unsigned long dma_ops_alloc_addresses(struct device *dev, struct dma_ops_domain *dom, unsigned int pages, - unsigned long align_mask) + unsigned long align_mask, + u64 dma_mask) { - unsigned long limit = dma_mask_to_pages(*dev->dma_mask); + unsigned long limit = dma_mask_to_pages(dma_mask); unsigned long address; unsigned long size = dom->aperture_size >> PAGE_SHIFT; unsigned long boundary_size; @@ -919,7 +920,8 @@ static dma_addr_t __map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir, - bool align) + bool align, + u64 dma_mask) { dma_addr_t offset = paddr & ~PAGE_MASK; dma_addr_t address, start; @@ -933,7 +935,8 @@ static dma_addr_t __map_single(struct device *dev, if (align) align_mask = (1UL << get_order(size)) - 1; - address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask); + address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, + dma_mask); if (unlikely(address == bad_dma_address)) goto out; @@ -997,10 +1000,13 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, struct protection_domain *domain; u16 devid; dma_addr_t addr; + u64 dma_mask; if (!check_device(dev)) return bad_dma_address; + dma_mask = *dev->dma_mask; + get_device_resources(dev, &iommu, &domain, &devid); if (iommu == NULL || domain == NULL) @@ -1008,7 +1014,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, return (dma_addr_t)paddr; spin_lock_irqsave(&domain->lock, flags); - addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false); + addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, + dma_mask); if (addr == bad_dma_address) goto out; @@ -1080,10 +1087,13 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *s; phys_addr_t paddr; int mapped_elems = 0; + u64 dma_mask; if (!check_device(dev)) return 0; + dma_mask = *dev->dma_mask; + get_device_resources(dev, &iommu, &domain, &devid); if (!iommu || !domain) @@ -1095,7 +1105,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, paddr = sg_phys(s); s->dma_address = __map_single(dev, iommu, domain->priv, - paddr, s->length, dir, false); + paddr, s->length, dir, false, + dma_mask); if (s->dma_address) { s->dma_length = s->length; @@ -1168,6 +1179,7 @@ static void *alloc_coherent(struct device *dev, size_t size, struct protection_domain *domain; u16 devid; phys_addr_t paddr; + u64 dma_mask = dev->coherent_dma_mask; if (!check_device(dev)) return NULL; @@ -1187,10 +1199,13 @@ static void *alloc_coherent(struct device *dev, size_t size, return virt_addr; } + if (!dma_mask) + dma_mask = *dev->dma_mask; + spin_lock_irqsave(&domain->lock, flags); *dma_addr = __map_single(dev, iommu, domain->priv, paddr, - size, DMA_BIDIRECTIONAL, true); + size, DMA_BIDIRECTIONAL, true, dma_mask); if (*dma_addr == bad_dma_address) { free_pages((unsigned long)virt_addr, get_order(size)); -- cgit v1.2.3 From af2d237bf574f89ae5a1b67f2556a324c8f64ff5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 21 Sep 2008 23:27:13 +0900 Subject: x86: check for ioremap() failure in copy_oldmem_page() Add a check for ioremap() failure in copy_oldmem_page(). This patch also includes small coding style fixes. Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash_dump_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 15e6c6bc4a4..280d6ef3af0 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -33,14 +33,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, return 0; vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!vaddr) + return -ENOMEM; if (userbuf) { - if (copy_to_user(buf, (vaddr + offset), csize)) { + if (copy_to_user(buf, vaddr + offset, csize)) { iounmap(vaddr); return -EFAULT; } } else - memcpy(buf, (vaddr + offset), csize); + memcpy(buf, vaddr + offset, csize); iounmap(vaddr); return csize; -- cgit v1.2.3 From 153dab77e228931f3aff74f21b762927ac710ca7 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 21 Sep 2008 23:25:40 +0900 Subject: x86: use platform_device_register_simple() Cleanup pcspeaker.c Signed-off-by: Akinobu Mita Signed-off-by: Ingo Molnar --- arch/x86/kernel/pcspeaker.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c index bc1f2d3ea27..a311ffcaad1 100644 --- a/arch/x86/kernel/pcspeaker.c +++ b/arch/x86/kernel/pcspeaker.c @@ -1,20 +1,13 @@ #include -#include +#include #include static __init int add_pcspkr(void) { struct platform_device *pd; - int ret; - pd = platform_device_alloc("pcspkr", -1); - if (!pd) - return -ENOMEM; + pd = platform_device_register_simple("pcspkr", -1, NULL, 0); - ret = platform_device_add(pd); - if (ret) - platform_device_put(pd); - - return ret; + return IS_ERR(pd) ? PTR_ERR(pd) : 0; } device_initcall(add_pcspkr); -- cgit v1.2.3 From 16dc552f35bc0ec6fec8ef83f8032eee352d17f5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 19 Sep 2008 11:45:04 -0700 Subject: x86: use WARN_ONCE in workaround for mtrr mask so could help catch attention about bug in bios about mtrr mask setting. WARN_ONCE got into mainline already, lets use it. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/generic.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index cb7d3b6a80e..4e8d77f01ee 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -401,12 +401,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, tmp |= ~((1<<(hi - 1)) - 1); if (tmp != mask_lo) { - static int once = 1; - - if (once) { - printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); - once = 0; - } + WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); mask_lo = tmp; } } -- cgit v1.2.3 From d26dbc5cf94b0a28acc947285c3b54814a73cb2e Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 22 Sep 2008 22:35:07 +0900 Subject: iommu: export iommu_area_reserve helper function x86 has set_bit_string() that does the exact same thing that set_bit_area() in lib/iommu-helper.c does. This patch exports set_bit_area() in lib/iommu-helper.c as iommu_area_reserve(), converts GART, Calgary, and AMD IOMMU to use it. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 +- arch/x86/kernel/pci-calgary_64.c | 2 +- arch/x86/kernel/pci-gart_64.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6f7b9744573..70537d117a9 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -572,7 +572,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, if (start_page + pages > last_page) pages = last_page - start_page; - set_bit_string(dom->bitmap, start_page, pages); + iommu_area_reserve(dom->bitmap, start_page, pages); } static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index fe7695e4caa..080d1d27f37 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -261,7 +261,7 @@ static void iommu_range_reserve(struct iommu_table *tbl, badbit, tbl, start_addr, npages); } - set_bit_string(tbl->it_map, index, npages); + iommu_area_reserve(tbl->it_map, index, npages); spin_unlock_irqrestore(&tbl->it_lock, flags); } diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 508ef470b27..3dcb1ad86e3 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -827,7 +827,7 @@ void __init gart_iommu_init(void) * Out of IOMMU space handling. * Reserve some invalid pages at the beginning of the GART. */ - set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES); + iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); agp_memory_reserved = iommu_size; printk(KERN_INFO -- cgit v1.2.3 From 28b166a700899a0f88b1cc283c449fb5bf72a635 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 22 Sep 2008 13:14:13 -0400 Subject: x86, NMI watchdog: when booting with reset_devices, clear the performance counters P4s have a quirk that makes necessary to clear P4_CCCR_OVF bit on the CCCR everytime the PMI is triggered. When booting the kernel with reset_devices (more specific kdump case), the counters reach zero and the PMI will be generated. This is not a problem on other processors but on P4s, it'll continue to generate NMIs until that bit is cleared. Since there may be other users of the performance counters, clear and disable all of them when booting with reset_devices option. We have a P4 box here that crashes because of this problem. Since the kdump kernel usually boots with only one processor active, the second logical unit won't be set up, therefore, MSR_P4_IQ_CCCR1 (and other performance counter registers) won't be cleared and P4_CCCR_OVF may be still set because the previous kernel was using this register. An NMI is triggered because of the MSR_P4_IQ_CCCR1 right after the NMI delivery is enabled, triggering the race fixed on my previous email. Signed-off-by: Aristeu Rozanski Acked-by: Don Zickus Acked-by: Prarit Bhargava Acked-by: Vivek Goyal Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 05cc22dbd4f..62c01006397 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -432,6 +432,27 @@ static const struct wd_ops p6_wd_ops = { #define P4_CCCR_ENABLE (1 << 12) #define P4_CCCR_OVF (1 << 31) +#define P4_CONTROLS 18 +static unsigned int p4_controls[18] = { + MSR_P4_BPU_CCCR0, + MSR_P4_BPU_CCCR1, + MSR_P4_BPU_CCCR2, + MSR_P4_BPU_CCCR3, + MSR_P4_MS_CCCR0, + MSR_P4_MS_CCCR1, + MSR_P4_MS_CCCR2, + MSR_P4_MS_CCCR3, + MSR_P4_FLAME_CCCR0, + MSR_P4_FLAME_CCCR1, + MSR_P4_FLAME_CCCR2, + MSR_P4_FLAME_CCCR3, + MSR_P4_IQ_CCCR0, + MSR_P4_IQ_CCCR1, + MSR_P4_IQ_CCCR2, + MSR_P4_IQ_CCCR3, + MSR_P4_IQ_CCCR4, + MSR_P4_IQ_CCCR5, +}; /* * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter * CRU_ESCR0 (with any non-null event selector) through a complemented @@ -473,6 +494,26 @@ static int setup_p4_watchdog(unsigned nmi_hz) evntsel_msr = MSR_P4_CRU_ESCR0; cccr_msr = MSR_P4_IQ_CCCR0; cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); + + /* + * If we're on the kdump kernel or other situation, we may + * still have other performance counter registers set to + * interrupt and they'll keep interrupting forever because + * of the P4_CCCR_OVF quirk. So we need to ACK all the + * pending interrupts and disable all the registers here, + * before reenabling the NMI delivery. Refer to p4_rearm() + * about the P4_CCCR_OVF quirk. + */ + if (reset_devices) { + unsigned int low, high; + int i; + + for (i = 0; i < P4_CONTROLS; i++) { + rdmsr(p4_controls[i], low, high); + low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); + wrmsr(p4_controls[i], low, high); + } + } } else { /* logical cpu 1 */ perfctr_msr = MSR_P4_IQ_PERFCTR1; -- cgit v1.2.3 From b3e15bdef689641e7f1bb03efbe56112c3ee82e2 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Mon, 22 Sep 2008 13:13:59 -0400 Subject: x86, NMI watchdog: setup before enabling NMI watchdog There's a small window when NMI watchdog is being set up that if any NMIs are triggered, the NMI code will make make use of not initalized wd_ops elements: void setup_apic_nmi_watchdog(void *unused) { if (__get_cpu_var(wd_enabled)) return; /* cheap hack to support suspend/resume */ /* if cpu0 is not active neither should the other cpus */ if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) return; switch (nmi_watchdog) { case NMI_LOCAL_APIC: /* enable it before to avoid race with handler */ --> __get_cpu_var(wd_enabled) = 1; --> if (lapic_watchdog_init(nmi_hz) < 0) { (...) asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) { (...) if (nmi_watchdog_tick(regs, reason)) return; (...) notrace __kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { (...) if (!__get_cpu_var(wd_enabled)) return rc; switch (nmi_watchdog) { case NMI_LOCAL_APIC: rc |= lapic_wd_event(nmi_hz); (...) int lapic_wd_event(unsigned nmi_hz) { struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 ctr; --> rdmsrl(wd->perfctr_msr, ctr); and wd->*_msr will be initialized on each processor type specific setup, after enabling NMIs for PMIs. Since the counter was just set, the chances of an performance counter generated NMI is minimal, but any other unknown NMI would trigger the problem. This patch fixes the problem by setting everything up before enabling performance counter generated NMIs and will set wd_enabled using a callback function. Signed-off-by: Aristeu Rozanski Acked-by: Don Zickus Acked-by: Prarit Bhargava Acked-by: Vivek Goyal Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 45 +++++++++++++++++++++++++--------- arch/x86/kernel/nmi.c | 11 +++++++-- 2 files changed, 42 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 62c01006397..6bff382094f 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -295,13 +295,19 @@ static int setup_k7_watchdog(unsigned nmi_hz) /* setup the timer */ wrmsr(evntsel_msr, evntsel, 0); write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= K7_EVNTSEL_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); + /* initialize the wd struct before enabling */ wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= K7_EVNTSEL_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + return 1; } @@ -379,13 +385,19 @@ static int setup_p6_watchdog(unsigned nmi_hz) wrmsr(evntsel_msr, evntsel, 0); nmi_hz = adjust_for_32bit_ctr(nmi_hz); write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= P6_EVNTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); + /* initialize the wd struct before enabling */ wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= P6_EVNTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + return 1; } @@ -540,12 +552,17 @@ static int setup_p4_watchdog(unsigned nmi_hz) wrmsr(evntsel_msr, evntsel, 0); wrmsr(cccr_msr, cccr_val, 0); write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - cccr_val |= P4_CCCR_ENABLE; - wrmsr(cccr_msr, cccr_val, 0); + wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = cccr_msr; + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + cccr_val |= P4_CCCR_ENABLE; + wrmsr(cccr_msr, cccr_val, 0); return 1; } @@ -661,13 +678,17 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) wrmsr(evntsel_msr, evntsel, 0); nmi_hz = adjust_for_32bit_ctr(nmi_hz); write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); return 1; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index abb78a2cc4a..2c97f07f1c2 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -299,6 +299,15 @@ void acpi_nmi_disable(void) on_each_cpu(__acpi_nmi_disable, NULL, 1); } +/* + * This function is called as soon the LAPIC NMI watchdog driver has everything + * in place and it's ready to check if the NMIs belong to the NMI watchdog + */ +void cpu_nmi_set_wd_enabled(void) +{ + __get_cpu_var(wd_enabled) = 1; +} + void setup_apic_nmi_watchdog(void *unused) { if (__get_cpu_var(wd_enabled)) @@ -311,8 +320,6 @@ void setup_apic_nmi_watchdog(void *unused) switch (nmi_watchdog) { case NMI_LOCAL_APIC: - /* enable it before to avoid race with handler */ - __get_cpu_var(wd_enabled) = 1; if (lapic_watchdog_init(nmi_hz) < 0) { __get_cpu_var(wd_enabled) = 0; return; -- cgit v1.2.3 From afa9fdc2f5f8e4d98f3e77bfa204412cbc181346 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 20 Sep 2008 01:23:30 +0900 Subject: iommu: remove fullflush and nofullflush in IOMMU generic option This patch against tip/x86/iommu virtually reverts 2842e5bf3115193f05dc9dac20f940e7abf44c1a. But just reverting the commit breaks AMD IOMMU so this patch also includes some fixes. The above commit adds new two options to x86 IOMMU generic kernel boot options, fullflush and nofullflush. But such change that affects all the IOMMUs needs more discussion (all IOMMU parties need the chance to discuss it): http://lkml.org/lkml/2008/9/19/106 Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 4 ++-- arch/x86/kernel/amd_iommu_init.c | 5 ++++- arch/x86/kernel/pci-dma.c | 13 ------------- arch/x86/kernel/pci-gart_64.c | 13 +++++++++++++ 4 files changed, 19 insertions(+), 16 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 70537d117a9..c19212191c9 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -948,7 +948,7 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; - if (unlikely(dma_dom->need_flush && !iommu_fullflush)) { + if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { iommu_flush_tlb(iommu, dma_dom->domain.id); dma_dom->need_flush = false; } else if (unlikely(iommu_has_npcache(iommu))) @@ -985,7 +985,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); - if (iommu_fullflush) + if (amd_iommu_unmap_flush) iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); } diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index db0c83af44d..148fcfe22f1 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -122,6 +122,7 @@ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ int amd_iommu_isolate; /* if 1, device isolation is enabled */ +bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ @@ -1144,7 +1145,7 @@ int __init amd_iommu_init(void) else printk("disabled\n"); - if (iommu_fullflush) + if (amd_iommu_unmap_flush) printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); else printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); @@ -1214,6 +1215,8 @@ static int __init parse_amd_iommu_options(char *str) for (; *str; ++str) { if (strncmp(str, "isolate", 7) == 0) amd_iommu_isolate = 1; + if (strncmp(str, "fullflush", 11) == 0) + amd_iommu_unmap_flush = true; } return 1; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index d2f2c0158dc..0a1408abcc6 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -16,15 +16,6 @@ EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; -/* - * If this is disabled the IOMMU will use an optimized flushing strategy - * of only flushing when an mapping is reused. With it true the GART is - * flushed for every mapping. Problem is that doing the lazy flush seems - * to trigger bugs with some popular PCI cards, in particular 3ware (but - * has been also also seen with Qlogic at least). - */ -int iommu_fullflush; - #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; @@ -180,10 +171,6 @@ static __init int iommu_setup(char *p) } if (!strncmp(p, "nomerge", 7)) iommu_merge = 0; - if (!strncmp(p, "fullflush", 8)) - iommu_fullflush = 1; - if (!strncmp(p, "nofullflush", 11)) - iommu_fullflush = 0; if (!strncmp(p, "forcesac", 8)) iommu_sac_force = 1; if (!strncmp(p, "allowdac", 8)) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 3dcb1ad86e3..9e390f1bd46 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -45,6 +45,15 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ +/* + * If this is disabled the IOMMU will use an optimized flushing strategy + * of only flushing when an mapping is reused. With it true the GART is + * flushed for every mapping. Problem is that doing the lazy flush seems + * to trigger bugs with some popular PCI cards, in particular 3ware (but + * has been also also seen with Qlogic at least). + */ +int iommu_fullflush = 1; + /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); /* Guarded by iommu_bitmap_lock: */ @@ -892,6 +901,10 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; + if (!strncmp(p, "fullflush", 8)) + iommu_fullflush = 1; + if (!strncmp(p, "nofullflush", 11)) + iommu_fullflush = 0; if (!strncmp(p, "noagp", 5)) no_agp = 1; if (!strncmp(p, "noaperture", 10)) -- cgit v1.2.3 From 05e12e1c4c09cd35ac9f4e6af1e42b0036375d72 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Mon, 22 Sep 2008 22:58:47 -0700 Subject: x86: fix 27-rc crash on vsmp due to paravirt during module load 27-rc fails to boot up if configured to use modules. Turns out vsmp_patch was marked __init, and vsmp_patch being the pvops 'patch' routine for vsmp, a call to vsmp_patch just turns out to execute a code page with series of 0xcc (POISON_FREE_INITMEM -- int3). vsmp_patch has been marked with __init ever since pvops, however, apply_paravirt can be called during module load causing calls to freed memory location. Since apply_paravirt can only be called during init/module load, make vsmp_patch with "__init_or_module" Signed-off-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsmp_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 0c029e8959c..7766d36983f 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -61,7 +61,7 @@ static void vsmp_irq_enable(void) native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); } -static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, +static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { switch (type) { -- cgit v1.2.3 From 4faac97d44ac27bdbb010a9c3597401a8f89341f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Sep 2008 18:54:29 +0200 Subject: x86: prevent stale state of c1e_mask across CPU offline/online Impact: hang which happens across CPU offline/online on AMD C1E systems. When a CPU goes offline then the corresponding bit in the broadcast mask is cleared. For AMD C1E enabled CPUs we do not reenable the broadcast when the CPU comes online again as we do not clear the corresponding bit in the c1e_mask, which keeps track which CPUs have been switched to broadcast already. So on those !$@#& machines we never switch back to broadcasting after a CPU offline/online cycle. Clear the bit when the CPU plays dead. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 11 ++++++++--- arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/process_64.c | 2 ++ 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7fc4d5b0a6a..2e2247117f6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -246,6 +246,14 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) return 1; } +static cpumask_t c1e_mask = CPU_MASK_NONE; +static int c1e_detected; + +void c1e_remove_cpu(int cpu) +{ + cpu_clear(cpu, c1e_mask); +} + /* * C1E aware idle routine. We check for C1E active in the interrupt * pending message MSR. If we detect C1E, then we handle it the same @@ -253,9 +261,6 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) */ static void c1e_idle(void) { - static cpumask_t c1e_mask = CPU_MASK_NONE; - static int c1e_detected; - if (need_resched()) return; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3b7a1ddcc0b..4b3cfdf5421 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -88,6 +88,7 @@ static void cpu_exit_clear(void) cpu_clear(cpu, cpu_callin_map); numa_remove_cpu(cpu); + c1e_remove_cpu(cpu); } /* We don't actually take CPU down, just spin without interrupts. */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 71553b664e2..e12e0e4dd25 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -93,6 +93,8 @@ DECLARE_PER_CPU(int, cpu_state); static inline void play_dead(void) { idle_task_exit(); + c1e_remove_cpu(raw_smp_processor_id()); + mb(); /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; -- cgit v1.2.3 From a8d6829044901a67732904be5f1eacdf8539604f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 22 Sep 2008 19:02:25 +0200 Subject: x86: prevent C-states hang on AMD C1E enabled machines Impact: System hang when AMD C1E machines switch into C2/C3 AMD C1E enabled systems do not work with normal ACPI C-states even if the BIOS is advertising them. Limit the C-states to C1 for the ACPI processor idle code. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2e2247117f6..d8c2a299bfe 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -272,6 +272,7 @@ static void c1e_idle(void) c1e_detected = 1; mark_tsc_unstable("TSC halt in C1E"); printk(KERN_INFO "System has C1E enabled\n"); + set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); } } -- cgit v1.2.3 From 09bfeea13cea843fb03eaa96b5d891fa0abdcc90 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 18 Sep 2008 21:12:10 +0200 Subject: x86: c1e_idle: don't mark TSC unstable if CPU has invariant TSC Impact: Functional TSC is marked unstable on AMD family 0x10 and 0x11 CPUs. This would be wrong because for those CPUs "invariant TSC" means: "The TSC counts at the same rate in all P-states, all C states, S0, or S1" (See "Processor BIOS and Kernel Developer's Guides" for those CPUs.) [ tglx: Changed C1E to AMD C1E in the printks to avoid confusion with Intel C1E ] Signed-off-by: Andreas Herrmann Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d8c2a299bfe..876e9189077 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -270,8 +270,9 @@ static void c1e_idle(void) rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); if (lo & K8_INTP_C1E_ACTIVE_MASK) { c1e_detected = 1; - mark_tsc_unstable("TSC halt in C1E"); - printk(KERN_INFO "System has C1E enabled\n"); + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + mark_tsc_unstable("TSC halt in AMD C1E"); + printk(KERN_INFO "System has AMD C1E enabled\n"); set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); } } -- cgit v1.2.3 From 1eda81495a49a4ee91d8863b0a441a624375efea Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Tue, 23 Sep 2008 22:40:02 -0400 Subject: x86: prevent stale state of c1e_mask across CPU offline/online, fix Fix build error introduced by commit 4faac97d44ac27 ("x86: prevent stale state of c1e_mask across CPU offline/online"). process_32.c needs to include idle.h to get the prototype for c1e_remove_cpu() Signed-off-by: Marc Dionne Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4b3cfdf5421..31f40b24bf5 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,6 +55,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -- cgit v1.2.3 From 77a9a768b7374cd23d1f400097eede9f1547f508 Mon Sep 17 00:00:00 2001 From: Jeremy Katz Date: Tue, 23 Sep 2008 21:54:00 -0400 Subject: x86: disable apm on the olpc The OLPC doesn't support APM but also doesn't have DMI, so we can't detect and disable it based on DMI data. So, just disable based on machine_is_olpc() Signed-off-by: Jeremy Katz Signed-off-by: Ingo Molnar --- arch/x86/kernel/apm_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 9ee24e6bc4b..732d1f4e10e 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -234,6 +234,7 @@ #include #include #include +#include #include #include @@ -2217,7 +2218,7 @@ static int __init apm_init(void) dmi_check_system(apm_dmi_table); - if (apm_info.bios.version == 0 || paravirt_enabled()) { + if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { printk(KERN_INFO "apm: BIOS not found.\n"); return -ENODEV; } -- cgit v1.2.3 From 493cd9122af5bd0b219974a48f0e31da0c29ff7e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 23 Sep 2008 14:56:44 -0700 Subject: x86: ds.c ptrace.c integer as NULL pointer sparse fixes fix: arch/x86/kernel/ptrace.c:763:29: warning: Using plain integer as NULL pointer arch/x86/kernel/ptrace.c:777:46: warning: Using plain integer as NULL pointer arch/x86/kernel/ptrace.c:1115:45: warning: Using plain integer as NULL pointer arch/x86/kernel/ds.c:482:26: warning: Using plain integer as NULL pointer arch/x86/kernel/ds.c:487:25: warning: Using plain integer as NULL pointer Signed-off-by: Harvey Harrison Acked-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 4 ++-- arch/x86/kernel/ptrace.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index ab21c270bfa..2b69994fd3a 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -479,12 +479,12 @@ static int ds_release(struct task_struct *task, enum ds_qualifier qual) goto out; kfree(context->buffer[qual]); - context->buffer[qual] = 0; + context->buffer[qual] = NULL; current->mm->total_vm -= context->pages[qual]; current->mm->locked_vm -= context->pages[qual]; context->pages[qual] = 0; - context->owner[qual] = 0; + context->owner[qual] = NULL; /* * we put the context twice: diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index ba19bb49bd0..5df6093ac77 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -734,7 +734,7 @@ static int ptrace_bts_config(struct task_struct *child, goto errout; if (cfg.flags & PTRACE_BTS_O_ALLOC) { - ds_ovfl_callback_t ovfl = 0; + ds_ovfl_callback_t ovfl = NULL; unsigned int sig = 0; /* we ignore the error in case we were not tracing child */ @@ -748,7 +748,7 @@ static int ptrace_bts_config(struct task_struct *child, ovfl = ptrace_bts_ovfl; } - error = ds_request_bts(child, /* base = */ 0, cfg.size, ovfl); + error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); if (error < 0) goto errout; @@ -1086,7 +1086,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_SIZE: - ret = ds_get_bts_index(child, /* pos = */ 0); + ret = ds_get_bts_index(child, /* pos = */ NULL); break; case PTRACE_BTS_GET: -- cgit v1.2.3 From e51a1ac2dfca9ad869471e88f828281db7e810c0 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 23 Sep 2008 15:20:09 -0700 Subject: x86, olpc: fix endian bug in openfirmware workaround Boardrev is always treated as a u32 everywhere else, no reason to byteswap the 0xc2 value. The only use is to print out if it is a prerelease board, the test being: (olpc_platform_info.boardrev & 0xf) < 8 Which is currently always true as be32_to_cpu(0xc2) & 0xf = 0 but I doubt that was the intention here. The consequences of the bug are pretty minor though (incorrect boardrev displayed in dmesg when ofw support not configured) Also annotate the temporary used to read the boardrev in the ofw case. The confusion was noticed by Sparse: arch/x86/kernel/olpc.c:206:32: warning: cast to restricted __be32 Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/olpc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 3e667227480..7a13fac63a1 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -190,12 +190,12 @@ EXPORT_SYMBOL_GPL(olpc_ec_cmd); static void __init platform_detect(void) { size_t propsize; - u32 rev; + __be32 rev; if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, &propsize) || propsize != 4) { printk(KERN_ERR "ofw: getprop call failed!\n"); - rev = 0; + rev = cpu_to_be32(0); } olpc_platform_info.boardrev = be32_to_cpu(rev); } @@ -203,7 +203,7 @@ static void __init platform_detect(void) static void __init platform_detect(void) { /* stopgap until OFW support is added to the kernel */ - olpc_platform_info.boardrev = be32_to_cpu(0xc2); + olpc_platform_info.boardrev = 0xc2; } #endif -- cgit v1.2.3 From 9f6ac57729724b58df81ca5dc005326759a806fe Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:35 +0900 Subject: x86: export pci-nommu's alloc_coherent This patch exports nommu_alloc_coherent (renamed dma_generic_alloc_coherent). GART needs this function. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 31 +++++++++++++++++++++++++++++++ arch/x86/kernel/pci-nommu.c | 39 +-------------------------------------- 2 files changed, 32 insertions(+), 38 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0a1408abcc6..4e612d20170 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -134,6 +134,37 @@ unsigned long iommu_num_pages(unsigned long addr, unsigned long len) EXPORT_SYMBOL(iommu_num_pages); #endif +void *dma_generic_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag) +{ + unsigned long dma_mask; + struct page *page; + dma_addr_t addr; + + dma_mask = dma_alloc_coherent_mask(dev, flag); + + flag |= __GFP_ZERO; +again: + page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); + if (!page) + return NULL; + + addr = page_to_phys(page); + if (!is_buffer_dma_capable(dma_mask, addr, size)) { + __free_pages(page, get_order(size)); + + if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) { + flag = (flag & ~GFP_DMA32) | GFP_DMA; + goto again; + } + + return NULL; + } + + *dma_addr = addr; + return page_address(page); +} + /* * See for the iommu kernel parameter * documentation. diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 1c1c98a31d5..c70ab5a5d4c 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -72,43 +72,6 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -static void * -nommu_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_addr, gfp_t gfp) -{ - unsigned long dma_mask; - int node; - struct page *page; - dma_addr_t addr; - - dma_mask = dma_alloc_coherent_mask(hwdev, gfp); - - gfp |= __GFP_ZERO; - - node = dev_to_node(hwdev); -again: - page = alloc_pages_node(node, gfp, get_order(size)); - if (!page) - return NULL; - - addr = page_to_phys(page); - if (!is_buffer_dma_capable(dma_mask, addr, size) && !(gfp & GFP_DMA)) { - free_pages((unsigned long)page_address(page), get_order(size)); - gfp |= GFP_DMA; - goto again; - } - - if (check_addr("alloc_coherent", hwdev, addr, size)) { - *dma_addr = addr; - flush_write_buffers(); - return page_address(page); - } - - free_pages((unsigned long)page_address(page), get_order(size)); - - return NULL; -} - static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr) { @@ -116,7 +79,7 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, } struct dma_mapping_ops nommu_dma_ops = { - .alloc_coherent = nommu_alloc_coherent, + .alloc_coherent = dma_generic_alloc_coherent, .free_coherent = nommu_free_coherent, .map_single = nommu_map_single, .map_sg = nommu_map_sg, -- cgit v1.2.3 From ecef533ea68b2fb3baaf459beb2f802a240bdb16 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:36 +0900 Subject: revert "x86: make GART to respect device's dma_mask about virtual mappings" This reverts: commit bee44f294efd8417f5e68553778a6cc957af1547 Author: FUJITA Tomonori Date: Fri Sep 12 19:42:35 2008 +0900 x86: make GART to respect device's dma_mask about virtual mappings I wrote the above commit to fix a GART alloc_coherent regression, that can't handle a device having dma_masks > 24bit < 32bits, introduced by the alloc_coherent rewrite: http://lkml.org/lkml/2008/8/12/200 After the alloc_coherent rewrite, GART alloc_coherent tried to allocate pages with GFP_DMA32. If GART got an address that a device can't access to, GART mapped the address to a virtual I/O address. But GART mapping mechanism didn't take account of dma mask, so GART could use a virtual I/O address that the device can't access to again. Alan pointed out: " This is indeed a specific problem found with things like older AACRAID where control blocks must be below 31bits and the GART is above 0x80000000. " The above commit modified GART mapping mechanism to take care of dma mask. But Andi pointed out, "The GART is somewhere in the 4GB range so you cannot use it to map anything < 4GB. Also GART is pretty small." http://lkml.org/lkml/2008/9/12/43 That means it's possible that GART doesn't have virtual I/O address space that a device can access to. The above commit (to modify GART mapping mechanism to take care of dma mask) can't fix the regression reliably so let's avoid making GART more complicated. We need a solution that always works for dma_masks > 24bit < 32bits. That's how GART worked before the alloc_coherent rewrite. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Acked-by: Alan Cox Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 9e390f1bd46..7e08e466b8a 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -83,34 +83,23 @@ static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ static unsigned long alloc_iommu(struct device *dev, int size, - unsigned long align_mask, u64 dma_mask) + unsigned long align_mask) { unsigned long offset, flags; unsigned long boundary_size; unsigned long base_index; - unsigned long limit; base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; - limit = iommu_device_max_index(iommu_pages, - DIV_ROUND_UP(iommu_bus_base, PAGE_SIZE), - dma_mask >> PAGE_SHIFT); - spin_lock_irqsave(&iommu_bitmap_lock, flags); - - if (limit <= next_bit) { - need_flush = 1; - next_bit = 0; - } - - offset = iommu_area_alloc(iommu_gart_bitmap, limit, next_bit, + offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, size, base_index, boundary_size, align_mask); - if (offset == -1 && next_bit) { + if (offset == -1) { need_flush = 1; - offset = iommu_area_alloc(iommu_gart_bitmap, limit, 0, + offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, size, base_index, boundary_size, align_mask); } @@ -239,14 +228,12 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir, unsigned long align_mask, - u64 dma_mask) + size_t size, int dir, unsigned long align_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page; + unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); int i; - iommu_page = alloc_iommu(dev, npages, align_mask, dma_mask); if (iommu_page == -1) { if (!nonforced_iommu(dev, phys_mem, size)) return phys_mem; @@ -276,7 +263,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = dma_map_area(dev, paddr, size, dir, 0, dma_get_mask(dev)); + bus = dma_map_area(dev, paddr, size, dir, 0); flush_gart(); return bus; @@ -327,7 +314,6 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, { struct scatterlist *s; int i; - u64 dma_mask = dma_get_mask(dev); #ifdef CONFIG_IOMMU_DEBUG printk(KERN_DEBUG "dma_map_sg overflow\n"); @@ -337,8 +323,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir, 0, - dma_mask); + addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -360,16 +345,14 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start; - unsigned long iommu_page; + unsigned long iommu_start = alloc_iommu(dev, pages, 0); + unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; - iommu_start = alloc_iommu(dev, pages, 0, dma_get_mask(dev)); if (iommu_start == -1) return -1; - iommu_page = iommu_start; for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; @@ -522,7 +505,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, align_mask = (1UL << get_order(size)) - 1; *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, - align_mask, dma_mask); + align_mask); flush_gart(); if (*dma_addr != bad_dma_address) -- cgit v1.2.3 From 1d990882153f36723f9e8717c4401689e64c7a36 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 20:48:37 +0900 Subject: x86: restore old GART alloc_coherent behavior Currently, GART alloc_coherent tries to allocate pages with GFP_DMA32 for a device having dma_masks > 24bit < 32bits. If GART gets an address that a device can't access to, GART try to map the address to a virtual I/O address that the device can access to. But Andi pointed out, "The GART is somewhere in the 4GB range so you cannot use it to map anything < 4GB. Also GART is pretty small." http://lkml.org/lkml/2008/9/12/43 That is, it's possible that GART doesn't have virtual I/O address space that a device can access to. The above behavior doesn't work for a device having dma_masks > 24bit < 32bits. This patch restores old GART alloc_coherent behavior (before the alloc_coherent rewrite). Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 7e08e466b8a..25c94fb96d7 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -487,31 +487,28 @@ static void * gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { - void *vaddr; dma_addr_t paddr; unsigned long align_mask; - u64 dma_mask = dma_alloc_coherent_mask(dev, flag); - - vaddr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); - if (!vaddr) - return NULL; - - paddr = virt_to_phys(vaddr); - if (is_buffer_dma_capable(dma_mask, paddr, size)) { - *dma_addr = paddr; - return vaddr; - } - - align_mask = (1UL << get_order(size)) - 1; - - *dma_addr = dma_map_area(dev, paddr, size, DMA_BIDIRECTIONAL, - align_mask); - flush_gart(); - - if (*dma_addr != bad_dma_address) - return vaddr; - - free_pages((unsigned long)vaddr, get_order(size)); + struct page *page; + + if (force_iommu && !(flag & GFP_DMA)) { + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + page = alloc_pages(flag | __GFP_ZERO, get_order(size)); + if (!page) + return NULL; + + align_mask = (1UL << get_order(size)) - 1; + paddr = dma_map_area(dev, page_to_phys(page), size, + DMA_BIDIRECTIONAL, align_mask); + + flush_gart(); + if (paddr != bad_dma_address) { + *dma_addr = paddr; + return page_address(page); + } + __free_pages(page, get_order(size)); + } else + return dma_generic_alloc_coherent(dev, size, dma_addr, flag); return NULL; } -- cgit v1.2.3 From 1615965e54eb94d7bcd298d2163739bd79f602d4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 24 Sep 2008 22:41:10 +0900 Subject: x86 gart: remove unnecessary initialization There is no point to have such initialization in struct dma_mapping_ops. Signed-off-by: FUJITA Tomonori Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 25c94fb96d7..b85a2f9bb34 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -715,12 +715,6 @@ extern int agp_amd64_init(void); static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .unmap_single = gart_unmap_single, - .sync_single_for_cpu = NULL, - .sync_single_for_device = NULL, - .sync_single_range_for_cpu = NULL, - .sync_single_range_for_device = NULL, - .sync_sg_for_cpu = NULL, - .sync_sg_for_device = NULL, .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .alloc_coherent = gart_alloc_coherent, -- cgit v1.2.3 From f6476774f1fe32593d3d71903b1e98514efbf685 Mon Sep 17 00:00:00 2001 From: Bill Nottingham Date: Wed, 24 Sep 2008 14:35:17 -0400 Subject: x86_64: be less annoying on boot Remove mostly useless message on every boot. Signed-off-by: Bill Nottingham Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 9bfc4d72fb2..11aa501c9f4 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -112,8 +112,6 @@ void __init x86_64_start_kernel(char * real_mode_data) x86_64_init_pda(); - early_printk("Kernel really alive\n"); - x86_64_start_reservations(real_mode_data); } -- cgit v1.2.3 From d7161a65341556bacb5e6654e133803f46f51063 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 26 Sep 2008 10:36:41 -0500 Subject: kgdb, x86, arm, mips, powerpc: ignore user space single stepping On the x86 arch, user space single step exceptions should be ignored if they occur in the kernel space, such as ptrace stepping through a system call. First check if it is kgdb that is executing a single step, then ensure it is not an accidental traversal into the user space, while in kgdb, any other time the TIF_SINGLESTEP is set, kgdb should ignore the exception. On x86, arm, mips and powerpc, the kgdb_contthread usage was inconsistent with the way single stepping is implemented in the kgdb core. The arch specific stub should always set the kgdb_cpu_doing_single_step correctly if it is single stepping. This allows kgdb to correctly process an instruction steps if ptrace happens to be requesting an instruction step over a system call. Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index f47f0eb886b..00f7896c9a1 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -378,10 +378,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, if (remcomInBuffer[0] == 's') { linux_regs->flags |= X86_EFLAGS_TF; kgdb_single_step = 1; - if (kgdb_contthread) { - atomic_set(&kgdb_cpu_doing_single_step, - raw_smp_processor_id()); - } + atomic_set(&kgdb_cpu_doing_single_step, + raw_smp_processor_id()); } get_debugreg(dr6, 6); @@ -466,9 +464,15 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) case DIE_DEBUG: if (atomic_read(&kgdb_cpu_doing_single_step) == - raw_smp_processor_id() && - user_mode(regs)) - return single_step_cont(regs, args); + raw_smp_processor_id()) { + if (user_mode(regs)) + return single_step_cont(regs, args); + break; + } else if (test_thread_flag(TIF_SINGLESTEP)) + /* This means a user thread is single stepping + * a system call which should be ignored + */ + return NOTIFY_DONE; /* fall through */ default: if (user_mode(regs)) -- cgit v1.2.3 From 703a1edcd1534468fc18f733c03bd91a65c8c6f0 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 26 Sep 2008 10:36:42 -0500 Subject: kgdb, x86_64: fix PS CS SS registers in gdb serial On x86_64 the gdb serial register structure defines the PS (also known as eflags), CS and SS registers as 4 bytes entities. This patch splits the x86_64 regnames enum into a 32 and 64 version to account for the 32 bit entities in the gdb serial packets. Also the program counter is properly filled in for the sleeping threads. Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 00f7896c9a1..8282a213968 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -69,6 +69,9 @@ static int gdb_x86vector = -1; */ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) { +#ifndef CONFIG_X86_32 + u32 *gdb_regs32 = (u32 *)gdb_regs; +#endif gdb_regs[GDB_AX] = regs->ax; gdb_regs[GDB_BX] = regs->bx; gdb_regs[GDB_CX] = regs->cx; @@ -76,9 +79,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_SI] = regs->si; gdb_regs[GDB_DI] = regs->di; gdb_regs[GDB_BP] = regs->bp; - gdb_regs[GDB_PS] = regs->flags; gdb_regs[GDB_PC] = regs->ip; #ifdef CONFIG_X86_32 + gdb_regs[GDB_PS] = regs->flags; gdb_regs[GDB_DS] = regs->ds; gdb_regs[GDB_ES] = regs->es; gdb_regs[GDB_CS] = regs->cs; @@ -94,6 +97,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_R13] = regs->r13; gdb_regs[GDB_R14] = regs->r14; gdb_regs[GDB_R15] = regs->r15; + gdb_regs32[GDB_PS] = regs->flags; + gdb_regs32[GDB_CS] = regs->cs; + gdb_regs32[GDB_SS] = regs->ss; #endif gdb_regs[GDB_SP] = regs->sp; } @@ -112,6 +118,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) */ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) { +#ifndef CONFIG_X86_32 + u32 *gdb_regs32 = (u32 *)gdb_regs; +#endif gdb_regs[GDB_AX] = 0; gdb_regs[GDB_BX] = 0; gdb_regs[GDB_CX] = 0; @@ -129,8 +138,10 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; #else - gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); - gdb_regs[GDB_PC] = 0; + gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); + gdb_regs32[GDB_CS] = __KERNEL_CS; + gdb_regs32[GDB_SS] = __KERNEL_DS; + gdb_regs[GDB_PC] = p->thread.ip; gdb_regs[GDB_R8] = 0; gdb_regs[GDB_R9] = 0; gdb_regs[GDB_R10] = 0; @@ -153,6 +164,9 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) */ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) { +#ifndef CONFIG_X86_32 + u32 *gdb_regs32 = (u32 *)gdb_regs; +#endif regs->ax = gdb_regs[GDB_AX]; regs->bx = gdb_regs[GDB_BX]; regs->cx = gdb_regs[GDB_CX]; @@ -160,9 +174,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) regs->si = gdb_regs[GDB_SI]; regs->di = gdb_regs[GDB_DI]; regs->bp = gdb_regs[GDB_BP]; - regs->flags = gdb_regs[GDB_PS]; regs->ip = gdb_regs[GDB_PC]; #ifdef CONFIG_X86_32 + regs->flags = gdb_regs[GDB_PS]; regs->ds = gdb_regs[GDB_DS]; regs->es = gdb_regs[GDB_ES]; regs->cs = gdb_regs[GDB_CS]; @@ -175,6 +189,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) regs->r13 = gdb_regs[GDB_R13]; regs->r14 = gdb_regs[GDB_R14]; regs->r15 = gdb_regs[GDB_R15]; + regs->flags = gdb_regs32[GDB_PS]; + regs->cs = gdb_regs32[GDB_CS]; + regs->ss = gdb_regs32[GDB_SS]; #endif } -- cgit v1.2.3 From 7fc2368d1d0dce7a778beb2fba3acac8fa7a34b6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Sep 2008 00:30:07 -0700 Subject: x86: don't need to go to chunksize to 4G change back chunksize max to 2g otherwise will get strange layout in 2G ram system like 0 - 4g WB, 2040M - 2048M UC, 2048M - 4G NC instead of 0 - 2g WB, 2040M - 2048M UC Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b117d7f8a56..cc135572882 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1155,10 +1155,10 @@ struct mtrr_cleanup_result { /* * gran_size: 1M, 2M, ..., 2G - * chunk size: gran_size, ..., 4G - * so we need (2+13)*6 + * chunk size: gran_size, ..., 2G + * so we need (1+12)*6 */ -#define NUM_RESULT 90 +#define NUM_RESULT 78 #define PSHIFT (PAGE_SHIFT - 10) static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; @@ -1276,7 +1276,7 @@ static int __init mtrr_cleanup(unsigned address_bits) memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); memset(result, 0, sizeof(result)); for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { - for (chunk_size = gran_size; chunk_size < (1ULL<<33); + for (chunk_size = gran_size; chunk_size < (1ULL<<32); chunk_size <<= 1) { int num_reg; -- cgit v1.2.3 From 2313c2793d290a8cc37c428f8622c53f3fe1d6dc Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Sep 2008 00:30:08 -0700 Subject: x86: mtrr_cleanup optimization, v2 fix hpa's t61 with 4g ram: change layout from (n - 1)*chunksize + chunk_size - NC to n*chunksize - NC Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 79 +++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 42 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index cc135572882..93d575ac61a 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -970,6 +970,8 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, /* try to append some small hole */ range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); + + /* no increase */ if (range0_sizek == state->range_sizek) { if (debug_print) printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", @@ -980,13 +982,13 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, return 0; } - range0_sizek -= chunk_sizek; - if (range0_sizek && sizek) { - while (range0_basek + range0_sizek > (basek + sizek)) { - range0_sizek -= chunk_sizek; - if (!range0_sizek) - break; - } + /* only cut back, when it is not the last */ + if (sizek) { + while (range0_basek + range0_sizek > (basek + sizek)) { + range0_sizek -= chunk_sizek; + if (!range0_sizek) + break; + } } if (range0_sizek) { @@ -1000,46 +1002,39 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, } range_basek = range0_basek + range0_sizek; - range_sizek = chunk_sizek; - if (range_basek + range_sizek > basek && - range_basek + range_sizek <= (basek + sizek)) { - /* one hole */ - second_basek = basek; - second_sizek = range_basek + range_sizek - basek; - } + /* one hole in the middle */ + if (range_basek > basek && range_basek <= (basek + sizek)) + second_sizek = range_basek - basek; - /* if last piece, only could one hole near end */ - if ((second_basek || !basek) && - range_sizek - (state->range_sizek - range0_sizek) - second_sizek < - (chunk_sizek >> 1)) { - /* - * one hole in middle (second_sizek is 0) or at end - * (second_sizek is 0 ) - */ - hole_sizek = range_sizek - (state->range_sizek - range0_sizek) - - second_sizek; - hole_basek = range_basek + range_sizek - hole_sizek - - second_sizek; - } else { - /* fallback for big hole, or several holes */ + if (range0_sizek > state->range_sizek) { + unsigned long hole_basek, hole_sizek; + + /* one hole in middle or at end */ + hole_sizek = range0_sizek - state->range_sizek - second_sizek; + if (hole_sizek) { + hole_basek = range_basek - hole_sizek - second_sizek; + if (debug_print) + printk(KERN_DEBUG "hole: %016lx - %016lx\n", + hole_basek<<10, + (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, + hole_sizek, + MTRR_TYPE_UNCACHABLE); + } + } else { + /* need to handle left over */ range_sizek = state->range_sizek - range0_sizek; - second_basek = 0; - second_sizek = 0; - } - if (debug_print) - printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, - (range_basek + range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, + if (range_sizek) { + if (debug_print) + printk(KERN_DEBUG "range: %016lx - %016lx\n", + range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, + range_sizek, MTRR_TYPE_WRBACK); - if (hole_sizek) { - if (debug_print) - printk(KERN_DEBUG "hole: %016lx - %016lx\n", - hole_basek<<10, (hole_basek + hole_sizek)<<10); - state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, - MTRR_TYPE_UNCACHABLE); - + } } return second_sizek; -- cgit v1.2.3 From 54d45ff4208836e62536fc40b0141586dbf6641f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Sep 2008 00:30:06 -0700 Subject: x86: add mtrr_cleanup_debug command line add mtrr_cleanup_debug to print out more info about layout Signed-off-by: Yinghai Lu Cc: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 93d575ac61a..aff46b99ff0 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -836,6 +836,13 @@ static int __init enable_mtrr_cleanup_setup(char *str) } early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); +static int __init mtrr_cleanup_debug_setup(char *str) +{ + debug_print = 1; + return 0; +} +early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); + struct var_mtrr_state { unsigned long range_startk; unsigned long range_sizek; @@ -1227,7 +1234,7 @@ static int __init mtrr_cleanup(unsigned address_bits) if (mtrr_chunk_size && mtrr_gran_size) { int num_reg; - debug_print = 1; + debug_print++; /* convert ranges to var ranges state */ num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, mtrr_gran_size); @@ -1263,7 +1270,7 @@ static int __init mtrr_cleanup(unsigned address_bits) } printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " "will find optimal one\n"); - debug_print = 0; + debug_print--; memset(result, 0, sizeof(result[0])); } @@ -1366,8 +1373,9 @@ static int __init mtrr_cleanup(unsigned address_bits) chunk_size <<= 10; gran_size = result[i].gran_sizek; gran_size <<= 10; - debug_print = 1; + debug_print++; x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); + debug_print--; set_var_mtrr_all(address_bits); return 1; } -- cgit v1.2.3 From 237a62247c2879331986a300d6ab36ad21264c68 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:13:53 +0200 Subject: x86/iommu: make GART driver checkpatch clean Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index b85a2f9bb34..aa569db73a2 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -27,8 +27,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -175,7 +175,8 @@ static void dump_leak(void) iommu_leak_pages); for (i = 0; i < iommu_leak_pages; i += 2) { printk(KERN_DEBUG "%lu: ", iommu_pages-i); - printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0); + printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], + 0); printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); } printk(KERN_DEBUG "\n"); @@ -688,7 +689,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) if (!error) error = sysdev_register(&device_gart); if (error) - panic("Could not register gart_sysdev -- would corrupt data on next suspend"); + panic("Could not register gart_sysdev -- " + "would corrupt data on next suspend"); flush_gart(); @@ -710,8 +712,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) return -1; } -extern int agp_amd64_init(void); - static struct dma_mapping_ops gart_dma_ops = { .map_single = gart_map_single, .unmap_single = gart_unmap_single, @@ -777,8 +777,8 @@ void __init gart_iommu_init(void) (no_agp && init_k8_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) { printk(KERN_WARNING "More than 4GB of memory " - "but GART IOMMU not available.\n" - KERN_WARNING "falling back to iommu=soft.\n"); + "but GART IOMMU not available.\n"); + printk(KERN_WARNING "falling back to iommu=soft.\n"); } return; } @@ -868,7 +868,8 @@ void __init gart_parse_options(char *p) if (!strncmp(p, "leak", 4)) { leak_trace = 1; p += 4; - if (*p == '=') ++p; + if (*p == '=') + ++p; if (isdigit(*p) && get_option(&p, &arg)) iommu_leak_pages = arg; } -- cgit v1.2.3 From 3610f2116e961cdcbd3546a3828470f7aa636212 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:13:54 +0200 Subject: x86/iommu: convert GART need_flush to bool Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index aa569db73a2..aecea068f58 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -80,7 +80,7 @@ AGPEXTERN int agp_memory_reserved; AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ -static int need_flush; /* global flush state. set for each gart wrap */ +static bool need_flush; /* global flush state. set for each gart wrap */ static unsigned long alloc_iommu(struct device *dev, int size, unsigned long align_mask) @@ -98,7 +98,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, size, base_index, boundary_size, align_mask); if (offset == -1) { - need_flush = 1; + need_flush = true; offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, size, base_index, boundary_size, align_mask); @@ -107,11 +107,11 @@ static unsigned long alloc_iommu(struct device *dev, int size, next_bit = offset+size; if (next_bit >= iommu_pages) { next_bit = 0; - need_flush = 1; + need_flush = true; } } if (iommu_fullflush) - need_flush = 1; + need_flush = true; spin_unlock_irqrestore(&iommu_bitmap_lock, flags); return offset; @@ -136,7 +136,7 @@ static void flush_gart(void) spin_lock_irqsave(&iommu_bitmap_lock, flags); if (need_flush) { k8_flush_garts(); - need_flush = 0; + need_flush = false; } spin_unlock_irqrestore(&iommu_bitmap_lock, flags); } -- cgit v1.2.3 From 0114267be1bebc2e9c913b19579900153583d617 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 25 Sep 2008 12:42:12 +0200 Subject: x86/iommu: use __GFP_ZERO instead of memset for GART Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index aecea068f58..d077116fec1 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -674,13 +674,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) info->aper_size = aper_size >> 20; gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); - gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size)); + gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(gatt_size)); if (!gatt) panic("Cannot allocate GATT table"); if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) panic("Could not set GART PTEs to uncacheable pages"); - memset(gatt, 0, gatt_size); agp_gatt_table = gatt; enable_gart_translations(); @@ -788,19 +788,16 @@ void __init gart_iommu_init(void) iommu_size = check_iommu_size(info.aper_base, aper_size); iommu_pages = iommu_size >> PAGE_SHIFT; - iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL, + iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(iommu_pages/8)); if (!iommu_gart_bitmap) panic("Cannot allocate iommu bitmap\n"); - memset(iommu_gart_bitmap, 0, iommu_pages/8); #ifdef CONFIG_IOMMU_LEAK if (leak_trace) { - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL, + iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, get_order(iommu_pages*sizeof(void *))); - if (iommu_leak_tab) - memset(iommu_leak_tab, 0, iommu_pages * 8); - else + if (!iommu_leak_tab) printk(KERN_DEBUG "PCI-DMA: Cannot allocate leak trace area\n"); } -- cgit v1.2.3 From 8f0afaa58e912bbe7d5b0bad9fb024337edf363e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 27 Sep 2008 20:26:06 -0700 Subject: x86: mtrr_cleanup hole size should be less than half of chunk_size, v2 v2: should check with half of range0 size instead of chunk_size So don't have silly big hole. in hpa's case we could auto detect instead of adding mtrr_chunk_size in command line. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 74 ++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 31 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index aff46b99ff0..bccf57f5b61 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -992,22 +992,17 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, /* only cut back, when it is not the last */ if (sizek) { while (range0_basek + range0_sizek > (basek + sizek)) { - range0_sizek -= chunk_sizek; + if (range0_sizek >= chunk_sizek) + range0_sizek -= chunk_sizek; + else + range0_sizek = 0; + if (!range0_sizek) break; } } - if (range0_sizek) { - if (debug_print) - printk(KERN_DEBUG "range0: %016lx - %016lx\n", - range0_basek<<10, - (range0_basek + range0_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range0_basek, - range0_sizek, MTRR_TYPE_WRBACK); - - } - +second_try: range_basek = range0_basek + range0_sizek; /* one hole in the middle */ @@ -1015,33 +1010,50 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, second_sizek = range_basek - basek; if (range0_sizek > state->range_sizek) { - unsigned long hole_basek, hole_sizek; /* one hole in middle or at end */ hole_sizek = range0_sizek - state->range_sizek - second_sizek; - if (hole_sizek) { - hole_basek = range_basek - hole_sizek - second_sizek; - if (debug_print) - printk(KERN_DEBUG "hole: %016lx - %016lx\n", - hole_basek<<10, - (hole_basek + hole_sizek)<<10); - state->reg = range_to_mtrr(state->reg, hole_basek, - hole_sizek, - MTRR_TYPE_UNCACHABLE); + + /* hole size should be less than half of range0 size */ + if (hole_sizek > (range0_sizek >> 1) && + range0_sizek >= chunk_sizek) { + range0_sizek -= chunk_sizek; + second_sizek = 0; + hole_sizek = 0; + + goto second_try; } - } else { + } + + if (range0_sizek) { + if (debug_print) + printk(KERN_DEBUG "range0: %016lx - %016lx\n", + range0_basek<<10, + (range0_basek + range0_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range0_basek, + range0_sizek, MTRR_TYPE_WRBACK); + } + + if (range0_sizek < state->range_sizek) { /* need to handle left over */ range_sizek = state->range_sizek - range0_sizek; - if (range_sizek) { - if (debug_print) - printk(KERN_DEBUG "range: %016lx - %016lx\n", - range_basek<<10, - (range_basek + range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range_basek, - range_sizek, - MTRR_TYPE_WRBACK); - } + if (debug_print) + printk(KERN_DEBUG "range: %016lx - %016lx\n", + range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, + range_sizek, MTRR_TYPE_WRBACK); + } + + if (hole_sizek) { + hole_basek = range_basek - hole_sizek - second_sizek; + if (debug_print) + printk(KERN_DEBUG "hole: %016lx - %016lx\n", + hole_basek<<10, + (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, + hole_sizek, MTRR_TYPE_UNCACHABLE); } return second_sizek; -- cgit v1.2.3 From 12544697f12e0ecdcf971075415c7678fae502af Mon Sep 17 00:00:00 2001 From: dcg Date: Sun, 28 Sep 2008 18:49:46 +0200 Subject: x86_64: be less annoying on boot, v2 Honour "quiet" boot parameter in early_printk() calls Signed-off-by: Diego Calleja Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 11aa501c9f4..d16084f9064 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -108,7 +108,8 @@ void __init x86_64_start_kernel(char * real_mode_data) } load_idt((const struct desc_ptr *)&idt_descr); - early_printk("Kernel alive\n"); + if (console_loglevel == 10) + early_printk("Kernel alive\n"); x86_64_init_pda(); -- cgit v1.2.3 From 73436a1d2501575f9c2e6ddb26889145e23cefd8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 29 Sep 2008 13:39:17 -0700 Subject: x86: mtrr_cleanup safe to get more spare regs now Delay exit to make sure we can actually get the optimal result in as many cases as possible. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index bccf57f5b61..ae0ca97e20b 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1353,10 +1353,8 @@ static int __init mtrr_cleanup(unsigned address_bits) nr_mtrr_spare_reg = num_var_ranges - 1; num_reg_good = -1; for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { - if (!min_loss_pfn[i]) { + if (!min_loss_pfn[i]) num_reg_good = i; - break; - } } index_good = -1; -- cgit v1.2.3 From dd7e52224fd7438d701b4bb9834a9ddc06828210 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 29 Sep 2008 18:54:11 -0700 Subject: x86: mtrr_cleanup prepare to make gran_size to less 1M make the print out right with size < 1M Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 103 ++++++++++++++++++++++++++++++---------- 1 file changed, 79 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index ae0ca97e20b..b1bd1038c91 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -905,6 +905,27 @@ set_var_mtrr_all(unsigned int address_bits) } } +static unsigned long to_size_factor(unsigned long sizek, char *factorp) +{ + char factor; + unsigned long base = sizek; + + if (base & ((1<<10) - 1)) { + /* not MB alignment */ + factor = 'K'; + } else if (base & ((1<<20) - 1)){ + factor = 'M'; + base >>= 10; + } else { + factor = 'G'; + base >>= 20; + } + + *factorp = factor; + + return base; +} + static unsigned int __init range_to_mtrr(unsigned int reg, unsigned long range_startk, unsigned long range_sizek, unsigned char type) @@ -926,13 +947,21 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, align = max_align; sizek = 1 << align; - if (debug_print) + if (debug_print) { + char start_factor = 'K', size_factor = 'K'; + unsigned long start_base, size_base; + + start_base = to_size_factor(range_startk, &start_factor), + size_base = to_size_factor(sizek, &size_factor), + printk(KERN_DEBUG "Setting variable MTRR %d, " - "base: %ldMB, range: %ldMB, type %s\n", - reg, range_startk >> 10, sizek >> 10, + "base: %ld%cB, range: %ld%cB, type %s\n", + reg, start_base, start_factor, + size_base, size_factor, (type == MTRR_TYPE_UNCACHABLE)?"UC": ((type == MTRR_TYPE_WRBACK)?"WB":"Other") ); + } save_var_mtrr(reg++, range_startk, sizek, type); range_startk += sizek; range_sizek -= sizek; @@ -1245,6 +1274,8 @@ static int __init mtrr_cleanup(unsigned address_bits) if (mtrr_chunk_size && mtrr_gran_size) { int num_reg; + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; debug_print++; /* convert ranges to var ranges state */ @@ -1270,12 +1301,15 @@ static int __init mtrr_cleanup(unsigned address_bits) result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT; - printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", - result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, - result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", + result[i].bad?"*BAD*":" ", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", result[i].num_reg, result[i].bad?"-":"", - result[i].lose_cover_sizek >> 10); + lose_base, lose_factor); if (!result[i].bad) { set_var_mtrr_all(address_bits); return 1; @@ -1290,14 +1324,25 @@ static int __init mtrr_cleanup(unsigned address_bits) memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); memset(result, 0, sizeof(result)); for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { + char gran_factor; + unsigned long gran_base; + + if (debug_print) + gran_base = to_size_factor(gran_size >> 10, &gran_factor); + for (chunk_size = gran_size; chunk_size < (1ULL<<32); chunk_size <<= 1) { int num_reg; - if (debug_print) - printk(KERN_INFO - "\ngran_size: %lldM chunk_size_size: %lldM\n", - gran_size >> 20, chunk_size >> 20); + if (debug_print) { + char chunk_factor; + unsigned long chunk_base; + + chunk_base = to_size_factor(chunk_size>>10, &chunk_factor), + printk(KERN_INFO "\n"); + printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n", + gran_base, gran_factor, chunk_base, chunk_factor); + } if (i >= NUM_RESULT) continue; @@ -1340,12 +1385,18 @@ static int __init mtrr_cleanup(unsigned address_bits) /* print out all */ for (i = 0; i < NUM_RESULT; i++) { - printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", - result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, - result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", - result[i].num_reg, result[i].bad?"-":"", - result[i].lose_cover_sizek >> 10); + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; + + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", + result[i].bad?"*BAD*":" ", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", + result[i].num_reg, result[i].bad?"-":"", + lose_base, lose_factor); } /* try to find the optimal index */ @@ -1370,14 +1421,18 @@ static int __init mtrr_cleanup(unsigned address_bits) } if (index_good != -1) { + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; + printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); i = index_good; - printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", - result[i].gran_sizek >> 10, - result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", - result[i].num_reg, - result[i].lose_cover_sizek >> 10); + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n", + result[i].num_reg, lose_base, lose_factor); /* convert ranges to var ranges state */ chunk_size = result[i].chunk_sizek; chunk_size <<= 10; -- cgit v1.2.3 From 4624065731751a3ace88e5824d8e5654e2d7abd3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 29 Sep 2008 18:54:12 -0700 Subject: x86: mtrr_cleanup try gran_size to less than 1M one have gran < 1M reg00: base=0xd8000000 (3456MB), size= 128MB: uncachable, count=1 reg01: base=0xe0000000 (3584MB), size= 512MB: uncachable, count=1 reg02: base=0x00000000 ( 0MB), size=4096MB: write-back, count=1 reg03: base=0x100000000 (4096MB), size= 512MB: write-back, count=1 reg04: base=0x120000000 (4608MB), size= 128MB: write-back, count=1 reg05: base=0xd7f80000 (3455MB), size= 512KB: uncachable, count=1 will get Found optimal setting for mtrr clean up gran_size: 512K chunk_size: 2M num_reg: 7 lose RAM: 0G range0: 0000000000000000 - 00000000d8000000 Setting variable MTRR 0, base: 0GB, range: 2GB, type WB Setting variable MTRR 1, base: 2GB, range: 1GB, type WB Setting variable MTRR 2, base: 3GB, range: 256MB, type WB Setting variable MTRR 3, base: 3328MB, range: 128MB, type WB hole: 00000000d7f00000 - 00000000d7f80000 Setting variable MTRR 4, base: 3455MB, range: 512KB, type UC rangeX: 0000000100000000 - 0000000128000000 Setting variable MTRR 5, base: 4GB, range: 512MB, type WB Setting variable MTRR 6, base: 4608MB, range: 128MB, type WB so start from 64k instead of 1M Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b1bd1038c91..8f1a342b16b 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1197,11 +1197,11 @@ struct mtrr_cleanup_result { }; /* - * gran_size: 1M, 2M, ..., 2G + * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G * chunk size: gran_size, ..., 2G - * so we need (1+12)*6 + * so we need (1+16)*8 */ -#define NUM_RESULT 78 +#define NUM_RESULT 136 #define PSHIFT (PAGE_SHIFT - 10) static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; @@ -1323,7 +1323,7 @@ static int __init mtrr_cleanup(unsigned address_bits) i = 0; memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); memset(result, 0, sizeof(result)); - for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { + for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { char gran_factor; unsigned long gran_base; -- cgit v1.2.3 From 3dcd7e269d2223126f6ee9bc893f5a6166e1770d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2EA=2E=20Magall=C3=B3n?= Date: Tue, 30 Sep 2008 10:02:52 +0200 Subject: x86: fix typo in enable_mtrr_cleanup early parameter Correct typo for 'enable_mtrr_cleanup' early boot param name. Signed-off-by: J.A. Magallon Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b117d7f8a56..885c8265e6b 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -834,7 +834,7 @@ static int __init enable_mtrr_cleanup_setup(char *str) enable_mtrr_cleanup = 1; return 0; } -early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup); +early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); struct var_mtrr_state { unsigned long range_startk; -- cgit v1.2.3 From 9b1568458a3ef006361710dc12848aec891883b5 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Mon, 29 Sep 2008 14:52:03 -0400 Subject: x86, debug printouts: IOMMU setup failures should not be KERN_ERR The number of BIOSes that have an option to enable the IOMMU, or fix anything about its configuration, is vanishingly small. There's no good reason to punish quiet boot for this. Signed-off-by: Adam Jackson Signed-off-by: Ingo Molnar --- arch/x86/kernel/aperture_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 44e21826db1..9a32b37ee2e 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -455,11 +455,11 @@ out: force_iommu || valid_agp || fallback_aper_force) { - printk(KERN_ERR + printk(KERN_INFO "Your BIOS doesn't leave a aperture memory hole\n"); - printk(KERN_ERR + printk(KERN_INFO "Please enable the IOMMU option in the BIOS setup\n"); - printk(KERN_ERR + printk(KERN_INFO "This costs you %d MB of RAM\n", 32 << fallback_aper_order); -- cgit v1.2.3 From de59985e3a623d4d5d6207f1777398ca0606ab1c Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 30 Sep 2008 11:02:12 -0700 Subject: x86: Fix broken LDT access in VMI After investigating a JRE failure, I found this bug was introduced a long time ago, and had already managed to survive another bugfix which occurred on the same line. The result is a total failure of the JRE due to LDT selectors not working properly. This one took a long time to rear up because LDT usage is not very common, but the bug is quite serious. It got introduced along with another bug, already fixed, by 75b8bb3e56ca09a467fbbe5229bc68627f7445be Signed-off-by: Zachary Amsden Cc: Ingo Molnar Cc: Glauber de Oliveira Costa Cc: Signed-off-by: Linus Torvalds --- arch/x86/kernel/vmi_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 6ca515d6db5..edfb09f3047 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { u32 *ldt_entry = (u32 *)desc; - vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); + vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); } static void vmi_load_sp0(struct tss_struct *tss, -- cgit v1.2.3 From dc63b52673d71f9d49b9d72d263a9f32df18c3ee Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Tue, 30 Sep 2008 11:02:12 -0700 Subject: x86, vmi: fix broken LDT access This one took a long time to rear up because LDT usage is not very common, but the bug is quite serious. It got introduced along with another bug, already fixed, by 75b8bb3e56ca09a467fbbe5229bc68627f7445be After investigating a JRE failure, I found this bug was introduced a long time ago, and had already managed to survive another bugfix which occurred on the same line. The result is a total failure of the JRE due to LDT selectors not working properly. Signed-off-by: Zachary Amsden Cc: Glauber de Oliveira Costa Cc: stable@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmi_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 6ca515d6db5..edfb09f3047 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -235,7 +235,7 @@ static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { u32 *ldt_entry = (u32 *)desc; - vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); + vmi_ops.write_ldt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); } static void vmi_load_sp0(struct tss_struct *tss, -- cgit v1.2.3 From 40becd8d5af03ee7935e79c3fccd0d1f380d95b4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 29 Sep 2008 00:06:36 +0900 Subject: AMD IOMMU: use iommu_device_max_index AMD IOMMU can use iommu_device_max_index() instead of the homegrown function. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c19212191c9..3b346c6f551 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -470,10 +470,6 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, * efficient allocator. * ****************************************************************************/ -static unsigned long dma_mask_to_pages(unsigned long mask) -{ - return PAGE_ALIGN(mask) >> PAGE_SHIFT; -} /* * The address allocator core function. @@ -486,14 +482,14 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, unsigned long align_mask, u64 dma_mask) { - unsigned long limit = dma_mask_to_pages(dma_mask); + unsigned long limit; unsigned long address; - unsigned long size = dom->aperture_size >> PAGE_SHIFT; unsigned long boundary_size; boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; - limit = limit < size ? limit : size; + limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0, + dma_mask >> PAGE_SHIFT); if (dom->next_bit >= limit) { dom->next_bit = 0; -- cgit v1.2.3 From 136c82c6f7f12c9bed8bf709f92123077966512c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2EA=2E=20Magall=C3=B3n?= Date: Fri, 3 Oct 2008 00:32:37 +0200 Subject: x86: mtrr_cleanup try gran_size to less than 1M, cleanup Patch below cleans up formatting, with space for big bases and sizes (64 Gb). Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/if.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 84c480bb371..4c4214690dd 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -405,9 +405,9 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) } /* RED-PEN: base can be > 32bit */ len += seq_printf(seq, - "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", + "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n", i, base, base >> (20 - PAGE_SHIFT), size, factor, - mtrr_attrib_to_str(type), mtrr_usage_table[i]); + mtrr_usage_table[i], mtrr_attrib_to_str(type)); } } return 0; -- cgit v1.2.3 From 834836ee6a3a9deadff9394b23db965a08bcde35 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 2 Oct 2008 15:46:20 -0700 Subject: x86: mtrr_cleanup try gran_size to less than 1M, v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit J.A. Magallón reported: >> Also, on a 64 bit box with 4Gb, it gives this: >> >> cicely:~# cat /proc/mtrr >> reg00: base=0x00000000 ( 0MB), size=4096MB: write-back, count=1 >> reg01: base=0x100000000 (4096MB), size=1024MB: write-back, count=1 >> reg02: base=0x140000000 (5120MB), size= 512MB: write-back, count=1 >> reg03: base=0x160000000 (5632MB), size= 256MB: write-back, count=1 >> reg04: base=0x80000000 (2048MB), size=2048MB: uncachable, count=1 boundary handling has a problem ... fix it. Reported-by: J.A. Magallón Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 8f1a342b16b..b4a3f0c1a5e 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1044,7 +1044,7 @@ second_try: hole_sizek = range0_sizek - state->range_sizek - second_sizek; /* hole size should be less than half of range0 size */ - if (hole_sizek > (range0_sizek >> 1) && + if (hole_sizek >= (range0_sizek >> 1) && range0_sizek >= chunk_sizek) { range0_sizek -= chunk_sizek; second_sizek = 0; -- cgit v1.2.3 From 8bb39311bf461243f6cade3644764d848516dd23 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 2 Oct 2008 23:26:59 -0700 Subject: x86, debug: mtrr_cleanup print out var mtrr before change it Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/main.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index b4a3f0c1a5e..406074c46f1 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1211,13 +1211,14 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM]; static int __init mtrr_cleanup(unsigned address_bits) { unsigned long extra_remove_base, extra_remove_size; - unsigned long i, base, size, def, dummy; + unsigned long base, size, def, dummy; mtrr_type type; int nr_range, nr_range_new; u64 chunk_size, gran_size; unsigned long range_sums, range_sums_new; int index_good; int num_reg_good; + int i; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1259,6 +1260,28 @@ static int __init mtrr_cleanup(unsigned address_bits) num_var_ranges - num[MTRR_NUM_TYPES]) return 0; + /* print original var MTRRs at first, for debugging: */ + printk(KERN_DEBUG "original variable MTRRs\n"); + for (i = 0; i < num_var_ranges; i++) { + char start_factor = 'K', size_factor = 'K'; + unsigned long start_base, size_base; + + size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); + if (!size_base) + continue; + + size_base = to_size_factor(size_base, &size_factor), + start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); + start_base = to_size_factor(start_base, &start_factor), + + printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", + i, start_base, start_factor, + size_base, size_factor, + (type == MTRR_TYPE_UNCACHABLE) ? "UC" : + ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other") + ); + } + memset(range, 0, sizeof(range)); extra_remove_size = 0; if (mtrr_tom2) { -- cgit v1.2.3 From 175e438f7a2de9d94110046be48697969569736a Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Thu, 2 Oct 2008 17:32:06 -0500 Subject: x86: trivial printk fix in efi.c [patch] x86: Trivial printk fix in efi.c The following line is lacking a space between "memdesc" and "doesn't". "Kernel-defined memdescdoesn't match the one from EFI!" Fixed the printk by adding a space. Signed-off-by: Russ Anderson Cc: Russ Anderson Signed-off-by: Ingo Molnar --- arch/x86/kernel/efi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 06cc8d4254b..945a31cdd81 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -414,9 +414,11 @@ void __init efi_init(void) if (memmap.map == NULL) printk(KERN_ERR "Could not map the EFI memory map!\n"); memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); + if (memmap.desc_size != sizeof(efi_memory_desc_t)) - printk(KERN_WARNING "Kernel-defined memdesc" - "doesn't match the one from EFI!\n"); + printk(KERN_WARNING + "Kernel-defined memdesc doesn't match the one from EFI!\n"); + if (add_efi_memmap) do_add_efi_memmap(); -- cgit v1.2.3 From 42fde7a05c5d6dc76e518ae12091ea897b1c132b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 19:34:18 -0700 Subject: x86: mtrr_cleanup: print out correct type v2 Print out the correct type when the Write Protected (WP) type is seen. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 406074c46f1..9086b38fbab 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1273,12 +1273,14 @@ static int __init mtrr_cleanup(unsigned address_bits) size_base = to_size_factor(size_base, &size_factor), start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); start_base = to_size_factor(start_base, &start_factor), + type = range_state[i].type; printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", i, start_base, start_factor, size_base, size_factor, (type == MTRR_TYPE_UNCACHABLE) ? "UC" : - ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other") + ((type == MTRR_TYPE_WRPROT) ? "WP" : + ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) ); } -- cgit v1.2.3 From 99e1aa17ce434010dd820b583628370cc15f10f3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 14:50:32 -0700 Subject: x86: mtrr_cleanup: first 1M may be covered in var mtrrs The first 1M is don't care when it comes to the variables MTRRs. Cover it as WB as a heuristic approximation; this is generally what we want to minimize the number of registers. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 9086b38fbab..663e530e08e 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1293,6 +1293,15 @@ static int __init mtrr_cleanup(unsigned address_bits) } nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size); + /* + * [0, 1M) should always be coverred by var mtrr with WB + * and fixed mtrrs should take effective before var mtrr for it + */ + nr_range = add_range_with_merge(range, nr_range, 0, + (1ULL<<(20 - PAGE_SHIFT)) - 1); + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + range_sums = sum_ranges(range, nr_range); printk(KERN_INFO "total RAM coverred: %ldM\n", range_sums >> (20 - PAGE_SHIFT)); -- cgit v1.2.3 From dd5523552c2897e3fde16fc2fc8f6332addf66ab Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 14:50:33 -0700 Subject: x86: mtrr_cleanup: treat WRPROT as UNCACHEABLE For the purpose of MTRR canonicalization, treat WRPROT as UNCACHEABLE. Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 663e530e08e..5994a9f78f3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -759,7 +759,8 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, /* take out UC ranges */ for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; - if (type != MTRR_TYPE_UNCACHABLE) + if (type != MTRR_TYPE_UNCACHABLE && + type != MTRR_TYPE_WRPROT) continue; size = range_state[i].size_pfn; if (!size) @@ -1248,6 +1249,8 @@ static int __init mtrr_cleanup(unsigned address_bits) continue; if (!size) type = MTRR_NUM_TYPES; + if (type == MTRR_TYPE_WRPROT) + type = MTRR_TYPE_UNCACHABLE; num[type]++; } -- cgit v1.2.3 From d99e90164e6cf2eb85fa94d547d6336f8127a107 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 15:55:12 -0700 Subject: x86: gart iommu have direct mapping when agp is present too move init_memory_mapping() out of init_k8_gatt. for: http://bugzilla.kernel.org/show_bug.cgi?id=11676 2.6.27-rc2 to rc8, apgart fails, iommu=soft works, regression This is needed because we need to map the GART aperture even if the GATT is not initialized. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 49285f8fd4d..be33a5442d8 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -626,7 +626,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) struct pci_dev *dev; void *gatt; int i, error; - unsigned long start_pfn, end_pfn; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); aper_size = aper_base = info->aper_size = 0; @@ -672,12 +671,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10); - /* need to map that range */ - end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); - if (end_pfn > max_low_pfn_mapped) { - start_pfn = (aper_base>>PAGE_SHIFT); - init_memory_mapping(start_pfn<>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + if (end_pfn > max_low_pfn_mapped) { + start_pfn = (aper_base>>PAGE_SHIFT); + init_memory_mapping(start_pfn<> PAGE_SHIFT; -- cgit v1.2.3 From e84956f92a846246b09b34f2a728329c386d250f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 6 Oct 2008 11:59:29 +0200 Subject: x86 ACPI: Blacklist two HP machines with buggy BIOSes There is a bug in the BIOSes of some HP boxes with AMD Turions which connects IO-APIC pins with ACPI thermal trip points in such a way that if the state of the IO-APIC is not as expected by the (buggy) BIOS, the thermal trip points are set to insanely low values (usually all of them become 16 degrees Celsius). As a result, thermal throttling kicks in and knock the system down to its shoes. Unfortunately some of the recent IO-APIC changes made the bug show up. To prevent this from happening, blacklist machines that are known to be affected (nx6115 and 6715b in this particular case). This fixes http://bugzilla.kernel.org/show_bug.cgi?id=11516 listed as a regression from 2.6.26. On my box it was caused by: commit 691874fa96d6349a8b60f8ea9c2bae52ece79941 Author: Maciej W. Rozycki Date: Tue May 27 21:19:51 2008 +0100 x86: I/O APIC: timer through 8259A second-chance Signed-off-by: Maciej W. Rozycki Signed-off-by: Ingo Molnar and the whole story is described in this (huge) thread: http://marc.info/?l=linux-kernel&m=121358440508410&w=4 Matthew Garrett told us about that happening on the nx6125: http://marc.info/?l=linux-kernel&m=121396307411930&w=4 and then Maciej analysed the breakage on the basis of a DSDT from the nx6325: http://marc.info/?l=linux-kernel&m=121401068718826&w=4 As far as the Dmitry's and Jason's boxes are concerned, I recognized the symptoms and asked them to verify that the blacklisting helped. It appears that the buggy BIOS code has been copy-pasted to the entire range of machines, for no good reason. Signed-off-by: Rafael J. Wysocki Tested-by: Dmitry Torokhov Tested-by: Jason Vas Dias Signed-off-by: Linus Torvalds --- arch/x86/kernel/acpi/boot.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index bfd10fd211c..c102af85df9 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1603,6 +1603,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { * is not connected at all. Force ignoring BIOS IRQ0 pin2 * override in that cases. */ + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "HP nx6115 laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"), + }, + }, { .callback = dmi_ignore_irq0_timer_override, .ident = "HP NX6125 laptop", @@ -1619,6 +1627,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), }, }, + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "HP 6715b laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), + }, + }, {} }; -- cgit v1.2.3 From e85ceae9102f6e3c1d707e7ac88fa48d252e9cfa Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 6 Oct 2008 13:50:59 -0500 Subject: kgdb, x86: Avoid invoking kgdb_nmicallback twice per NMI Stress-testing KVM's latest NMI support with kgdbts inside an SMP guest, I came across spurious unhandled NMIs while running the singlestep test. Looking closer at the code path each NMI takes when KGDB is enabled, I noticed that kgdb_nmicallback is called twice per event: One time via DIE_NMI_IPI notification, the second time on DIE_NMI. Removing the first invocation cures the unhandled NMIs here. Signed-off-by: Jan Kiszka Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8282a213968..10435a120d2 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -455,12 +455,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) return NOTIFY_DONE; case DIE_NMI_IPI: - if (atomic_read(&kgdb_active) != -1) { - /* KGDB CPU roundup */ - kgdb_nmicallback(raw_smp_processor_id(), regs); - was_in_debug_nmi[raw_smp_processor_id()] = 1; - touch_nmi_watchdog(); - } + /* Just ignore, we will handle the roundup on DIE_NMI. */ return NOTIFY_DONE; case DIE_NMIUNKNOWN: -- cgit v1.2.3 From 8d5922572038bae9f7b16fcb974eee806727b44c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=A9meth=20M=C3=A1rton?= Date: Thu, 9 Oct 2008 14:59:17 +0200 Subject: [CPUFREQ] correct broken links and email addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the no longer working links and email address in the documentation and in source code. Signed-off-by: Márton Németh Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 2 +- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index f1685fb91fb..b8e05ee4f73 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -171,7 +171,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) } if (c->x86 != 0xF) { - printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to \n"); + printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to \n"); return 0; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 15e13c01cc3..3b5f06423e7 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -26,7 +26,7 @@ #include #define PFX "speedstep-centrino: " -#define MAINTAINER "cpufreq@lists.linux.org.uk" +#define MAINTAINER "cpufreq@vger.kernel.org" #define dprintk(msg...) \ cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) -- cgit v1.2.3 From 18c6faa9624bf5d9d7da3906a8a1b909dba5899b Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Mon, 28 Jul 2008 13:00:49 +0200 Subject: [CPUFREQ] Coding style fixes to arch/x86/kernel/cpu/cpufreq/elanfreq.c Before: total: 15 errors, 10 warnings, 308 lines checked After: total: 0 errors, 4 warnings, 308 lines checked paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/elafreq.o.* add1d36c2f077c5aab7682e8642a9f34 /tmp/elafreq.o.after add1d36c2f077c5aab7682e8642a9f34 /tmp/elafreq.o.before paolo@paolo-desktop:~/linux.trees.git$ size /tmp/elafreq.o.* text data bss dec hex filename 934 270 4 1208 4b8 /tmp/elafreq.o.after 934 270 4 1208 4b8 /tmp/elafreq.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/elanfreq.c | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index e4a4bf870e9..fe613c93b36 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -25,8 +25,8 @@ #include #include -#include -#include +#include +#include #define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ @@ -82,7 +82,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) u8 clockspeed_reg; /* Clock Speed Register */ local_irq_disable(); - outb_p(0x80,REG_CSCIR); + outb_p(0x80, REG_CSCIR); clockspeed_reg = inb_p(REG_CSCDR); local_irq_enable(); @@ -98,10 +98,10 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) } /* 33 MHz is not 32 MHz... */ - if ((clockspeed_reg & 0xE0)==0xA0) + if ((clockspeed_reg & 0xE0) == 0xA0) return 33000; - return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); + return (1<<((clockspeed_reg & 0xE0) >> 5)) * 1000; } @@ -117,7 +117,7 @@ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) * There is no return value. */ -static void elanfreq_set_cpu_state (unsigned int state) +static void elanfreq_set_cpu_state(unsigned int state) { struct cpufreq_freqs freqs; @@ -144,20 +144,20 @@ static void elanfreq_set_cpu_state (unsigned int state) */ local_irq_disable(); - outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ - outb_p(0x00,REG_CSCDR); + outb_p(0x40, REG_CSCIR); /* Disable hyperspeed mode */ + outb_p(0x00, REG_CSCDR); local_irq_enable(); /* wait till internal pipelines and */ udelay(1000); /* buffers have cleaned up */ local_irq_disable(); /* now, set the CPU clock speed register (0x80) */ - outb_p(0x80,REG_CSCIR); - outb_p(elan_multiplier[state].val80h,REG_CSCDR); + outb_p(0x80, REG_CSCIR); + outb_p(elan_multiplier[state].val80h, REG_CSCDR); /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ - outb_p(0x40,REG_CSCIR); - outb_p(elan_multiplier[state].val40h,REG_CSCDR); + outb_p(0x40, REG_CSCIR); + outb_p(elan_multiplier[state].val40h, REG_CSCDR); udelay(10000); local_irq_enable(); @@ -173,12 +173,12 @@ static void elanfreq_set_cpu_state (unsigned int state) * for the hardware supported by the driver. */ -static int elanfreq_verify (struct cpufreq_policy *policy) +static int elanfreq_verify(struct cpufreq_policy *policy) { return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); } -static int elanfreq_target (struct cpufreq_policy *policy, +static int elanfreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { @@ -205,7 +205,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) /* capability check */ if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model!=10)) + (c->x86 != 4) || (c->x86_model != 10)) return -ENODEV; /* max freq */ @@ -213,7 +213,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) max_freq = elanfreq_get_cpu_frequency(0); /* table init */ - for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { + for (i = 0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { if (elanfreq_table[i].frequency > max_freq) elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; } @@ -224,7 +224,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); if (result) - return (result); + return result; cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); return 0; @@ -260,7 +260,7 @@ __setup("elanfreq=", elanfreq_setup); #endif -static struct freq_attr* elanfreq_attr[] = { +static struct freq_attr *elanfreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -284,9 +284,9 @@ static int __init elanfreq_init(void) /* Test if we have the right hardware */ if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model!=10)) { + (c->x86 != 4) || (c->x86_model != 10)) { printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); - return -ENODEV; + return -ENODEV; } return cpufreq_register_driver(&elanfreq_driver); } @@ -298,7 +298,7 @@ static void __exit elanfreq_exit(void) } -module_param (max_freq, int, 0444); +module_param(max_freq, int, 0444); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Robert Schwebel , Sven Geggus "); -- cgit v1.2.3 From 8d2d2051e50c4ca0207a42b243369e22f7d90a5c Mon Sep 17 00:00:00 2001 From: Paolo Ciarrocchi Date: Mon, 28 Jul 2008 21:08:16 +0200 Subject: [CPUFREQ] Coding style fixes to arch/x86/kernel/cpu/cpufreq/powernow-k6.c Before: total: 11 errors, 15 warnings, 255 lines checked After: total: 0 errors, 6 warnings, 254 lines checked paolo@paolo-desktop:~/linux.trees.git$ md5sum /tmp/powernow-k6.o.* 476932f5e1ffe365db9d1dfb3f860369 /tmp/powernow-k6.o.after 476932f5e1ffe365db9d1dfb3f860369 /tmp/powernow-k6.o.before Signed-off-by: Paolo Ciarrocchi Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 41 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index eb9b62b0830..b5ced806a31 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -15,12 +15,11 @@ #include #include -#include -#include +#include +#include - -#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long - as it is unused */ +#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long + as it is unused */ static unsigned int busfreq; /* FSB, in 10 kHz */ static unsigned int max_multiplier; @@ -53,7 +52,7 @@ static int powernow_k6_get_cpu_multiplier(void) msrval = POWERNOW_IOPORT + 0x1; wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue=inl(POWERNOW_IOPORT + 0x8); + invalue = inl(POWERNOW_IOPORT + 0x8); msrval = POWERNOW_IOPORT + 0x0; wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ @@ -67,9 +66,9 @@ static int powernow_k6_get_cpu_multiplier(void) * * Tries to change the PowerNow! multiplier */ -static void powernow_k6_set_state (unsigned int best_i) +static void powernow_k6_set_state(unsigned int best_i) { - unsigned long outvalue=0, invalue=0; + unsigned long outvalue = 0, invalue = 0; unsigned long msrval; struct cpufreq_freqs freqs; @@ -90,10 +89,10 @@ static void powernow_k6_set_state (unsigned int best_i) msrval = POWERNOW_IOPORT + 0x1; wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue=inl(POWERNOW_IOPORT + 0x8); + invalue = inl(POWERNOW_IOPORT + 0x8); invalue = invalue & 0xf; outvalue = outvalue | invalue; - outl(outvalue ,(POWERNOW_IOPORT + 0x8)); + outl(outvalue , (POWERNOW_IOPORT + 0x8)); msrval = POWERNOW_IOPORT + 0x0; wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ @@ -124,7 +123,7 @@ static int powernow_k6_verify(struct cpufreq_policy *policy) * * sets a new CPUFreq policy */ -static int powernow_k6_target (struct cpufreq_policy *policy, +static int powernow_k6_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { @@ -152,7 +151,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) busfreq = cpu_khz / max_multiplier; /* table init */ - for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { + for (i = 0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { if (clock_ratio[i].index > max_multiplier) clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; else @@ -165,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); if (result) - return (result); + return result; cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); @@ -176,8 +175,8 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) { unsigned int i; - for (i=0; i<8; i++) { - if (i==max_multiplier) + for (i = 0; i < 8; i++) { + if (i == max_multiplier) powernow_k6_set_state(i); } cpufreq_frequency_table_put_attr(policy->cpu); @@ -189,7 +188,7 @@ static unsigned int powernow_k6_get(unsigned int cpu) return busfreq * powernow_k6_get_cpu_multiplier(); } -static struct freq_attr* powernow_k6_attr[] = { +static struct freq_attr *powernow_k6_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, NULL, }; @@ -227,7 +226,7 @@ static int __init powernow_k6_init(void) } if (cpufreq_register_driver(&powernow_k6_driver)) { - release_region (POWERNOW_IOPORT, 16); + release_region(POWERNOW_IOPORT, 16); return -EINVAL; } @@ -243,13 +242,13 @@ static int __init powernow_k6_init(void) static void __exit powernow_k6_exit(void) { cpufreq_unregister_driver(&powernow_k6_driver); - release_region (POWERNOW_IOPORT, 16); + release_region(POWERNOW_IOPORT, 16); } -MODULE_AUTHOR ("Arjan van de Ven , Dave Jones , Dominik Brodowski "); -MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Arjan van de Ven , Dave Jones , Dominik Brodowski "); +MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); +MODULE_LICENSE("GPL"); module_init(powernow_k6_init); module_exit(powernow_k6_exit); -- cgit v1.2.3 From 847aef6ffd85787b62395b64719f8f7c5975bf1b Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 14 Jul 2008 11:59:44 +0900 Subject: [CPUFREQ] acpi-cpufreq: add error handling for cpufreq_register_driver() error add error handling for cpufreq_register_driver() error Signed-off-by: Akinobu Mita Cc: cpufreq@lists.linux.org.uk Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index dd097b83583..1def5b06fa4 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -785,7 +785,11 @@ static int __init acpi_cpufreq_init(void) if (ret) return ret; - return cpufreq_register_driver(&acpi_cpufreq_driver); + ret = cpufreq_register_driver(&acpi_cpufreq_driver); + if (ret) + free_percpu(acpi_perf_data); + + return ret; } static void __exit acpi_cpufreq_exit(void) @@ -795,8 +799,6 @@ static void __exit acpi_cpufreq_exit(void) cpufreq_unregister_driver(&acpi_cpufreq_driver); free_percpu(acpi_perf_data); - - return; } module_param(acpi_pstate_strict, uint, 0644); -- cgit v1.2.3 From bf0b90e357c883e8efd72954432efe652de74c76 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Mon, 4 Aug 2008 11:59:07 -0700 Subject: [CPUFREQ][1/6] cpufreq: Add cpu number parameter to __cpufreq_driver_getavg() Add a cpu parameter to __cpufreq_driver_getavg(). This is needed for software cpufreq coordination where policy->cpu may not be same as the CPU on which we want to getavg frequency. A follow-on patch will use this parameter to getavg freq from all cpus in policy->cpus. Change since last patch. Fix the offline/online and suspend/resume oops reported by Youquan Song Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 1def5b06fa4..c24c4a487b7 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -256,7 +256,8 @@ static u32 get_cur_val(const cpumask_t *mask) * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and * no meaning should be associated with absolute values of these MSRs. */ -static unsigned int get_measured_perf(unsigned int cpu) +static unsigned int get_measured_perf(struct cpufreq_policy *policy, + unsigned int cpu) { union { struct { @@ -326,7 +327,7 @@ static unsigned int get_measured_perf(unsigned int cpu) #endif - retval = per_cpu(drv_data, cpu)->max_freq * perf_percent / 100; + retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100; put_cpu(); set_cpus_allowed_ptr(current, &saved_mask); -- cgit v1.2.3 From 43603c8df97f246e8be7b9cc92a8f968a85108bd Mon Sep 17 00:00:00 2001 From: Hans Schou Date: Thu, 9 Oct 2008 20:47:24 +0200 Subject: x86, debug: print more information about unknown CPUs Write the name of the unknown vendor_id to output instead of just "unknown". Tag changed to 'vendor_id' as used in /proc/cpuinfo Signed-off-by: Hans Schou Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cef3519b3bb..84c4040efa8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -406,7 +406,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) if (!printed) { printed++; - printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); printk(KERN_ERR "CPU: Your system may be unstable.\n"); } -- cgit v1.2.3 From b2bc27314664c4d1a2f02e6f4cd0c32e4681d61e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 23 Sep 2008 14:00:36 -0700 Subject: x86, cpa: rename PTE attribute macros for kernel direct mapping in early boot Signed-off-by: Suresh Siddha Cc: Suresh Siddha Cc: arjan@linux.intel.com Cc: venkatesh.pallipadi@intel.com Cc: jeremy@goop.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 34 +++++++++++++++------------------- arch/x86/kernel/head_64.S | 4 ++-- 2 files changed, 17 insertions(+), 21 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index a7010c3a377..e835b4eea70 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -172,10 +172,6 @@ num_subarch_entries = (. - subarch_entries) / 4 * * Note that the stack is not yet set up! */ -#define PTE_ATTR 0x007 /* PRESENT+RW+USER */ -#define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ -#define PGD_ATTR 0x001 /* PRESENT (no other attributes) */ - default_entry: #ifdef CONFIG_X86_PAE @@ -196,9 +192,9 @@ default_entry: movl $pa(pg0), %edi movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_pmd), %edx - movl $PTE_ATTR, %eax + movl $PTE_IDENT_ATTR, %eax 10: - leal PDE_ATTR(%edi),%ecx /* Create PMD entry */ + leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ movl %ecx,(%edx) /* Store PMD entry */ /* Upper half already zero */ addl $8,%edx @@ -215,7 +211,7 @@ default_entry: * End condition: we must map up to and including INIT_MAP_BEYOND_END * bytes beyond the end of our own page tables. */ - leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp + leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp cmpl %ebp,%eax jb 10b 1: @@ -224,7 +220,7 @@ default_entry: movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ - movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax + movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) #else /* Not PAE */ @@ -233,9 +229,9 @@ page_pde_offset = (__PAGE_OFFSET >> 20); movl $pa(pg0), %edi movl %edi, pa(init_pg_tables_start) movl $pa(swapper_pg_dir), %edx - movl $PTE_ATTR, %eax + movl $PTE_IDENT_ATTR, %eax 10: - leal PDE_ATTR(%edi),%ecx /* Create PDE entry */ + leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ movl %ecx,(%edx) /* Store identity PDE entry */ movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ addl $4,%edx @@ -249,7 +245,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); * bytes beyond the end of our own page tables; the +0x007 is * the attribute bits */ - leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp + leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp cmpl %ebp,%eax jb 10b movl %edi,pa(init_pg_tables_end) @@ -257,7 +253,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ - movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax + movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax movl %eax,pa(swapper_pg_dir+0xffc) #endif jmp 3f @@ -634,19 +630,19 @@ ENTRY(empty_zero_page) /* Page-aligned for the benefit of paravirt? */ .align PAGE_SIZE_asm ENTRY(swapper_pg_dir) - .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */ + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ # if KPMDS == 3 - .long pa(swapper_pg_pmd+PGD_ATTR),0 - .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 - .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0 # elif KPMDS == 2 .long 0,0 - .long pa(swapper_pg_pmd+PGD_ATTR),0 - .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 # elif KPMDS == 1 .long 0,0 .long 0,0 - .long pa(swapper_pg_pmd+PGD_ATTR),0 + .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 # else # error "Kernel PMDs should be 1, 2 or 3" # endif diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index db3280afe88..26cfdc1d7c7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -110,7 +110,7 @@ startup_64: movq %rdi, %rax shrq $PMD_SHIFT, %rax andq $(PTRS_PER_PMD - 1), %rax - leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx + leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx leaq level2_spare_pgt(%rip), %rbx movq %rdx, 0(%rbx, %rax, 8) ident_complete: @@ -374,7 +374,7 @@ NEXT_PAGE(level2_ident_pgt) /* Since I easily can, map the first 1G. * Don't set NX because code runs from these pages. */ - PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) NEXT_PAGE(level2_kernel_pgt) /* -- cgit v1.2.3