From 0406ca6d8e849d9dd027c8cb6791448e81411aef Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Jul 2009 21:02:09 +0200 Subject: perf_counter: Ignore the nmi call frames in the x86-64 backtraces About every callchains recorded with perf record are filled up including the internal perfcounter nmi frame: perf_callchain perf_counter_overflow intel_pmu_handle_irq perf_counter_nmi_handler notifier_call_chain atomic_notifier_call_chain notify_die do_nmi nmi We want ignore this frame as it's not interesting for instrumentation. To solve this, we simply ignore every frames from nmi context. New example of "perf report -s sym -c" after this patch: 9.59% [k] search_by_key 4.88% search_by_key reiserfs_read_locked_inode reiserfs_iget reiserfs_lookup do_lookup __link_path_walk path_walk do_path_lookup user_path_at vfs_fstatat vfs_lstat sys_newlstat system_call_fastpath __lxstat 0x406fb1 3.19% search_by_key search_by_entry_key reiserfs_find_entry reiserfs_lookup do_lookup __link_path_walk path_walk do_path_lookup user_path_at vfs_fstatat vfs_lstat sys_newlstat system_call_fastpath __lxstat 0x406fb1 [...] For now this patch only solves the problem in x86-64. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246474930-6088-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index d4cf4ce19aa..36c3dc7b899 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1561,6 +1561,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); +static DEFINE_PER_CPU(int, in_nmi_frame); static void @@ -1576,7 +1577,9 @@ static void backtrace_warning(void *data, char *msg) static int backtrace_stack(void *data, char *name) { - /* Process all stacks: */ + per_cpu(in_nmi_frame, smp_processor_id()) = + x86_is_stack_id(NMI_STACK, name); + return 0; } @@ -1584,6 +1587,9 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; + if (per_cpu(in_nmi_frame, smp_processor_id())) + return; + if (reliable) callchain_store(entry, addr); } -- cgit v1.2.3 From c7210e1ff8a95a0a6dba0d04a95b3ddd9d165fab Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 2 Jul 2009 22:35:35 +0530 Subject: x86: Remove unused function lapic_watchdog_ok() lapic_watchdog_ok() is a global function but no one is using it. Signed-off-by: Jaswinder Singh Rajput Cc: Andi Kleen Cc: Yinghai Lu LKML-Reference: <1246554335.2242.29.camel@jaswinder.satnam> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 5c481f6205b..e60ed740d2b 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -803,8 +803,3 @@ int __kprobes lapic_wd_event(unsigned nmi_hz) wd_ops->rearm(wd, nmi_hz); return 1; } - -int lapic_watchdog_ok(void) -{ - return wd_ops != NULL; -} -- cgit v1.2.3 From a2e1b4c31257c07f148a89eb7eea7ca959fd0642 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Sun, 26 Jul 2009 10:55:25 -0500 Subject: [CPUFREQ] Powernow-k8: support family 0xf with 2 low p-states Provide support for family 0xf processors with 2 P-states below the elevator voltage. Remove the checks that prevent this configuration from being supported and increase the transition voltage to prevent errors during the transition. Signed-off-by: Mark Langsdorf Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 30 +++++++++++------------------- arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 3 ++- 2 files changed, 13 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 81cbe64ed6b..ae068f59603 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -299,7 +299,7 @@ static int transition_pstate(struct powernow_k8_data *data, u32 pstate) static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) { - if (core_voltage_pre_transition(data, reqvid)) + if (core_voltage_pre_transition(data, reqvid, reqfid)) return 1; if (core_frequency_transition(data, reqfid)) @@ -327,17 +327,20 @@ static int transition_fid_vid(struct powernow_k8_data *data, /* Phase 1 - core voltage transition ... setup voltage */ static int core_voltage_pre_transition(struct powernow_k8_data *data, - u32 reqvid) + u32 reqvid, u32 reqfid) { u32 rvosteps = data->rvo; u32 savefid = data->currfid; - u32 maxvid, lo; + u32 maxvid, lo, rvomult = 1; dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " "reqvid 0x%x, rvo 0x%x\n", smp_processor_id(), data->currfid, data->currvid, reqvid, data->rvo); + if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP)) + rvomult = 2; + rvosteps *= rvomult; rdmsr(MSR_FIDVID_STATUS, lo, maxvid); maxvid = 0x1f & (maxvid >> 16); dprintk("ph1 maxvid=0x%x\n", maxvid); @@ -351,7 +354,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data, return 1; } - while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { + while ((rvosteps > 0) && + ((rvomult * data->rvo + data->currvid) > reqvid)) { if (data->currvid == maxvid) { rvosteps = 0; } else { @@ -384,13 +388,6 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) u32 vcoreqfid, vcocurrfid, vcofiddiff; u32 fid_interval, savevid = data->currvid; - if ((reqfid < HI_FID_TABLE_BOTTOM) && - (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk(KERN_ERR PFX "ph2: illegal lo-lo transition " - "0x%x 0x%x\n", reqfid, data->currfid); - return 1; - } - if (data->currfid == reqfid) { printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid); @@ -407,6 +404,9 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid : vcoreqfid - vcocurrfid; + if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP)) + vcofiddiff = 0; + while (vcofiddiff > 2) { (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); @@ -1081,14 +1081,6 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, return 0; } - if ((fid < HI_FID_TABLE_BOTTOM) && - (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk(KERN_ERR PFX - "ignoring illegal change in lo freq table-%x to 0x%x\n", - data->currfid, fid); - return 1; - } - dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", smp_processor_id(), fid, vid); freqs.old = find_khz_freq_from_fid(data->currfid); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index c9c1190b5e1..02ce824073c 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -215,7 +215,8 @@ struct pst_s { #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) -static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid); +static int core_voltage_pre_transition(struct powernow_k8_data *data, + u32 reqvid, u32 regfid); static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); -- cgit v1.2.3 From ad361c9884e809340f6daca80d56a9e9c871690a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 6 Jul 2009 13:05:40 -0700 Subject: Remove multiple KERN_ prefixes from printk formats Commit 5fd29d6ccbc98884569d6f3105aeca70858b3e0f ("printk: clean up handling of log-levels and newlines") changed printk semantics. printk lines with multiple KERN_ prefixes are no longer emitted as before the patch. is now included in the output on each additional use. Remove all uses of multiple KERN_s in formats. Signed-off-by: Joe Perches Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index ae068f59603..2a50ef89100 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1259,7 +1259,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { static const char ACPI_PSS_BIOS_BUG_MSG[] = KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" - KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; + FW_BUG PFX "Try again with latest BIOS.\n"; struct powernow_k8_data *data; struct init_on_cpu init_on_cpu; int rc; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index af425b83202..484c1e5f658 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -194,14 +194,14 @@ static void print_mce(struct mce *m) m->cs, m->ip); if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->ip); - printk("\n"); + printk(KERN_CONT "\n"); } printk(KERN_EMERG "TSC %llx ", m->tsc); if (m->addr) - printk("ADDR %llx ", m->addr); + printk(KERN_CONT "ADDR %llx ", m->addr); if (m->misc) - printk("MISC %llx ", m->misc); - printk("\n"); + printk(KERN_CONT "MISC %llx ", m->misc); + printk(KERN_CONT "\n"); printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); @@ -209,13 +209,13 @@ static void print_mce(struct mce *m) static void print_mce_head(void) { - printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n"); + printk(KERN_EMERG "\nHARDWARE ERROR\n"); } static void print_mce_tail(void) { printk(KERN_EMERG "This is not a software problem!\n" - KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n"); + "Run through mcelog --ascii to decode and contact your hardware vendor\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ -- cgit v1.2.3 From 11d1578f9454159c43499d1d8fe8a7d728c176a3 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Wed, 8 Jul 2009 17:46:14 -0400 Subject: perf_counter: Add P6 PMU support Add basic P6 PMU support. The P6 uses the EVNTSEL0 EN bit to enable/disable both its counters. We use this for the global enable/disable, and clear all config bits (except EN) to disable individual counters. Actual ia32 hardware doesn't support lfence, so use a locked op without side-effect to implement a full barrier. perf stat and perf record seem to function correctly. [a.p.zijlstra@chello.nl: cleanups and complete the enable/disable code] Signed-off-by: Vince Weaver Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 227 ++++++++++++++++++++++++++++++++++--- 1 file changed, 213 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 36c3dc7b899..1910f39ff19 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -65,6 +65,44 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, }; +/* + * Not sure about some of these + */ +static const u64 p6_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, +}; + +static u64 p6_pmu_event_map(int event) +{ + return p6_perfmon_event_map[event]; +} + +static u64 p6_pmu_raw_event(u64 event) +{ +#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL +#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL +#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL +#define P6_EVNTSEL_INV_MASK 0x00800000ULL +#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL + +#define P6_EVNTSEL_MASK \ + (P6_EVNTSEL_EVENT_MASK | \ + P6_EVNTSEL_UNIT_MASK | \ + P6_EVNTSEL_EDGE_MASK | \ + P6_EVNTSEL_INV_MASK | \ + P6_EVNTSEL_COUNTER_MASK) + + return event & P6_EVNTSEL_MASK; +} + + /* * Intel PerfMon v3. Used on Core2 and later. */ @@ -726,6 +764,23 @@ static int __hw_perf_counter_init(struct perf_counter *counter) return 0; } +static void p6_pmu_disable_all(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + unsigned long val; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + static void intel_pmu_disable_all(void) { wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); @@ -767,6 +822,23 @@ void hw_perf_disable(void) return x86_pmu.disable_all(); } +static void p6_pmu_enable_all(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + unsigned long val; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + static void intel_pmu_enable_all(void) { wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); @@ -819,16 +891,13 @@ static inline void intel_pmu_ack_status(u64 ack) static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { - int err; - err = checking_wrmsrl(hwc->config_base + idx, + (void)checking_wrmsrl(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); } static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { - int err; - err = checking_wrmsrl(hwc->config_base + idx, - hwc->config); + (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); } static inline void @@ -836,13 +905,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, mask; - int err; mask = 0xfULL << (idx * 4); rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; - err = checking_wrmsrl(hwc->config_base, ctrl_val); + (void)checking_wrmsrl(hwc->config_base, ctrl_val); +} + +static inline void +p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + unsigned long val = ARCH_PERFMON_EVENTSEL0_ENABLE; + + if (!cpuc->enabled) + val = 0; + + (void)checking_wrmsrl(hwc->config_base + idx, val); } static inline void @@ -943,6 +1023,17 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) err = checking_wrmsrl(hwc->config_base, ctrl_val); } +static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (cpuc->enabled) + x86_pmu_enable_counter(hwc, idx); + else + x86_pmu_disable_counter(hwc, idx); +} + + static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { @@ -1176,6 +1267,49 @@ static void intel_pmu_reset(void) local_irq_restore(flags); } +static int p6_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_counters *cpuc; + struct perf_counter *counter; + struct hw_perf_counter *hwc; + int idx, handled = 0; + u64 val; + + data.regs = regs; + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_counters); + + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + counter = cpuc->counters[idx]; + hwc = &counter->hw; + + val = x86_perf_counter_update(counter, hwc, idx); + if (val & (1ULL << (x86_pmu.counter_bits - 1))) + continue; + + /* + * counter overflow + */ + handled = 1; + data.period = counter->hw.last_period; + + if (!x86_perf_counter_set_period(counter, hwc, idx)) + continue; + + if (perf_counter_overflow(counter, 1, &data)) + p6_pmu_disable_counter(hwc, idx); + } + + if (handled) + inc_irq_stat(apic_perf_irqs); + + return handled; +} /* * This handler is triggered by the local APIC, so the APIC IRQ handling @@ -1185,14 +1319,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; struct cpu_hw_counters *cpuc; - int bit, cpu, loops; + int bit, loops; u64 ack, status; data.regs = regs; data.addr = 0; - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); + cpuc = &__get_cpu_var(cpu_hw_counters); perf_disable(); status = intel_pmu_get_status(); @@ -1249,14 +1382,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) struct cpu_hw_counters *cpuc; struct perf_counter *counter; struct hw_perf_counter *hwc; - int cpu, idx, handled = 0; + int idx, handled = 0; u64 val; data.regs = regs; data.addr = 0; - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); + cpuc = &__get_cpu_var(cpu_hw_counters); for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) @@ -1353,6 +1485,32 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { .priority = 1 }; +static struct x86_pmu p6_pmu = { + .name = "p6", + .handle_irq = p6_pmu_handle_irq, + .disable_all = p6_pmu_disable_all, + .enable_all = p6_pmu_enable_all, + .enable = p6_pmu_enable_counter, + .disable = p6_pmu_disable_counter, + .eventsel = MSR_P6_EVNTSEL0, + .perfctr = MSR_P6_PERFCTR0, + .event_map = p6_pmu_event_map, + .raw_event = p6_pmu_raw_event, + .max_events = ARRAY_SIZE(p6_perfmon_event_map), + .max_period = (1ULL << 31) - 1, + .version = 0, + .num_counters = 2, + /* + * Counters have 40 bits implemented. However they are designed such + * that bits [32-39] are sign extensions of bit 31. As such the + * effective width of a counter for P6-like PMU is 32 bits only. + * + * See IA-32 Intel Architecture Software developer manual Vol 3B + */ + .counter_bits = 32, + .counter_mask = (1ULL << 32) - 1, +}; + static struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -1392,6 +1550,41 @@ static struct x86_pmu amd_pmu = { .max_period = (1ULL << 47) - 1, }; +static int p6_pmu_init(void) +{ + int high, low; + + switch (boot_cpu_data.x86_model) { + case 1: + case 3: /* Pentium Pro */ + case 5: + case 6: /* Pentium II */ + case 7: + case 8: + case 11: /* Pentium III */ + break; + case 9: + case 13: + /* for Pentium M, we need to check if PMU exist */ + rdmsr(MSR_IA32_MISC_ENABLE, low, high); + if (low & MSR_IA32_MISC_ENABLE_EMON) + break; + default: + pr_cont("unsupported p6 CPU model %d ", + boot_cpu_data.x86_model); + return -ENODEV; + } + + if (!cpu_has_apic) { + pr_info("no Local APIC, try rebooting with lapic"); + return -ENODEV; + } + + x86_pmu = p6_pmu; + + return 0; +} + static int intel_pmu_init(void) { union cpuid10_edx edx; @@ -1400,8 +1593,14 @@ static int intel_pmu_init(void) unsigned int ebx; int version; - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + /* check for P6 processor family */ + if (boot_cpu_data.x86 == 6) { + return p6_pmu_init(); + } else { return -ENODEV; + } + } /* * Check whether the Architectural PerfMon supports -- cgit v1.2.3 From 9c74fb50867e8fb5f3be3be06716492c0f79309e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Jul 2009 10:21:41 +0200 Subject: perf_counter: Fix up P6 PMU details The P6 doesn't seem to support cache ref/hit/miss counts, so we extend the generic hardware event codes to have 0 and -1 mean the same thing as for the generic cache events. Furthermore, it turns out the 0 event does not count (that is, its reported that on PPro it actually does count something), therefore use a event configuration that's specified not to count to disable the counters. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 1910f39ff19..c7cc6eac14e 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -84,6 +84,14 @@ static u64 p6_pmu_event_map(int event) return p6_perfmon_event_map[event]; } +/* + * Counter setting that is specified not to count anything. + * We use this to effectively disable a counter. + * + * L2_RQSTS with 0 MESI unit mask. + */ +#define P6_NOP_COUNTER 0x0000002EULL + static u64 p6_pmu_raw_event(u64 event) { #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL @@ -704,6 +712,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) { struct perf_counter_attr *attr = &counter->attr; struct hw_perf_counter *hwc = &counter->hw; + u64 config; int err; if (!x86_pmu_initialized()) @@ -756,10 +765,19 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (attr->config >= x86_pmu.max_events) return -EINVAL; + /* * The generic map: */ - hwc->config |= x86_pmu.event_map(attr->config); + config = x86_pmu.event_map(attr->config); + + if (config == 0) + return -ENOENT; + + if (config == -1LL) + return -EINVAL; + + hwc->config |= config; return 0; } @@ -767,7 +785,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) static void p6_pmu_disable_all(void) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - unsigned long val; + u64 val; if (!cpuc->enabled) return; @@ -917,10 +935,10 @@ static inline void p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - unsigned long val = ARCH_PERFMON_EVENTSEL0_ENABLE; + u64 val = P6_NOP_COUNTER; - if (!cpuc->enabled) - val = 0; + if (cpuc->enabled) + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; (void)checking_wrmsrl(hwc->config_base + idx, val); } -- cgit v1.2.3 From 984b838ce69c063a91b87550598ab7f3439dd94a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 10 Jul 2009 09:59:56 +0200 Subject: perf_counter: Clean up global vs counter enable Ingo noticed that both AMD and P6 call x86_pmu_disable_counter() on *_pmu_enable_counter(). This is because we rely on the side effect of that call to program the event config but not touch the EN bit. We change that for AMD by having enable_all() simply write the full config in, and for P6 by explicitly coding it. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index c7cc6eac14e..bed1c4c2f25 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -874,13 +874,13 @@ static void amd_pmu_enable_all(void) barrier(); for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct perf_counter *counter = cpuc->counters[idx]; u64 val; if (!test_bit(idx, cpuc->active_mask)) continue; - rdmsrl(MSR_K7_EVNTSEL0 + idx, val); - if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) - continue; + + val = counter->hw.config; val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(MSR_K7_EVNTSEL0 + idx, val); } @@ -1044,11 +1044,13 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + u64 val; + val = hwc->config; if (cpuc->enabled) - x86_pmu_enable_counter(hwc, idx); - else - x86_pmu_disable_counter(hwc, idx); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + + (void)checking_wrmsrl(hwc->config_base + idx, val); } @@ -1068,8 +1070,6 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) if (cpuc->enabled) x86_pmu_enable_counter(hwc, idx); - else - x86_pmu_disable_counter(hwc, idx); } static int -- cgit v1.2.3 From f1c6a58121f9846ac665b0fbd3cbab90ce8bcbac Mon Sep 17 00:00:00 2001 From: Daniel Qarras Date: Sun, 12 Jul 2009 04:32:40 -0700 Subject: perf_counter, x86: Extend perf_counter Pentium M support I've attached a patch to remove the Pentium M special casing of EMON and as noticed at least with my Pentium M the hardware PMU now works: Performance counter stats for '/bin/ls /var/tmp': 1.809988 task-clock-msecs # 0.125 CPUs 1 context-switches # 0.001 M/sec 0 CPU-migrations # 0.000 M/sec 224 page-faults # 0.124 M/sec 1425648 cycles # 787.656 M/sec 912755 instructions # 0.640 IPC Vince suggested that this code was trying to address erratum Y17 in Pentium-M's: http://download.intel.com/support/processors/mobile/pm/sb/25266532.pdf But that erratum (related to IA32_MISC_ENABLES.7) does not affect perfcounters as we dont use this toggle to disable RDPMC and WRMSR/RDMSR access to performance counters. We keep cr4's bit 8 (X86_CR4_PCE) clear so unprivileged RDPMC access is not allowed anyway. Cc: Vince Weaver Cc: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index bed1c4c2f25..7e346d4bc0f 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1583,10 +1583,8 @@ static int p6_pmu_init(void) break; case 9: case 13: - /* for Pentium M, we need to check if PMU exist */ - rdmsr(MSR_IA32_MISC_ENABLE, low, high); - if (low & MSR_IA32_MISC_ENABLE_EMON) - break; + /* Pentium M */ + break; default: pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model); -- cgit v1.2.3 From e9084ec98bb9aa3abc6cf73181177780ce7546f8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 16 Jul 2009 09:45:11 +0100 Subject: x86, mce: Fix set_trigger() accessor Fix the condition checking the result of strchr() (which previously could result in an oops), and make the function return the number of bytes actively used. [ Impact: fix oops ] Signed-off-by: Jan Beulich Cc: Andi Kleen LKML-Reference: <4A5F04B7020000780000AB59@vpn.id2.novell.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mcheck/mce.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 484c1e5f658..1cfb623ce11 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1692,17 +1692,15 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, const char *buf, size_t siz) { char *p; - int len; strncpy(mce_helper, buf, sizeof(mce_helper)); mce_helper[sizeof(mce_helper)-1] = 0; - len = strlen(mce_helper); p = strchr(mce_helper, '\n'); - if (*p) + if (p) *p = 0; - return len; + return strlen(mce_helper) + !!p; } static ssize_t set_ignore_ce(struct sys_device *s, -- cgit v1.2.3 From 9b7019ae6a542a801a80df6694c66e240e2c3e39 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jul 2009 16:31:36 +0200 Subject: perf_counter: Remove unused variables Fix a gcc unused variables warning. Signed-off-by: Peter Zijlstra --- arch/x86/kernel/cpu/perf_counter.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 7e346d4bc0f..a7aa8f90095 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1570,8 +1570,6 @@ static struct x86_pmu amd_pmu = { static int p6_pmu_init(void) { - int high, low; - switch (boot_cpu_data.x86_model) { case 1: case 3: /* Pentium Pro */ -- cgit v1.2.3 From 2cb078603abb612e3bcd428fb8122c3d39e08832 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 22 Jul 2009 09:59:35 -0700 Subject: x86, amd: Don't probe for extended APIC ID if APICs are disabled If we've logically disabled apics, don't probe the PCI space for the AMD extended APIC ID. [ Impact: prevent boot crash under Xen. ] Signed-off-by: Jeremy Fitzhardinge Reported-by: Bastian Blank Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 28e5f595604..e2485b03f1c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -356,7 +356,7 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) /* check CPU config space for extended APIC ID */ - if (c->x86 >= 0xf) { + if (cpu_has_apic && c->x86 >= 0xf) { unsigned int val; val = read_pci_config(0, 24, 0, 0x68); if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) -- cgit v1.2.3 From 0d01f31439c1e4d602bf9fdc924ab66f407f5e38 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 9 Aug 2009 21:44:49 -0700 Subject: x86, mce: therm_throt - change when we print messages My Latitude d630 seems to be handling thermal events in SMI by lowering the max frequency of the CPU till it cools down but still leaks the "everything is normal" events. This spams the console and with high priority printks. Adjust therm_throt driver to only print messages about the fact that temperatire returned back to normal when leaving the throttling state. Also lower the severity of "back to normal" message from KERN_CRIT to KERN_INFO. Signed-off-by: Dmitry Torokhov Acked-by: H. Peter Anvin LKML-Reference: <20090810051513.0558F526EC9@mailhub.coreip.homeip.net> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index bff8dd191dd..8bc64cfbe93 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -36,6 +36,7 @@ static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); +static DEFINE_PER_CPU(bool, thermal_throttle_active); static atomic_t therm_throt_en = ATOMIC_INIT(0); @@ -96,24 +97,27 @@ static int therm_throt_process(int curr) { unsigned int cpu = smp_processor_id(); __u64 tmp_jiffs = get_jiffies_64(); + bool was_throttled = __get_cpu_var(thermal_throttle_active); + bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr; - if (curr) + if (is_throttled) __get_cpu_var(thermal_throttle_count)++; - if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) + if (!(was_throttled ^ is_throttled) && + time_before64(tmp_jiffs, __get_cpu_var(next_check))) return 0; __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; /* if we just entered the thermal event */ - if (curr) { + if (is_throttled) { printk(KERN_CRIT "CPU%d: Temperature above threshold, " - "cpu clock throttled (total events = %lu)\n", cpu, - __get_cpu_var(thermal_throttle_count)); + "cpu clock throttled (total events = %lu)\n", + cpu, __get_cpu_var(thermal_throttle_count)); add_taint(TAINT_MACHINE_CHECK); - } else { - printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); + } else if (was_throttled) { + printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); } return 1; -- cgit v1.2.3 From 3c581a7f94542341bf0da496a226b44ac63521a8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 11 Aug 2009 10:47:36 +0200 Subject: perf_counter, x86: Fix lapic printk message Instead of this garbled bootup on UP Pentium-M systems: [ 0.015048] Performance Counters: [ 0.016004] no Local APIC, try rebooting with lapicno PMU driver, software counters only. Print: [ 0.015050] Performance Counters: [ 0.016004] no APIC, boot with the "lapic" boot parameter to force-enable it. [ 0.017003] no PMU driver, software counters only. Cf: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a7aa8f90095..40e233a24d9 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1590,7 +1590,7 @@ static int p6_pmu_init(void) } if (!cpu_has_apic) { - pr_info("no Local APIC, try rebooting with lapic"); + pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); return -ENODEV; } -- cgit v1.2.3 From f64ccccb8afa43abdd63fcbd230f818d6ea0883f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 11 Aug 2009 10:26:33 +0200 Subject: perf_counter, x86: Fix generic cache events on P6-mobile CPUs Johannes Stezenbach reported that 'perf stat' does not count cache-miss and cache-references events on his Pentium-M based laptop. This is because we left them blank in p6_perfmon_event_map[], fill them in. Reported-by: Johannes Stezenbach Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 40e233a24d9..fffc126dbdf 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -72,8 +72,8 @@ static const u64 p6_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, -- cgit v1.2.3 From fbd8b1819e80ac5a176d085fdddc3a34d1499318 Mon Sep 17 00:00:00 2001 From: Kevin Winchester Date: Mon, 10 Aug 2009 19:56:45 -0300 Subject: x86: Clear incorrectly forced X86_FEATURE_LAHF_LM flag Due to an erratum with certain AMD Athlon 64 processors, the BIOS may need to force enable the LAHF_LM capability. Unfortunately, in at least one case, the BIOS does this even for processors that do not support the functionality. Add a specific check that will clear the feature bit for processors known not to support the LAHF/SAHF instructions. Signed-off-by: Kevin Winchester Acked-by: Borislav Petkov LKML-Reference: <4A80A5AD.2000209@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e2485b03f1c..63fddcd082c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) level = cpuid_eax(1); if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* + * Some BIOSes incorrectly force this feature, but only K8 + * revision D (model = 0x14) and later actually support it. + */ + if (c->x86_model < 0x14) + clear_cpu_cap(c, X86_FEATURE_LAHF_LM); } if (c->x86 == 0x10 || c->x86 == 0x11) set_cpu_cap(c, X86_FEATURE_REP_GOOD); -- cgit v1.2.3 From e8055139d996e85722984968472868d6dccb1490 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Tue, 11 Aug 2009 20:00:11 +0200 Subject: x86: Fix oops in identify_cpu() on CPUs without CPUID Kernel is broken for x86 CPUs without CPUID since 2.6.28. It crashes with NULL pointer dereference in identify_cpu(): 766 generic_identify(c); 767 768--> if (this_cpu->c_identify) 769 this_cpu->c_identify(c); this_cpu is NULL. This is because it's only initialized in get_cpu_vendor() function, which is not called if the CPU has no CPUID instruction. Signed-off-by: Ondrej Zary LKML-Reference: <200908112000.15993.linux@rainbow-software.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 48 ++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f1961c07af9..5ce60a88027 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void) alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); } -static const struct cpu_dev *this_cpu __cpuinitdata; +static void __cpuinit default_init(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_64 + display_cacheinfo(c); +#else + /* Not much we can do here... */ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { + /* No cpuid. It must be an ancient CPU */ + if (c->x86 == 4) + strcpy(c->x86_model_id, "486"); + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } +#endif +} + +static const struct cpu_dev __cpuinitconst default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, +}; + +static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 @@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu) static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; -static void __cpuinit default_init(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - display_cacheinfo(c); -#else - /* Not much we can do here... */ - /* Check if at least it has cpuid */ - if (c->cpuid_level == -1) { - /* No cpuid. It must be an ancient CPU */ - if (c->x86 == 4) - strcpy(c->x86_model_id, "486"); - else if (c->x86 == 3) - strcpy(c->x86_model_id, "386"); - } -#endif -} - -static const struct cpu_dev __cpuinitconst default_cpu = { - .c_init = default_init, - .c_vendor = "Unknown", - .c_x86_vendor = X86_VENDOR_UNKNOWN, -}; - static void __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; -- cgit v1.2.3 From 04da8a43da804723a550f00dd158fd5b5e25ae35 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 11 Aug 2009 10:40:08 +0200 Subject: perf_counter, x86: Fix/improve apic fallback Johannes Stezenbach reported that his Pentium-M based laptop does not have the local APIC enabled by default, and hence perfcounters do not get initialized. Add a fallback for this case: allow non-sampled counters and return with an error on sampled counters. This allows 'perf stat' to work out of box - and allows 'perf top' and 'perf record' to fall back on a hrtimer based sampling method. ( Passing 'lapic' on the boot line will allow hardware sampling to occur - but if the APIC is disabled permanently by the hardware then this fallback still allows more systems to use perfcounters. ) Also decouple perfcounter support from X86_LOCAL_APIC. -v2: fix typo breaking counters on all other systems ... Reported-by: Johannes Stezenbach Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index fffc126dbdf..900332b800f 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -55,6 +55,7 @@ struct x86_pmu { int num_counters_fixed; int counter_bits; u64 counter_mask; + int apic; u64 max_period; u64 intel_ctrl; }; @@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex); static bool reserve_pmc_hardware(void) { +#ifdef CONFIG_X86_LOCAL_APIC int i; if (nmi_watchdog == NMI_LOCAL_APIC) @@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void) if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) goto eventsel_fail; } +#endif return true; +#ifdef CONFIG_X86_LOCAL_APIC eventsel_fail: for (i--; i >= 0; i--) release_evntsel_nmi(x86_pmu.eventsel + i); @@ -644,10 +648,12 @@ perfctr_fail: enable_lapic_nmi_watchdog(); return false; +#endif } static void release_pmc_hardware(void) { +#ifdef CONFIG_X86_LOCAL_APIC int i; for (i = 0; i < x86_pmu.num_counters; i++) { @@ -657,6 +663,7 @@ static void release_pmc_hardware(void) if (nmi_watchdog == NMI_LOCAL_APIC) enable_lapic_nmi_watchdog(); +#endif } static void hw_perf_counter_destroy(struct perf_counter *counter) @@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; atomic64_set(&hwc->period_left, hwc->sample_period); + } else { + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * counters (user-space has to fall back and + * sample via a hrtimer based software counter): + */ + if (!x86_pmu.apic) + return -EOPNOTSUPP; } counter->destroy = hw_perf_counter_destroy; @@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs) void set_perf_counter_pending(void) { +#ifdef CONFIG_X86_LOCAL_APIC apic->send_IPI_self(LOCAL_PENDING_VECTOR); +#endif } void perf_counters_lapic_init(void) { - if (!x86_pmu_initialized()) +#ifdef CONFIG_X86_LOCAL_APIC + if (!x86_pmu.apic || !x86_pmu_initialized()) return; /* * Always use NMI for PMU */ apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif } static int __kprobes @@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self, regs = args->regs; +#ifdef CONFIG_X86_LOCAL_APIC apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif /* * Can't rely on the handled return value to say it was our NMI, two * counters could trigger 'simultaneously' raising two back-to-back NMIs. @@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = { .event_map = p6_pmu_event_map, .raw_event = p6_pmu_raw_event, .max_events = ARRAY_SIZE(p6_perfmon_event_map), + .apic = 1, .max_period = (1ULL << 31) - 1, .version = 0, .num_counters = 2, @@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = { .event_map = intel_pmu_event_map, .raw_event = intel_pmu_raw_event, .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of @@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = { .num_counters = 4, .counter_bits = 48, .counter_mask = (1ULL << 48) - 1, + .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, }; @@ -1589,13 +1614,14 @@ static int p6_pmu_init(void) return -ENODEV; } + x86_pmu = p6_pmu; + if (!cpu_has_apic) { pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); - return -ENODEV; + pr_info("no hardware sampling interrupt available.\n"); + x86_pmu.apic = 0; } - x86_pmu = p6_pmu; - return 0; } -- cgit v1.2.3 From 4e5c25d405e18a2f279ca2bfc855508ec3a0186b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 16 Aug 2009 15:54:37 +0100 Subject: x86, mce: therm_throt: Don't log redundant normality 0d01f31439c1e4d602bf9fdc924ab66f407f5e38 "x86, mce: therm_throt - change when we print messages" removed redundant announcements of "Temperature/speed normal". They're not worth logging and remove their accompanying "Machine check events logged" messages as well from the console. Signed-off-by: Hugh Dickins Cc: Hidetoshi Seto Cc: Andi Kleen Cc: Dmitry Torokhov LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 8bc64cfbe93..5957a93e517 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -116,11 +116,14 @@ static int therm_throt_process(int curr) cpu, __get_cpu_var(thermal_throttle_count)); add_taint(TAINT_MACHINE_CHECK); - } else if (was_throttled) { + return 1; + } + if (was_throttled) { printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); + return 1; } - return 1; + return 0; } #ifdef CONFIG_SYSFS -- cgit v1.2.3 From c7f6fa44115d401e89db730f357629d39f8e4ba6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 28 Jul 2009 23:52:54 +0200 Subject: x86, mce: don't log boot MCEs on Pentium M (model == 13) CPUs On my legacy Pentium M laptop (Acer Extensa 2900) I get bogus MCE on a cold boot with CONFIG_X86_NEW_MCE enabled, i.e. (after decoding it with mcelog): MCE 0 HARDWARE ERROR. This is *NOT* a software problem! Please contact your hardware vendor CPU 0 BANK 1 MCG status: MCi status: Error overflow Uncorrected error Error enabled Processor context corrupt MCA: Data CACHE Level-1 UNKNOWN Error STATUS f200000000000195 MCGSTATUS 0 [ The other STATUS values observed: f2000000000001b5 (... UNKNOWN error) and f200000000000115 (... READ Error). To verify that this is not a CONFIG_X86_NEW_MCE bug I also modified the CONFIG_X86_OLD_MCE code (which doesn't log any MCEs) to dump content of STATUS MSR before it is cleared during initialization. ] Since the bogus MCE results in a kernel taint (which in turn disables lockdep support) don't log boot MCEs on Pentium M (model == 13) CPUs by default ("mce=bootlog" boot parameter can be be used to get the old behavior). Signed-off-by: Bartlomiej Zolnierkiewicz Reviewed-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 1cfb623ce11..a0c2910d96a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1273,6 +1273,10 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && monarch_timeout < 0) monarch_timeout = USEC_PER_SEC; + + /* There are also broken BIOSes on some Pentium M systems. */ + if (c->x86 == 6 && c->x86_model == 13 && mce_bootlog < 0) + mce_bootlog = 0; } if (monarch_timeout < 0) monarch_timeout = 0; -- cgit v1.2.3 From e412cd257e0d51e0ecbb89f50953835b5a0681b2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 17 Aug 2009 10:19:00 +0200 Subject: x86, mce: Don't initialize MCEs on unknown CPUs An older test-box started hanging at the following point during bootup: [ 0.022996] Mount-cache hash table entries: 512 [ 0.024996] Initializing cgroup subsys debug [ 0.025996] Initializing cgroup subsys cpuacct [ 0.026995] Initializing cgroup subsys devices [ 0.027995] Initializing cgroup subsys freezer [ 0.028995] mce: CPU supports 5 MCE banks I've bisected it down to commit 4efc0670 ("x86, mce: use 64bit machine check code on 32bit"), which utilizes the MCE code on 32-bit systems too. The problem is caused by this detail in my config: # CONFIG_CPU_SUP_INTEL is not set This disables the quirks in mce_cpu_quirks() but still enables MCE support - which then hangs due to the missing quirk workaround needed on this CPU: if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0) mce_banks[0].init = 0; The safe solution is to not initialize MCEs if we dont know on what CPU we are running (or if that CPU's support code got disabled in the config). Also be a bit more defensive on 32-bit systems: dont do a boot-time dump of pending MCEs not just on the specific system that we found a problem with (Pentium-M), but earlier ones as well. Now this problem is probably not common and disabling CPU support is rare - but still being more defensive in something we turned on for a wide range of CPUs is prudent. Cc: Hidetoshi Seto LKML-Reference: Message-ID: <4A88E3E4.40506@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a0c2910d96a..01213048f62 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1226,8 +1226,13 @@ static void mce_init(void) } /* Add per CPU specific workarounds here */ -static void mce_cpu_quirks(struct cpuinfo_x86 *c) +static int mce_cpu_quirks(struct cpuinfo_x86 *c) { + if (c->x86_vendor == X86_VENDOR_UNKNOWN) { + pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); + return -EOPNOTSUPP; + } + /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { if (c->x86 == 15 && banks > 4) { @@ -1274,14 +1279,19 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) monarch_timeout < 0) monarch_timeout = USEC_PER_SEC; - /* There are also broken BIOSes on some Pentium M systems. */ - if (c->x86 == 6 && c->x86_model == 13 && mce_bootlog < 0) + /* + * There are also broken BIOSes on some Pentium M and + * earlier systems: + */ + if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) mce_bootlog = 0; } if (monarch_timeout < 0) monarch_timeout = 0; if (mce_bootlog != 0) mce_panic_timeout = 30; + + return 0; } static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) @@ -1342,11 +1352,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) if (!mce_available(c)) return; - if (mce_cap_init() < 0) { + if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { mce_disabled = 1; return; } - mce_cpu_quirks(c); machine_check_vector = do_machine_check; -- cgit v1.2.3 From 5416c2663517ebd0be0664c4d4ce3df0b116c059 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Aug 2009 12:25:41 -0700 Subject: x86: make sure load_percpu_segment has no stackprotector load_percpu_segment() is used to set up the per-cpu segment registers, which are also used for -fstack-protector. Make sure that the load_percpu_segment() function doesn't have stackprotector enabled. [ Impact: allow percpu setup before calling stack-protected functions ] Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/cpu/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4e242f9a06e..8b5b9b625ed 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -7,6 +7,10 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_common.o = -pg endif +# Make sure load_percpu_segment has no stackprotector +nostackp := $(call cc-option, -fno-stack-protector) +CFLAGS_common.o := $(nostackp) + obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o capflags.o powerflags.o common.o obj-y += vmware.o hypervisor.o -- cgit v1.2.3