From 39d30770992895d55789de64bad2349510af68d0 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 28 Dec 2009 13:28:25 -0800 Subject: x86: SGI UV: Fix writes to led registers on remote uv hubs The wrong address was being used to write the SCIR led regs on remote hubs. Also, there was an inconsistency between how BIOS and the kernel indexed these regs. Standardize on using the lower 6 bits of the APIC ID as the index. This patch fixes the problem of writing to an errant address to a cpu # >= 64. Signed-off-by: Mike Travis Reviewed-by: Jack Steiner Cc: Robin Holt Cc: Linus Torvalds Cc: stable@kernel.org LKML-Reference: <4B3922F9.3060905@sgi.com> [ v2: fix a number of annoying checkpatch artifacts and whitespace noise ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d56b0efb205..5f92494dab6 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -629,8 +629,10 @@ void __init uv_system_init(void) uv_rtc_init(); for_each_present_cpu(cpu) { + int apicid = per_cpu(x86_cpu_to_apicid, cpu); + nid = cpu_to_node(cpu); - pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); + pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); lcpu = uv_blade_info[blade].nr_possible_cpus; uv_blade_info[blade].nr_possible_cpus++; @@ -651,15 +653,13 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; - uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; + uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; max_pnode = max(pnode, max_pnode); - printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid 
%d, " - "lcpu %d, blade %d\n", - cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, - lcpu, blade); + printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", + cpu, apicid, pnode, nid, lcpu, blade); } /* Add blade/pnode info for nodes without cpus */ -- cgit v1.2.3 From 7f41c2e1523f628cc248e34192162aec5728bed7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 6 Jan 2010 10:56:31 -0800 Subject: x86, irq: Check move_in_progress before freeing the vector mapping With the recent irq migration fixes (post 2.6.32), Gary Hade has noticed "No IRQ handler for vector" messages during the 2.6.33-rc1 kernel boot on IBM AMD platforms and root caused the issue to this commit: > commit 23359a88e7eca3c4f402562b102f23014db3c2aa > Author: Suresh Siddha > Date: Mon Oct 26 14:24:33 2009 -0800 > > x86: Remove move_cleanup_count from irq_cfg As part of this patch, we have removed the move_cleanup_count check in smp_irq_move_cleanup_interrupt(). With this change, we can run into a situation where an irq cleanup interrupt on a cpu can cleanup the vector mappings associated with multiple irqs, of which one of the irq's migration might be still in progress. As such when that irq hits the old cpu, we get the "No IRQ handler" messages. Fix this by checking for the irq_cfg's move_in_progress and if the move is still in progress delay the vector cleanup to another irq cleanup interrupt request (which will happen when the irq starts arriving at the new cpu destination). Reported-and-tested-by: Gary Hade Signed-off-by: Suresh Siddha LKML-Reference: <1262804191.2732.7.camel@sbs-t61.sc.intel.com> Cc: Eric W. Biederman Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index de00c4619a5..53243ca7816 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2434,6 +2434,13 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) cfg = irq_cfg(irq); raw_spin_lock(&desc->lock); + /* + * Check if the irq migration is in progress. If so, we + * haven't received the cleanup request yet for this irq. + */ + if (cfg->move_in_progress) + goto unlock; + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; -- cgit v1.2.3 From 066000dd856709b6980123eb39b957fe26993f7b Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 11 Jan 2010 15:51:04 -0800 Subject: Revert "x86, apic: Use logical flat on intel with <= 8 logical cpus" Revert commit 2fbd07a5f5d1295fa9b0c0564ec27da7c276a75a, as this commit breaks an IBM platform with quad-core Xeon cpu's. According to Suresh, this might be an IBM platform issue, as on other Intel platforms with <= 8 logical cpu's, logical flat mode works fine irrespective of physical apic id values (in line with the xapic architecture). Revert this for now because of the IBM platform breakage. Another version will be re-submitted after the complete analysis. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Suresh Siddha Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/apic.c | 26 ++++++++++++++++++-------- arch/x86/kernel/apic/probe_64.c | 15 ++++----------- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index aa57c079c98..e80f291472a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -62,7 +62,7 @@ unsigned int boot_cpu_physical_apicid = -1U; /* * The highest APIC ID seen during enumeration. 
* - * On AMD, this determines the messaging protocol we can use: if all APIC IDs + * This determines the messaging protocol we can use: if all APIC IDs * are in the 0 ... 7 range, then we can use logical addressing which * has some performance advantages (better broadcasting). * @@ -1898,14 +1898,24 @@ void __cpuinit generic_processor_info(int apicid, int version) max_physical_apicid = apicid; #ifdef CONFIG_X86_32 - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (num_processors > 8) - def_to_bigsmp = 1; - break; - case X86_VENDOR_AMD: - if (max_physical_apicid >= 8) + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj + */ + if (max_physical_apicid >= 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: def_to_bigsmp = 1; + } } #endif diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index c4cbd3080c1..65edc180fc8 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -64,23 +64,16 @@ void __init default_setup_apic_routing(void) apic = &apic_x2apic_phys; else apic = &apic_x2apic_cluster; + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } #endif if (apic == &apic_flat) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (num_processors > 8) - apic = &apic_physflat; - break; - case X86_VENDOR_AMD: - if (max_physical_apicid >= 8) - apic = &apic_physflat; - } + if (max_physical_apicid >= 8) + apic = &apic_physflat; + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } - printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); - if (is_vsmp_box()) { /* need to update phys_pkg_id */ apic->phys_pkg_id = 
apicid_phys_pkg_id; -- cgit v1.2.3 From fcfbb2b5facd65efa7284cc315225bfe3d1856c2 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 8 Jan 2010 12:13:54 -0800 Subject: x86: SGI UV: Fix mapping of MMIO registers This fixes the problem of the initialization code not correctly mapping the entire MMIO space on a UV system. A side effect is the map_high() interface needed to be changed to accommodate different address and size shifts. Signed-off-by: Mike Travis Reviewed-by: Mike Habeck Cc: Cc: Jack Steiner Cc: Linus Torvalds LKML-Reference: <4B479202.7080705@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 5f92494dab6..b8bb869a661 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -374,13 +374,13 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) enum map_type {map_wb, map_uc}; -static __init void map_high(char *id, unsigned long base, int shift, - int max_pnode, enum map_type map_type) +static __init void map_high(char *id, unsigned long base, int pshift, + int bshift, int max_pnode, enum map_type map_type) { unsigned long bytes, paddr; - paddr = base << shift; - bytes = (1UL << shift) * (max_pnode + 1); + paddr = base << pshift; + bytes = (1UL << bshift) * (max_pnode + 1); printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes); if (map_type == map_uc) @@ -396,7 +396,7 @@ static __init void map_gru_high(int max_pnode) gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); if (gru.s.enable) { - map_high("GRU", gru.s.base, shift, max_pnode, map_wb); + map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); gru_start_paddr = ((u64)gru.s.base << shift); gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); @@ -410,7 +410,7 @@ static 
__init void map_mmr_high(int max_pnode) mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); if (mmr.s.enable) - map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); + map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc); } static __init void map_mmioh_high(int max_pnode) @@ -420,7 +420,8 @@ static __init void map_mmioh_high(int max_pnode) mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); if (mmioh.s.enable) - map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); + map_high("MMIOH", mmioh.s.base, shift, mmioh.s.m_io, + max_pnode, map_uc); } static __init void map_low_mmrs(void) -- cgit v1.2.3 From 7a1110e861b2666ac09f5708d6fbe71d18ce64bb Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 12 Jan 2010 15:09:04 -0600 Subject: x86, uv: Add function retrieving node controller revision number Add function for determining the revision id of the SGI UV node controller chip (HUB). This function is needed in a subsequent patch. Signed-off-by: Jack Steiner LKML-Reference: <20100112210904.GA24546@sgi.com> Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/apic/x2apic_uv_x.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index b8bb869a661..0e48de9ff86 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -36,6 +36,8 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; static u64 gru_start_paddr, gru_end_paddr; +int uv_min_hub_revision_id; +EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); static inline bool is_GRU_range(u64 start, u64 end) { @@ -55,6 +57,10 @@ static int early_get_nodeid(void) mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); node_id.v = *mmr; early_iounmap(mmr, sizeof(*mmr)); + + /* Currently, all blades have same revision number */ + uv_min_hub_revision_id = node_id.s.revision; + return node_id.s.node_id; } -- cgit v1.2.3 From 1d2c867c941d635e53e8ad7bf37d060bb5b25ec5 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 15 Jan 2010 12:09:09 -0600 Subject: x86, uv: Ensure hub revision set for all ACPI modes. Ensure that UV hub revision is set for all ACPI modes. Signed-off-by: Russ Anderson LKML-Reference: <20100115180908.GB7757@sgi.com> Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/apic/x2apic_uv_x.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 0e48de9ff86..21db3cbea7d 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -66,7 +66,10 @@ static int early_get_nodeid(void) static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { + int nodeid; + if (!strcmp(oem_id, "SGI")) { + nodeid = early_get_nodeid(); x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; if (!strcmp(oem_table_id, "UVL")) uv_system_type = UV_LEGACY_APIC; @@ -74,7 +77,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) uv_system_type = UV_X2APIC; else if (!strcmp(oem_table_id, "UVH")) { __get_cpu_var(x2apic_extra_bits) = - early_get_nodeid() << (UV_APIC_PNODE_SHIFT - 1); + nodeid << (UV_APIC_PNODE_SHIFT - 1); uv_system_type = UV_NON_UNIQUE_APIC; return 1; } -- cgit v1.2.3 From dfea91d5a7c795fd6f4e1a97489a98e4e767463e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 18 Jan 2010 12:10:48 -0800 Subject: x86, apic: use physical mode for IBM summit platforms Chris McDermott from IBM confirmed that hurricane chipset in IBM summit platforms doesn't support logical flat mode. Irrespective of the other things like apic_id's, total number of logical cpu's, Linux kernel should default to physical mode for this system. The 32-bit kernel does so using the OEM checks for the IBM summit platform. Add a similar OEM platform check for the 64bit kernel too. Otherwise the linux kernel boot can hang on this platform under certain bios/platform settings. 
Signed-off-by: Suresh Siddha Tested-by: Ananth N Mavinakayanahalli Cc: Chris McDermott Cc: Yinghai Lu Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/apic_flat_64.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index eacbd2b31d2..e3c3d820c32 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -240,6 +240,11 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) printk(KERN_DEBUG "system APIC only can use physical flat"); return 1; } + + if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) { + printk(KERN_DEBUG "IBM Summit detected, will use apic physical"); + return 1; + } #endif return 0; -- cgit v1.2.3 From bb668da6d6f2bec8a63838c098d9515eccb22cc4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 18 Jan 2010 12:10:49 -0800 Subject: x86, apic: use logical flat for systems with <= 8 logical cpus We can use logical flat mode if there are <= 8 logical cpu's (irrespective of physical apic id values). This will enable simplified and efficient IPI and device interrupt routing on such platforms. This has been tested to work on both Intel and AMD platforms. Exceptions like IBM summit platform which can't use logical flat mode are addressed by using OEM platform checks. 
Signed-off-by: Suresh Siddha Signed-off-by: Yinghai Lu Cc: Ananth N Mavinakayanahalli Cc: Chris McDermott Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/apic.c | 15 +-------------- arch/x86/kernel/apic/probe_64.c | 8 +++----- 2 files changed, 4 insertions(+), 19 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e80f291472a..3987e4408f7 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -61,12 +61,6 @@ unsigned int boot_cpu_physical_apicid = -1U; /* * The highest APIC ID seen during enumeration. - * - * This determines the messaging protocol we can use: if all APIC IDs - * are in the 0 ... 7 range, then we can use logical addressing which - * has some performance advantages (better broadcasting). - * - * If there's an APIC ID above 8, we use physical addressing. */ unsigned int max_physical_apicid; @@ -1898,14 +1892,7 @@ void __cpuinit generic_processor_info(int apicid, int version) max_physical_apicid = apicid; #ifdef CONFIG_X86_32 - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. 
- * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (max_physical_apicid >= 8) { + if (num_processors > 8) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: if (!APIC_XAPIC(version)) { diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 65edc180fc8..450fe2064a1 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -64,15 +64,13 @@ void __init default_setup_apic_routing(void) apic = &apic_x2apic_phys; else apic = &apic_x2apic_cluster; - printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } #endif - if (apic == &apic_flat) { - if (max_physical_apicid >= 8) + if (apic == &apic_flat && num_processors > 8) apic = &apic_physflat; - printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); - } + + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); if (is_vsmp_box()) { /* need to update phys_pkg_id */ -- cgit v1.2.3 From 681ee44d40d7c93b42118320e4620d07d8704fd6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 9 Feb 2010 18:01:44 -0800 Subject: x86, apic: Don't use logical-flat mode when CPU hotplug may exceed 8 CPUs We need to fall back from logical-flat APIC mode to physical-flat mode when we have more than 8 CPUs. However, in the presence of CPU hotplug(with bios listing not enabled but possible cpus as disabled cpus in MADT), we have to consider the number of possible CPUs rather than the number of current CPUs; otherwise we may cross the 8-CPU boundary when CPUs are added later. 32bit apic code can use more cleanups (like the removal of vendor checks in 32bit default_setup_apic_routing()) and more unifications with 64bit code. Yinghai has some patches in works already. This patch addresses the boot issue that is reported in the virtualization guest context. 
[ hpa: incorporated function annotation feedback from Yinghai Lu ] Signed-off-by: Suresh Siddha LKML-Reference: <1265767304.2833.19.camel@sbs-t61.sc.intel.com> Acked-by: Shaohui Zheng Reviewed-by: Yinghai Lu Cc: Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/apic.c | 17 ----------------- arch/x86/kernel/apic/probe_32.c | 29 +++++++++++++++++++++++++++-- arch/x86/kernel/apic/probe_64.c | 2 +- 3 files changed, 28 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3987e4408f7..dfca210f6a1 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1641,9 +1641,7 @@ int __init APIC_init_uniprocessor(void) #endif enable_IR_x2apic(); -#ifdef CONFIG_X86_64 default_setup_apic_routing(); -#endif verify_local_APIC(); connect_bsp_APIC(); @@ -1891,21 +1889,6 @@ void __cpuinit generic_processor_info(int apicid, int version) if (apicid > max_physical_apicid) max_physical_apicid = apicid; -#ifdef CONFIG_X86_32 - if (num_processors > 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#endif - #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1a6559f6768..99d2fe01608 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -52,7 +52,32 @@ static int __init print_ipi_mode(void) } late_initcall(print_ipi_mode); -void default_setup_apic_routing(void) +void __init default_setup_apic_routing(void) +{ + int version = apic_version[boot_cpu_physical_apicid]; + + if (num_possible_cpus() > 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + 
def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } + +#ifdef CONFIG_X86_BIGSMP + generic_bigsmp_probe(); +#endif + + if (apic->setup_apic_routing) + apic->setup_apic_routing(); +} + +static void setup_apic_flat_routing(void) { #ifdef CONFIG_X86_IO_APIC printk(KERN_INFO @@ -103,7 +128,7 @@ struct apic apic_default = { .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, - .setup_apic_routing = default_setup_apic_routing, + .setup_apic_routing = setup_apic_flat_routing, .multi_timer_check = NULL, .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 450fe2064a1..83e9be4778e 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -67,7 +67,7 @@ void __init default_setup_apic_routing(void) } #endif - if (apic == &apic_flat && num_processors > 8) + if (apic == &apic_flat && num_possible_cpus() > 8) apic = &apic_physflat; printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); -- cgit v1.2.3 From 18dce6ba5c8c6bd0f3ab4efa4cbdd698dab5c40a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:05 -0800 Subject: x86: Fix SCI on IOAPIC != 0 Thomas Renninger reported on IBM x3330 booting a latest kernel on this machine results in: PCI: PCI BIOS revision 2.10 entry at 0xfd61c, last bus=1 PCI: Using configuration type 1 for base access bio: create slab at 0 ACPI: SCI (IRQ30) allocation failed ACPI Exception: AE_NOT_ACQUIRED, Unable to install System Control Interrupt handler (20090903/evevent-161) ACPI: Unable to start the ACPI Interpreter Later all kind of devices fail... and bisect it down to this commit: commit b9c61b70075c87a8612624736faf4a2de5b1ed30 x86/pci: update pirq_enable_irq() to setup io apic routing it turns out we need to set irq routing for the sci on ioapic1 early. 
-v2: make it work without sparseirq too. -v3: fix checkpatch.pl warning, and cc to stable Reported-by: Thomas Renninger Bisected-by: Thomas Renninger Tested-by: Thomas Renninger Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-2-git-send-email-yinghai@kernel.org> Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 50 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53243ca7816..5e4cce254e4 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1538,6 +1538,56 @@ static void __init setup_IO_APIC_irqs(void) " (apicid-pin) not connected\n"); } +/* + * for the gsit that is not in first ioapic + * but could not use acpi_register_gsi() + * like some special sci in IBM x3330 + */ +void setup_IO_APIC_irq_extra(u32 gsi) +{ + int apic_id = 0, pin, idx, irq; + int node = cpu_to_node(boot_cpu_id); + struct irq_desc *desc; + struct irq_cfg *cfg; + + /* + * Convert 'gsi' to 'ioapic.pin'. 
+ */ + apic_id = mp_find_ioapic(gsi); + if (apic_id < 0) + return; + + pin = mp_find_ioapic_pin(apic_id, gsi); + idx = find_irq_entry(apic_id, pin, mp_INT); + if (idx == -1) + return; + + irq = pin_2_irq(idx, apic_id, pin); +#ifdef CONFIG_SPARSE_IRQ + desc = irq_to_desc(irq); + if (desc) + return; +#endif + desc = irq_to_desc_alloc_node(irq, node); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + return; + } + + cfg = desc->chip_data; + add_pin_to_irq_node(cfg, node, apic_id, pin); + + if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { + pr_debug("Pin %d-%d already programmed\n", + mp_ioapics[apic_id].apicid, pin); + return; + } + set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); + + setup_IO_APIC_irq(apic_id, pin, irq, desc, + irq_trigger(idx), irq_polarity(idx)); +} + /* * Set up the timer pin, possibly with the 8259A-master behind. */ -- cgit v1.2.3 From ced5b697a76d325e7a7ac7d382dbbb632c765093 Mon Sep 17 00:00:00 2001 From: Brandon Phiilps Date: Wed, 10 Feb 2010 01:20:06 -0800 Subject: x86: Avoid race condition in pci_enable_msix() Keep chip_data in create_irq_nr and destroy_irq. When two drivers are setting up MSI-X at the same time via pci_enable_msix() there is a race. 
See this dmesg excerpt: [ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X [ 85.170611] alloc irq_desc for 99 on node -1 [ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X [ 85.170614] alloc kstat_irqs on node -1 [ 85.170616] alloc irq_2_iommu on node -1 [ 85.170617] alloc irq_desc for 100 on node -1 [ 85.170619] alloc kstat_irqs on node -1 [ 85.170621] alloc irq_2_iommu on node -1 [ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X [ 85.170626] alloc irq_desc for 101 on node -1 [ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X [ 85.170630] alloc kstat_irqs on node -1 [ 85.170631] alloc irq_2_iommu on node -1 [ 85.170635] alloc irq_desc for 102 on node -1 [ 85.170636] alloc kstat_irqs on node -1 [ 85.170639] alloc irq_2_iommu on node -1 [ 85.170646] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 As you can see igb and ixgbe are both alternating on create_irq_nr() via pci_enable_msix() in their probe function. ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe chooses irq_desc_ptrs[102] and exits the loop, drops vector_lock and calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data = NULL via dynamic_irq_init(). igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[] via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this: cfg_new = irq_desc_ptrs[102]->chip_data; if (cfg_new->vector != 0) continue; This hits the NULL deref. Another possible race exists via pci_disable_msix() in a driver or in the number of error paths that call free_msi_irqs(): destroy_irq() dynamic_irq_cleanup() which sets desc->chip_data = NULL ...race window... desc->chip_data = cfg; Remove the save and restore code for cfg in create_irq_nr() and destroy_irq() and take the desc->lock when checking the irq_cfg. 
Reported-and-analyzed-by: Brandon Philips Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-3-git-send-email-yinghai@kernel.org> Signed-off-by: Brandon Phililps Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53243ca7816..c86591b906f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3228,12 +3228,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) } spin_unlock_irqrestore(&vector_lock, flags); - if (irq > 0) { - dynamic_irq_init(irq); - /* restore it, in case dynamic_irq_init clear it */ - if (desc_new) - desc_new->chip_data = cfg_new; - } + if (irq > 0) + dynamic_irq_init_keep_chip_data(irq); + return irq; } @@ -3256,17 +3253,12 @@ void destroy_irq(unsigned int irq) { unsigned long flags; struct irq_cfg *cfg; - struct irq_desc *desc; - /* store it, in case dynamic_irq_cleanup clear it */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; - dynamic_irq_cleanup(irq); - /* connect back irq_cfg */ - desc->chip_data = cfg; + dynamic_irq_cleanup_keep_chip_data(irq); free_irte(irq); spin_lock_irqsave(&vector_lock, flags); + cfg = irq_to_desc(irq)->chip_data; __clear_irq_vector(irq, cfg); spin_unlock_irqrestore(&vector_lock, flags); } -- cgit v1.2.3 From 5619c28061ff9d2559a93eaba492935530f2a513 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Jul 2009 18:35:11 +0200 Subject: x86: Convert i8259_lock to raw_spinlock Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c86591b906f..f5e40339622 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1830,7 +1830,7 
@@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "\nprinting PIC contents\n"); - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); v = inb(0xa1) << 8 | inb(0x21); printk(KERN_DEBUG "... PIC IMR: %04x\n", v); @@ -1844,7 +1844,7 @@ __apicdebuginit(void) print_PIC(void) outb(0x0a,0xa0); outb(0x0a,0x20); - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); printk(KERN_DEBUG "... PIC ISR: %04x\n", v); -- cgit v1.2.3 From 6738762d73a237ec322b04d8b9d55c8fd5d84713 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:36 -0800 Subject: x86, irq: Remove arch_probe_nr_irqs So keep nr_irqs == NR_IRQS. With radix trees it matters less. Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-33-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f5e40339622..c64ddd9d997 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3826,28 +3826,6 @@ void __init probe_nr_irqs_gsi(void) printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } -#ifdef CONFIG_SPARSE_IRQ -int __init arch_probe_nr_irqs(void) -{ - int nr; - - if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) - nr_irqs = NR_VECTORS * nr_cpu_ids; - - nr = nr_irqs_gsi + 8 * nr_cpu_ids; -#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) - /* - * for MSI and HT dyn irq - */ - nr += nr_irqs_gsi * 16; -#endif - if (nr < nr_irqs) - nr_irqs = nr; - - return 0; -} -#endif - static int __io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr) { -- cgit v1.2.3 From eb5b3794062824ba12d883901eea49ea89d0a678 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Sun, 7 Feb 2010 13:02:50 -0800 Subject: x86, irq: Keep chip_data in create_irq_nr and 
destroy_irq Version 4: use get_irq_chip_data() in destroy_irq() to get rid of some local vars. When two drivers are setting up MSI-X at the same time via pci_enable_msix() there is a race. See this dmesg excerpt: [ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X [ 85.170611] alloc irq_desc for 99 on node -1 [ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X [ 85.170614] alloc kstat_irqs on node -1 [ 85.170616] alloc irq_2_iommu on node -1 [ 85.170617] alloc irq_desc for 100 on node -1 [ 85.170619] alloc kstat_irqs on node -1 [ 85.170621] alloc irq_2_iommu on node -1 [ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X [ 85.170626] alloc irq_desc for 101 on node -1 [ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X [ 85.170630] alloc kstat_irqs on node -1 [ 85.170631] alloc irq_2_iommu on node -1 [ 85.170635] alloc irq_desc for 102 on node -1 [ 85.170636] alloc kstat_irqs on node -1 [ 85.170639] alloc irq_2_iommu on node -1 [ 85.170646] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 As you can see igb and ixgbe are both alternating on create_irq_nr() via pci_enable_msix() in their probe function. ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe chooses irq_desc_ptrs[102] and exits the loop, drops vector_lock and calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data = NULL via dynamic_irq_init(). igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[] via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this: cfg_new = irq_desc_ptrs[102]->chip_data; if (cfg_new->vector != 0) continue; This hits the NULL deref. Another possible race exists via pci_disable_msix() in a driver or in the number of error paths that call free_msi_irqs(): destroy_irq() dynamic_irq_cleanup() which sets desc->chip_data = NULL ...race window... desc->chip_data = cfg; Remove the save and restore code for cfg in create_irq_nr() and destroy_irq() and take the desc->lock when checking the irq_cfg. 
Reported-and-analyzed-by: Brandon Philips Signed-off-by: Yinghai Lu LKML-Reference: <20100207210250.GB8256@jenkins.home.ifup.org> Signed-off-by: Brandon Phiilps Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5e4cce254e4..e93a76bc867 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3278,12 +3278,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) } spin_unlock_irqrestore(&vector_lock, flags); - if (irq > 0) { - dynamic_irq_init(irq); - /* restore it, in case dynamic_irq_init clear it */ - if (desc_new) - desc_new->chip_data = cfg_new; - } + if (irq > 0) + dynamic_irq_init_keep_chip_data(irq); + return irq; } @@ -3305,19 +3302,12 @@ int create_irq(void) void destroy_irq(unsigned int irq) { unsigned long flags; - struct irq_cfg *cfg; - struct irq_desc *desc; - /* store it, in case dynamic_irq_cleanup clear it */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; - dynamic_irq_cleanup(irq); - /* connect back irq_cfg */ - desc->chip_data = cfg; + dynamic_irq_cleanup_keep_chip_data(irq); free_irte(irq); spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq, cfg); + __clear_irq_vector(irq, get_irq_chip_data(irq)); spin_unlock_irqrestore(&vector_lock, flags); } -- cgit v1.2.3