From 674c6479b7bdc78528ea83dd43897e3161558b8b Mon Sep 17 00:00:00 2001 From: Colin Ngam Date: Wed, 3 Aug 2005 13:35:00 -0700 Subject: [IA64-SGI] Altix only: Add PCI Domain number support. This patch enables PCI Domain numbering on Altix. Signed-off-by: Colin Ngam Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/io_init.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index a6649baf629..829ea798501 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -44,6 +44,9 @@ int sn_ioif_inited = 0; /* SN I/O infrastructure initialized? */ struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ +static int max_segment_number = 0; /* Default highest segment number */ +static int max_pcibus_number = 255; /* Default highest pci bus number */ + /* * Hooks and struct for unsupported pci providers */ @@ -157,13 +160,28 @@ static void sn_fixup_ionodes(void) uint64_t nasid; int i, widget; + /* + * Get SGI Specific HUB chipset information. + * Inform Prom that this kernel can support domain bus numbering. + */ for (i = 0; i < numionodes; i++) { hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); nasid = cnodeid_to_nasid(i); + hubdev->max_segment_number = 0xffffffff; + hubdev->max_pcibus_number = 0xff; status = sal_get_hubdev_info(nasid, (uint64_t) __pa(hubdev)); if (status) continue; + /* Save the largest Domain and pcibus numbers found. */ + if (hubdev->max_segment_number) { + /* + * Dealing with a Prom that supports segments. + */ + max_segment_number = hubdev->max_segment_number; + max_pcibus_number = hubdev->max_pcibus_number; + } + /* Attach the error interrupt handlers */ if (nasid & 1) ice_error_init(hubdev); @@ -229,7 +247,7 @@ void sn_pci_unfixup_slot(struct pci_dev *dev) void sn_pci_fixup_slot(struct pci_dev *dev) { int idx; - int segment = 0; + int segment = pci_domain_nr(dev->bus); int status = 0; struct pcibus_bussoft *bs; struct pci_bus *host_pci_bus; @@ -282,9 +300,9 @@ void sn_pci_fixup_slot(struct pci_dev *dev) * PCI host_pci_dev struct and set up host bus linkages */ - bus_no = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32; + bus_no = (SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32) & 0xff; devfn = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle & 0xffffffff; - host_pci_bus = pci_find_bus(pci_domain_nr(dev->bus), bus_no); + host_pci_bus = pci_find_bus(segment, bus_no); host_pci_dev = pci_get_slot(host_pci_bus, devfn); SN_PCIDEV_INFO(dev)->host_pci_dev = host_pci_dev; @@ -332,6 +350,7 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) prom_bussoft_ptr = __va(prom_bussoft_ptr); controller = kcalloc(1,sizeof(struct pci_controller), GFP_KERNEL); + controller->segment = segment; if (!controller) BUG(); @@ -387,7 +406,7 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) if (controller->node >= num_online_nodes()) { struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus); - printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%lu" + printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u" "L_IO=%lx L_MEM=%lx BASE=%lx\n", b->bs_asic_type, b->bs_xid, b->bs_persist_busnum, b->bs_legacy_io, b->bs_legacy_mem, b->bs_base); @@ -442,6 +461,7 @@ sn_sysdata_free_start: static int __init sn_pci_init(void) { int i = 0; + int j = 0; struct pci_dev *pci_dev = NULL; extern void sn_init_cpei_timer(void); #ifdef CONFIG_PROC_FS @@ -476,8 +496,9 @@ static int __init sn_pci_init(void) #endif /* busses are not known yet ... */ - for (i = 0; i < PCI_BUSES_TO_SCAN; i++) - sn_pci_controller_fixup(0, i, NULL); + for (i = 0; i <= max_segment_number; i++) + for (j = 0; j <= max_pcibus_number; j++) + sn_pci_controller_fixup(i, j, NULL); /* * Generic Linux PCI Layer has created the pci_bus and pci_dev -- cgit v1.2.3 From 735e60f4c67823a3e01655c990296e2e56574885 Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Wed, 3 Aug 2005 14:06:00 -0700 Subject: [IA64-SGI] abstract force_interrupt() mechanism Altix patch to abstract the force_interrupt() mechanism away from the pcibr provider. Signed-off-by: Mark Maule Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/irq.c | 50 ++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 22 deletions(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 84d276a14ec..af061dba246 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -317,6 +317,16 @@ void sn_irq_unfixup(struct pci_dev *pci_dev) pci_dev_put(pci_dev); } +static inline void +sn_call_force_intr_provider(struct sn_irq_info *sn_irq_info) +{ + struct sn_pcibus_provider *pci_provider; + + pci_provider = sn_pci_provider[sn_irq_info->irq_bridge_type]; + if (pci_provider && pci_provider->force_interrupt) + (*pci_provider->force_interrupt)(sn_irq_info); +} + static void force_interrupt(int irq) { struct sn_irq_info *sn_irq_info; @@ -325,11 +335,9 @@ static void force_interrupt(int irq) return; rcu_read_lock(); - list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list) { - if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && - (sn_irq_info->irq_bridge != NULL)) - pcibr_force_interrupt(sn_irq_info); - } + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list) + sn_call_force_intr_provider(sn_irq_info); + rcu_read_unlock(); } @@ -351,6 +359,14 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) struct pcidev_info *pcidev_info; struct pcibus_info *pcibus_info; + /* + * Bridge types attached to TIO (anything but PIC) do not need this WAR + * since they do not target Shub II interrupt registers. If that + * ever changes, this check needs to accomodate. + */ + if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_PIC) + return; + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; if (!pcidev_info) return; @@ -377,16 +393,12 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) break; } if (!test_bit(irr_bit, &irr_reg)) { - if (!test_bit(irq, pda->sn_soft_irr)) { - if (!test_bit(irq, pda->sn_in_service_ivecs)) { - regval &= 0xff; - if (sn_irq_info->irq_int_bit & regval & - sn_irq_info->irq_last_intr) { - regval &= - ~(sn_irq_info-> - irq_int_bit & regval); - pcibr_force_interrupt(sn_irq_info); - } + if (!test_bit(irq, pda->sn_in_service_ivecs)) { + regval &= 0xff; + if (sn_irq_info->irq_int_bit & regval & + sn_irq_info->irq_last_intr) { + regval &= ~(sn_irq_info->irq_int_bit & regval); + sn_call_force_intr_provider(sn_irq_info); } } } @@ -404,13 +416,7 @@ void sn_lb_int_war_check(void) rcu_read_lock(); for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) { list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) { - /* - * Only call for PCI bridges that are fully - * initialized. - */ - if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && - (sn_irq_info->irq_bridge != NULL)) - sn_check_intr(i, sn_irq_info); + sn_check_intr(i, sn_irq_info); } } rcu_read_unlock(); -- cgit v1.2.3 From c9221da9f2796f082642c3498edb2c8783ad4774 Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Wed, 3 Aug 2005 14:07:00 -0700 Subject: [IA64-SGI] sn pci provider for TIOCE (pci Altix patch to add an SN pci provider for TIOCE, which is SGI's PCI Express implementation. Signed-off-by: Mark Maule Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/io_init.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 829ea798501..d1fc09b1d51 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "xtalk/hubdev.h" #include "xtalk/xwidgetdev.h" @@ -481,6 +482,7 @@ static int __init sn_pci_init(void) pcibr_init_provider(); tioca_init_provider(); + tioce_init_provider(); /* * This is needed to avoid bounce limit checks in the blk layer -- cgit v1.2.3 From 0aa2c72e59cf1d09a0b321e4e6292af78a51b8b3 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:26:00 -0700 Subject: [IA64-SGI] - New SN hardware support - use_alias_space Use local SHUB alias space when referencing MMRs that are known to be node local. There is a slight performance benefit & code simplification. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/irq.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index af061dba246..607938c288b 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. */ #include @@ -76,16 +76,14 @@ static void sn_enable_irq(unsigned int irq) static void sn_ack_irq(unsigned int irq) { - uint64_t event_occurred, mask = 0; - int nasid; + u64 event_occurred, mask = 0; irq = irq & 0xff; - nasid = get_nasid(); event_occurred = - HUB_L((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED)); + HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED)); mask = event_occurred & SH_ALL_INT_MASK; - HUB_S((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED_ALIAS), - mask); + HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), + mask); __set_bit(irq, (volatile void *)pda->sn_in_service_ivecs); move_irq(irq); @@ -93,15 +91,12 @@ static void sn_ack_irq(unsigned int irq) static void sn_end_irq(unsigned int irq) { - int nasid; int ivec; - uint64_t event_occurred; + u64 event_occurred; ivec = irq & 0xff; if (ivec == SGI_UART_VECTOR) { - nasid = get_nasid(); - event_occurred = HUB_L((uint64_t *) GLOBAL_MMR_ADDR - (nasid, SH_EVENT_OCCURRED)); + event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR (SH_EVENT_OCCURRED)); /* If the UART bit is set here, we may have received an * interrupt from the UART that the driver missed. To * make sure, we IPI ourselves to force us to look again. -- cgit v1.2.3 From 2fdbb590e4f9b346e5d06cf7f85dcb7a9f2e0a48 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:26:00 -0700 Subject: [IA64-SGI] - New SN hardware support - boot_init_shub2 Update the addresses of the pio_write_status_addr so that they are correct for newer processors. Shub2 did not number the threads in the order that I had expected. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 7c7fe441d62..ed77715393e 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -532,8 +532,8 @@ void __init sn_cpu_init(void) */ { u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0}; - u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_1, - SH2_PIO_WRITE_STATUS_2, SH2_PIO_WRITE_STATUS_3}; + u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_2, + SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; u64 *pio; pio = is_shub1() ? pio1 : pio2; pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]); -- cgit v1.2.3 From 1007d02160974a46cc4fd3b1737c183c0471b88f Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:27:00 -0700 Subject: [IA64-SGI] - New SN hardware support - no_wars Disable some shub1-specific code when running on systems with shub2. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/sn2/timer_interrupt.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c index cde7375390b..adf5db2e2af 100644 --- a/arch/ia64/sn/kernel/sn2/timer_interrupt.c +++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c @@ -1,7 +1,7 @@ /* * * - * Copyright (c) 2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -50,14 +50,16 @@ void sn_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) LED_CPU_HEARTBEAT, LED_CPU_HEARTBEAT); } - if (enable_shub_wars_1_1()) { - /* Bugfix code for SHUB 1.1 */ - if (pda->pio_shub_war_cam_addr) - *pda->pio_shub_war_cam_addr = 0x8000000000000010UL; + if (is_shub1()) { + if (enable_shub_wars_1_1()) { + /* Bugfix code for SHUB 1.1 */ + if (pda->pio_shub_war_cam_addr) + *pda->pio_shub_war_cam_addr = 0x8000000000000010UL; + } + if (pda->sn_lb_int_war_ticks == 0) + sn_lb_int_war_check(); + pda->sn_lb_int_war_ticks++; + if (pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL) + pda->sn_lb_int_war_ticks = 0; } - if (pda->sn_lb_int_war_ticks == 0) - sn_lb_int_war_check(); - pda->sn_lb_int_war_ticks++; - if (pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL) - pda->sn_lb_int_war_ticks = 0; } -- cgit v1.2.3 From 7e95b9d6e21eda23bac1ff024d465d2072c8996d Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:27:00 -0700 Subject: [IA64-SGI] - New SN hardware support - bte_fixes Change the BTE driver so that it works for both shub1 and shub2. Most of the changes are related to the number of cores that use the BTE engine, to the MMR addresses of various shub registers, and to using the correct processor or network physical address. Signed-off-by: Russ Anderson Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/bte.c | 82 +++++++++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 31 deletions(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 647deae9bfc..7adef84190d 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -29,16 +29,30 @@ /* two interfaces on two btes */ #define MAX_INTERFACES_TO_TRY 4 +#define MAX_NODES_TO_TRY 2 static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface) { nodepda_t *tmp_nodepda; + if (nasid_to_cnodeid(nasid) == -1) + return (struct bteinfo_s *)NULL;; + tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid)); return &tmp_nodepda->bte_if[interface]; } +static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode) +{ + if (is_shub2()) { + BTE_CTRL_STORE(bte, (IBLS_BUSY | ((len) | (mode) << 24))); + } else { + BTE_LNSTAT_STORE(bte, len); + BTE_CTRL_STORE(bte, mode); + } +} + /************************************************************************ * Block Transfer Engine copy related functions. * @@ -67,13 +81,15 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) { u64 transfer_size; u64 transfer_stat; + u64 notif_phys_addr; struct bteinfo_s *bte; bte_result_t bte_status; unsigned long irq_flags; unsigned long itc_end = 0; - struct bteinfo_s *btes_to_try[MAX_INTERFACES_TO_TRY]; - int bte_if_index; - int bte_pri, bte_sec; + int nasid_to_try[MAX_NODES_TO_TRY]; + int my_nasid = get_nasid(); + int bte_if_index, nasid_index; + int bte_first, btes_per_node = BTES_PER_NODE; BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n", src, dest, len, mode, notification)); @@ -86,36 +102,26 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK)); BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT))); - /* CPU 0 (per node) tries bte0 first, CPU 1 try bte1 first */ - if (cpuid_to_subnode(smp_processor_id()) == 0) { - bte_pri = 0; - bte_sec = 1; - } else { - bte_pri = 1; - bte_sec = 0; - } + /* + * Start with interface corresponding to cpu number + */ + bte_first = get_cpu() % btes_per_node; if (mode & BTE_USE_DEST) { /* try remote then local */ - btes_to_try[0] = bte_if_on_node(NASID_GET(dest), bte_pri); - btes_to_try[1] = bte_if_on_node(NASID_GET(dest), bte_sec); + nasid_to_try[0] = NASID_GET(dest); if (mode & BTE_USE_ANY) { - btes_to_try[2] = bte_if_on_node(get_nasid(), bte_pri); - btes_to_try[3] = bte_if_on_node(get_nasid(), bte_sec); + nasid_to_try[1] = my_nasid; } else { - btes_to_try[2] = NULL; - btes_to_try[3] = NULL; + nasid_to_try[1] = (int)NULL; } } else { /* try local then remote */ - btes_to_try[0] = bte_if_on_node(get_nasid(), bte_pri); - btes_to_try[1] = bte_if_on_node(get_nasid(), bte_sec); + nasid_to_try[0] = my_nasid; if (mode & BTE_USE_ANY) { - btes_to_try[2] = bte_if_on_node(NASID_GET(dest), bte_pri); - btes_to_try[3] = bte_if_on_node(NASID_GET(dest), bte_sec); + nasid_to_try[1] = NASID_GET(dest); } else { - btes_to_try[2] = NULL; - btes_to_try[3] = NULL; + nasid_to_try[1] = (int)NULL; } } @@ -123,11 +129,12 @@ retry_bteop: do { local_irq_save(irq_flags); - bte_if_index = 0; + bte_if_index = bte_first; + nasid_index = 0; /* Attempt to lock one of the BTE interfaces. */ - while (bte_if_index < MAX_INTERFACES_TO_TRY) { - bte = btes_to_try[bte_if_index++]; + while (nasid_index < MAX_NODES_TO_TRY) { + bte = bte_if_on_node(nasid_to_try[nasid_index],bte_if_index); if (bte == NULL) { continue; @@ -143,6 +150,15 @@ retry_bteop: break; } } + + bte_if_index = (bte_if_index + 1) % btes_per_node; /* Next interface */ + if (bte_if_index == bte_first) { + /* + * We've tried all interfaces on this node + */ + nasid_index++; + } + bte = NULL; } @@ -169,7 +185,13 @@ retry_bteop: /* Initialize the notification to a known value. */ *bte->most_rcnt_na = BTE_WORD_BUSY; + notif_phys_addr = TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na)); + if (is_shub2()) { + src = SH2_TIO_PHYS_TO_DMA(src); + dest = SH2_TIO_PHYS_TO_DMA(dest); + notif_phys_addr = SH2_TIO_PHYS_TO_DMA(notif_phys_addr); + } /* Set the source and destination registers */ BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src)))); BTE_SRC_STORE(bte, TO_PHYS(src)); @@ -177,14 +199,12 @@ retry_bteop: BTE_DEST_STORE(bte, TO_PHYS(dest)); /* Set the notification register */ - BTE_PRINTKV(("IBNA = 0x%lx)\n", - TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na)))); - BTE_NOTIF_STORE(bte, - TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na))); + BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr)); + BTE_NOTIF_STORE(bte, notif_phys_addr); /* Initiate the transfer */ BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode))); - BTE_START_TRANSFER(bte, transfer_size, BTE_VALID_MODE(mode)); + bte_start_transfer(bte, transfer_size, BTE_VALID_MODE(mode)); itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec); -- cgit v1.2.3 From 68b9753f47953930cb94de0223c163f289399091 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:28:00 -0700 Subject: [IA64-SGI] - New SN hardware support - cpu_relax Add a few missing calls to "hint @pause". Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/bte.c | 1 + arch/ia64/sn/kernel/huberror.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 7adef84190d..b75814efadb 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -215,6 +215,7 @@ retry_bteop: } while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) { + cpu_relax(); if (ia64_get_itc() > itc_end) { BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n", NASID_GET(bte->bte_base_addr), bte->bte_num, diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c index 5c39b43ba3c..5c5eb01c50f 100644 --- a/arch/ia64/sn/kernel/huberror.c +++ b/arch/ia64/sn/kernel/huberror.c @@ -76,7 +76,7 @@ void hubiio_crb_free(struct hubdev_info *hubdev_info, int crbnum) */ REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum)); while (REMOTE_HUB_L(hubdev_info->hdi_nasid, IIO_ICDR) & IIO_ICDR_PND) - udelay(1); + cpu_relax(); } -- cgit v1.2.3 From 470ceb05d9a2b4d61c19fac615a79e56e8401565 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:28:00 -0700 Subject: [IA64-SGI] - New SN hardware support - ptc_fixes Shub2 provides a much improved mechanism for issuing internode TLB purges. Add code to support the newer mechanism. There is also some debug code (disabled) that is useful for testing. Collect statistics on the number, type & duration of TLB purges. This data will be useful for making future improvements in the algorithms. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/setup.c | 1 + arch/ia64/sn/kernel/sn2/ptc_deadlock.S | 13 +- arch/ia64/sn/kernel/sn2/sn2_smp.c | 256 ++++++++++++++++++++++++++++++--- 3 files changed, 243 insertions(+), 27 deletions(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index ed77715393e..6648deb778a 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -403,6 +403,7 @@ static void __init sn_init_pdas(char **cmdline_p) memset(nodepdaindr[cnode], 0, sizeof(nodepda_t)); memset(nodepdaindr[cnode]->phys_cpuid, -1, sizeof(nodepdaindr[cnode]->phys_cpuid)); + spin_lock_init(&nodepdaindr[cnode]->ptc_lock); } /* diff --git a/arch/ia64/sn/kernel/sn2/ptc_deadlock.S b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S index 96cb71d1568..3fa95065a44 100644 --- a/arch/ia64/sn/kernel/sn2/ptc_deadlock.S +++ b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #include @@ -11,7 +11,7 @@ #define DEADLOCKBIT SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT #define WRITECOUNTMASK SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK -#define ALIAS_OFFSET (SH1_PIO_WRITE_STATUS_0_ALIAS-SH1_PIO_WRITE_STATUS_0) +#define ALIAS_OFFSET 8 .global sn2_ptc_deadlock_recovery_core @@ -36,13 +36,15 @@ sn2_ptc_deadlock_recovery_core: extr.u piowcphy=piowc,0,61;; // Convert piowc to uncached physical address dep piowcphy=-1,piowcphy,63,1 movl mask=WRITECOUNTMASK + mov r8=r0 1: add scr2=ALIAS_OFFSET,piowc // Address of WRITE_STATUS alias register - mov scr1=7;; // Clear DEADLOCK, WRITE_ERROR, MULTI_WRITE_ERROR - st8.rel [scr2]=scr1;; + ;; + ld8.acq scr1=[scr2];; 5: ld8.acq scr1=[piowc];; // Wait for PIOs to complete. + hint @pause and scr2=scr1,mask;; // mask of writecount bits cmp.ne p6,p0=zeroval,scr2 (p6) br.cond.sptk 5b @@ -57,6 +59,7 @@ sn2_ptc_deadlock_recovery_core: st8.rel [ptc0]=data0 // Write PTC0 & wait for completion. 5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + hint @pause and scr2=scr1,mask;; // mask of writecount bits cmp.ne p6,p0=zeroval,scr2 (p6) br.cond.sptk 5b;; @@ -67,6 +70,7 @@ sn2_ptc_deadlock_recovery_core: (p7) st8.rel [ptc1]=data1;; // Now write PTC1. 5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + hint @pause and scr2=scr1,mask;; // mask of writecount bits cmp.ne p6,p0=zeroval,scr2 (p6) br.cond.sptk 5b @@ -77,6 +81,7 @@ sn2_ptc_deadlock_recovery_core: srlz.i;; ////////////// END PHYSICAL MODE //////////////////// +(p8) add r8=1,r8 (p8) br.cond.spnt 1b;; // Repeat if DEADLOCK occurred. br.ret.sptk rp diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 7af05a7ac74..0a4ee50c302 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #include @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include @@ -39,12 +41,120 @@ #include #include -void sn2_ptc_deadlock_recovery(volatile unsigned long *, unsigned long data0, - volatile unsigned long *, unsigned long data1); +DEFINE_PER_CPU(struct ptc_stats, ptcstats); +DECLARE_PER_CPU(struct ptc_stats, ptcstats); static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); -static unsigned long sn2_ptc_deadlock_count; +void sn2_ptc_deadlock_recovery(short *, short, int, volatile unsigned long *, unsigned long data0, + volatile unsigned long *, unsigned long data1); + +#ifdef DEBUG_PTC +/* + * ptctest: + * + * xyz - 3 digit hex number: + * x - Force PTC purges to use shub: + * 0 - no force + * 1 - force + * y - interupt enable + * 0 - disable interrupts + * 1 - leave interuupts enabled + * z - type of lock: + * 0 - global lock + * 1 - node local lock + * 2 - no lock + * + * Note: on shub1, only ptctest == 0 is supported. Don't try other values! + */ + +static unsigned int sn2_ptctest = 0; + +static int __init ptc_test(char *str) +{ + get_option(&str, &sn2_ptctest); + return 1; +} +__setup("ptctest=", ptc_test); + +static inline int ptc_lock(unsigned long *flagp) +{ + unsigned long opt = sn2_ptctest & 255; + + switch (opt) { + case 0x00: + spin_lock_irqsave(&sn2_global_ptc_lock, *flagp); + break; + case 0x01: + spin_lock_irqsave(&sn_nodepda->ptc_lock, *flagp); + break; + case 0x02: + local_irq_save(*flagp); + break; + case 0x10: + spin_lock(&sn2_global_ptc_lock); + break; + case 0x11: + spin_lock(&sn_nodepda->ptc_lock); + break; + case 0x12: + break; + default: + BUG(); + } + return opt; +} + +static inline void ptc_unlock(unsigned long flags, int opt) +{ + switch (opt) { + case 0x00: + spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); + break; + case 0x01: + spin_unlock_irqrestore(&sn_nodepda->ptc_lock, flags); + break; + case 0x02: + local_irq_restore(flags); + break; + case 0x10: + spin_unlock(&sn2_global_ptc_lock); + break; + case 0x11: + spin_unlock(&sn_nodepda->ptc_lock); + break; + case 0x12: + break; + default: + BUG(); + } +} +#else + +#define sn2_ptctest 0 + +static inline int ptc_lock(unsigned long *flagp) +{ + spin_lock_irqsave(&sn2_global_ptc_lock, *flagp); + return 0; +} + +static inline void ptc_unlock(unsigned long flags, int opt) +{ + spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); +} +#endif + +struct ptc_stats { + unsigned long ptc_l; + unsigned long change_rid; + unsigned long shub_ptc_flushes; + unsigned long nodes_flushed; + unsigned long deadlocks; + unsigned long lock_itc_clocks; + unsigned long shub_itc_clocks; + unsigned long shub_itc_clocks_max; +}; static inline unsigned long wait_piowc(void) { @@ -89,9 +199,9 @@ void sn2_global_tlb_purge(unsigned long start, unsigned long end, unsigned long nbits) { - int i, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; + int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; volatile unsigned long *ptc0, *ptc1; - unsigned long flags = 0, data0 = 0, data1 = 0; + unsigned long itc, itc2, flags, data0 = 0, data1 = 0; struct mm_struct *mm = current->active_mm; short nasids[MAX_NUMNODES], nix; nodemask_t nodes_flushed; @@ -114,16 +224,19 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, start += (1UL << nbits); } while (start < end); ia64_srlz_i(); + __get_cpu_var(ptcstats).ptc_l++; preempt_enable(); return; } if (atomic_read(&mm->mm_users) == 1) { flush_tlb_mm(mm); + __get_cpu_var(ptcstats).change_rid++; preempt_enable(); return; } + itc = ia64_get_itc(); nix = 0; for_each_node_mask(cnode, nodes_flushed) nasids[nix++] = cnodeid_to_nasid(cnode); @@ -148,7 +261,12 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, mynasid = get_nasid(); - spin_lock_irqsave(&sn2_global_ptc_lock, flags); + itc = ia64_get_itc(); + opt = ptc_lock(&flags); + itc2 = ia64_get_itc(); + __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc; + __get_cpu_var(ptcstats).shub_ptc_flushes++; + __get_cpu_var(ptcstats).nodes_flushed += nix; do { if (shub1) @@ -157,7 +275,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); for (i = 0; i < nix; i++) { nasid = nasids[i]; - if (unlikely(nasid == mynasid)) { + if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid)) { ia64_ptcga(start, nbits << 2); ia64_srlz_i(); } else { @@ -169,18 +287,22 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, flushed = 1; } } - if (flushed && (wait_piowc() & - SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK)) { - sn2_ptc_deadlock_recovery(ptc0, data0, ptc1, data1); + (SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK))) { + sn2_ptc_deadlock_recovery(nasids, nix, mynasid, ptc0, data0, ptc1, data1); } start += (1UL << nbits); } while (start < end); - spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); + itc2 = ia64_get_itc() - itc2; + __get_cpu_var(ptcstats).shub_itc_clocks += itc2; + if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) + __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2; + + ptc_unlock(flags, opt); preempt_enable(); } @@ -192,31 +314,29 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, * TLB flush transaction. The recovery sequence is somewhat tricky & is * coded in assembly language. */ -void sn2_ptc_deadlock_recovery(volatile unsigned long *ptc0, unsigned long data0, +void sn2_ptc_deadlock_recovery(short *nasids, short nix, int mynasid, volatile unsigned long *ptc0, unsigned long data0, volatile unsigned long *ptc1, unsigned long data1) { extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long); - int cnode, mycnode, nasid; - volatile unsigned long *piows; - volatile unsigned long zeroval; + short nasid, i; + unsigned long *piows, zeroval; - sn2_ptc_deadlock_count++; + __get_cpu_var(ptcstats).deadlocks++; - piows = pda->pio_write_status_addr; + piows = (unsigned long *) pda->pio_write_status_addr; zeroval = pda->pio_write_status_val; - mycnode = numa_node_id(); - - for_each_online_node(cnode) { - if (is_headless_node(cnode) || cnode == mycnode) + for (i=0; i < nix; i++) { + nasid = nasids[i]; + if (!(sn2_ptctest & 3) && nasid == mynasid) continue; - nasid = cnodeid_to_nasid(cnode); ptc0 = CHANGE_NASID(nasid, ptc0); if (ptc1) ptc1 = CHANGE_NASID(nasid, ptc1); sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); } + } /** @@ -293,3 +413,93 @@ void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect) sn_send_IPI_phys(nasid, physid, vector, delivery_mode); } + +#ifdef CONFIG_PROC_FS + +#define PTC_BASENAME "sgi_sn/ptc_statistics" + +static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset) +{ + if (*offset < NR_CPUS) + return offset; + return NULL; +} + +static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset) +{ + (*offset)++; + if (*offset < NR_CPUS) + return offset; + return NULL; +} + +static void sn2_ptc_seq_stop(struct seq_file *file, void *data) +{ +} + +static int sn2_ptc_seq_show(struct seq_file *file, void *data) +{ + struct ptc_stats *stat; + int cpu; + + cpu = *(loff_t *) data; + + if (!cpu) { + seq_printf(file, "# ptc_l change_rid shub_ptc_flushes shub_nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max\n"); + seq_printf(file, "# ptctest %d\n", sn2_ptctest); + } + + if (cpu < NR_CPUS && cpu_online(cpu)) { + stat = &per_cpu(ptcstats, cpu); + seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, + stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, + stat->deadlocks, + 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec); + } + + return 0; +} + +static struct seq_operations sn2_ptc_seq_ops = { + .start = sn2_ptc_seq_start, + .next = sn2_ptc_seq_next, + .stop = sn2_ptc_seq_stop, + .show = sn2_ptc_seq_show +}; + +int sn2_ptc_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &sn2_ptc_seq_ops); +} + +static struct file_operations proc_sn2_ptc_operations = { + .open = sn2_ptc_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static struct proc_dir_entry *proc_sn2_ptc; + +static int __init sn2_ptc_init(void) +{ + if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) { + printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME); + return -EINVAL; + } + proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations; + spin_lock_init(&sn2_global_ptc_lock); + return 0; +} + +static void __exit sn2_ptc_exit(void) +{ + remove_proc_entry(PTC_BASENAME, NULL); +} + +module_init(sn2_ptc_init); +module_exit(sn2_ptc_exit); +#endif /* CONFIG_PROC_FS */ + -- cgit v1.2.3 From ecc3c30ae39c4d3cbf249a1ebd599465e0e25708 Mon Sep 17 00:00:00 2001 From: Mark Goodwin Date: Tue, 16 Aug 2005 00:50:00 -0700 Subject: [IA64] - SGI SN hwperf enhancements - export_pci_topology Bugfix to export PCI topology information in /proc/sgi_sn/sn_topology. Signed-off-by: Mark Goodwin Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/sn2/sn_hwperf.c | 50 ++++++++++++------------------------- 1 file changed, 16 insertions(+), 34 deletions(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 833e700fdac..58e48fdf5b4 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -174,29 +174,22 @@ static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj, return slabname; } -static void print_pci_topology(struct seq_file *s, - struct sn_hwperf_object_info *obj, int *ordinal, - u64 rack, u64 bay, u64 slot, u64 slab) +static void print_pci_topology(struct seq_file *s) { - char *p1; - char *p2; - char *pg; - - if (!(pg = (char *)get_zeroed_page(GFP_KERNEL))) - return; /* ignore */ - if (ia64_sn_ioif_get_pci_topology(rack, bay, slot, slab, - __pa(pg), PAGE_SIZE) == SN_HWPERF_OP_OK) { - for (p1=pg; *p1 && p1 < pg + PAGE_SIZE;) { - if (!(p2 = strchr(p1, '\n'))) - break; - *p2 = '\0'; - seq_printf(s, "pcibus %d %s-%s\n", - *ordinal, obj->location, p1); - (*ordinal)++; - p1 = p2 + 1; - } + char *p; + size_t sz; + int e; + + for (sz = PAGE_SIZE; sz < 16 * PAGE_SIZE; sz += PAGE_SIZE) { + if (!(p = (char *)kmalloc(sz, GFP_KERNEL))) + break; + e = ia64_sn_ioif_get_pci_topology(__pa(p), sz); + if (e == SALRET_OK) + seq_puts(s, p); + kfree(p); + if (e == SALRET_OK || e == SALRET_NOT_IMPLEMENTED) + break; } - free_page((unsigned long)pg); } static int sn_topology_show(struct seq_file *s, void *d) @@ -215,7 +208,6 @@ static int sn_topology_show(struct seq_file *s, void *d) struct sn_hwperf_object_info *p; struct sn_hwperf_object_info *obj = d; /* this object */ struct sn_hwperf_object_info *objs = s->private; /* all objects */ - int rack, bay, slot, slab; u8 shubtype; u8 system_size; u8 sharing_size; @@ -225,7 +217,6 @@ static int sn_topology_show(struct seq_file *s, void *d) u8 region_size; u16 nasid_mask; int nasid_msb; - int pci_bus_ordinal = 0; if (obj == objs) { seq_printf(s, "# sn_topology version 2\n"); @@ -253,6 +244,8 @@ static int sn_topology_show(struct seq_file *s, void *d) shubtype ? "shub2" : "shub1", (u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift, system_size, sharing_size, coher, region_size); + + print_pci_topology(s); } if (SN_HWPERF_FOREIGN(obj)) { @@ -300,17 +293,6 @@ static int sn_topology_show(struct seq_file *s, void *d) seq_putc(s, '\n'); } } - - /* - * PCI busses attached to this node, if any - */ - if (sn_hwperf_location_to_bpos(obj->location, - &rack, &bay, &slot, &slab)) { - /* export pci bus info */ - print_pci_topology(s, obj, &pci_bus_ordinal, - rack, bay, slot, slab); - - } } if (obj->ports) { -- cgit v1.2.3 From 60a3ba0bb45995ecf9cfe208527d7cfd6128d053 Mon Sep 17 00:00:00 2001 From: Mark Goodwin Date: Wed, 17 Aug 2005 15:17:00 -0700 Subject: [IA64] - SGI SN hwperf enhancements - Add a new exported function for determining the nearest node with CPUs for I/O nodes and fix a bug where the hwperf dynamic misc device was being registered before misc_init(). Signed-off-by: Mark Goodwin Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/sn2/sn_hwperf.c | 266 +++++++++++++++++++++++++++++++++--- 1 file changed, 246 insertions(+), 20 deletions(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 58e48fdf5b4..0261452d08d 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -59,7 +59,7 @@ static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret) struct sn_hwperf_object_info *objbuf = NULL; if ((e = sn_hwperf_init()) < 0) { - printk("sn_hwperf_init failed: err %d\n", e); + printk(KERN_ERR "sn_hwperf_init failed: err %d\n", e); goto out; } @@ -111,7 +111,7 @@ static int sn_hwperf_geoid_to_cnode(char *location) if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab)) return -1; - for (cnode = 0; cnode < numionodes; cnode++) { + for_each_node(cnode) { geoid = cnodeid_get_geoid(cnode); module_id = geo_module(geoid); this_rack = MODULE_GET_RACK(module_id); @@ -124,11 +124,13 @@ static int sn_hwperf_geoid_to_cnode(char *location) } } - return cnode < numionodes ? cnode : -1; + return node_possible(cnode) ? cnode : -1; } static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj) { + if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) + BUG(); if (!obj->sn_hwp_this_part) return -1; return sn_hwperf_geoid_to_cnode(obj->location); @@ -192,6 +194,181 @@ static void print_pci_topology(struct seq_file *s) } } +static inline int sn_hwperf_has_cpus(cnodeid_t node) +{ + return node_online(node) && nr_cpus_node(node); +} + +static inline int sn_hwperf_has_mem(cnodeid_t node) +{ + return node_online(node) && NODE_DATA(node)->node_present_pages; +} + +static struct sn_hwperf_object_info * +sn_hwperf_findobj_id(struct sn_hwperf_object_info *objbuf, + int nobj, int id) +{ + int i; + struct sn_hwperf_object_info *p = objbuf; + + for (i=0; i < nobj; i++, p++) { + if (p->id == id) + return p; + } + + return NULL; + +} + +static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objbuf, + int nobj, cnodeid_t node, cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + struct sn_hwperf_object_info *nodeobj = NULL; + struct sn_hwperf_object_info *op; + struct sn_hwperf_object_info *dest; + struct sn_hwperf_object_info *router; + struct sn_hwperf_port_info ptdata[16]; + int sz, i, j; + cnodeid_t c; + int found_mem = 0; + int found_cpu = 0; + + if (!node_possible(node)) + return -EINVAL; + + if (sn_hwperf_has_cpus(node)) { + if (near_cpu_node) + *near_cpu_node = node; + found_cpu++; + } + + if (sn_hwperf_has_mem(node)) { + if (near_mem_node) + *near_mem_node = node; + found_mem++; + } + + if (found_cpu && found_mem) + return 0; /* trivially successful */ + + /* find the argument node object */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (!SN_HWPERF_IS_NODE(op) && !SN_HWPERF_IS_IONODE(op)) + continue; + if (node == sn_hwperf_obj_to_cnode(op)) { + nodeobj = op; + break; + } + } + if (!nodeobj) { + e = -ENOENT; + goto err; + } + + /* get it's interconnect topology */ + sz = op->ports * sizeof(struct sn_hwperf_port_info); + if (sz > sizeof(ptdata)) + BUG(); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, nodeobj->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + + /* find nearest node with cpus and nearest memory */ + for (router=NULL, j=0; j < op->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, ptdata[j].conn_id); + if (!dest || SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (SN_HWPERF_IS_ROUTER(dest)) + router = dest; + } + + if (router && (!found_cpu || !found_mem)) { + /* search for a node connected to the same router */ + sz = router->ports * sizeof(struct sn_hwperf_port_info); + if (sz > sizeof(ptdata)) + BUG(); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, router->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + for (j=0; j < router->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, + ptdata[j].conn_id); + if (!dest || dest->id == node || + SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || + SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) + break; + } + } + + if (!found_cpu || !found_mem) { + /* resort to _any_ node with CPUs and memory */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (SN_HWPERF_FOREIGN(op) || + SN_HWPERF_IS_IONODE(op) || + !SN_HWPERF_IS_NODE(op)) { + continue; + } + c = sn_hwperf_obj_to_cnode(op); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) + break; + } + } + + if (!found_cpu || !found_mem) + e = -ENODATA; + +err: + return e; +} + + static int sn_topology_show(struct seq_file *s, void *d) { int sz; @@ -265,11 +442,24 @@ static int sn_topology_show(struct seq_file *s, void *d) if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) seq_putc(s, '\n'); else { + cnodeid_t near_mem = -1; + cnodeid_t near_cpu = -1; + seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal)); - for (i=0; i < numionodes; i++) { - seq_printf(s, i ? ":%d" : ", dist %d", - node_distance(ordinal, i)); + + if (sn_hwperf_get_nearest_node_objdata(objs, sn_hwperf_obj_cnt, + ordinal, &near_mem, &near_cpu) == 0) { + seq_printf(s, ", near_mem_nodeid %d, near_cpu_nodeid %d", + near_mem, near_cpu); + } + + if (!SN_HWPERF_IS_IONODE(obj)) { + for_each_online_node(i) { + seq_printf(s, i ? ":%d" : ", dist %d", + node_distance(ordinal, i)); + } } + seq_putc(s, '\n'); /* @@ -554,6 +744,8 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { memset(p, 0, a.sz); for (i = 0; i < nobj; i++) { + if (!SN_HWPERF_IS_NODE(objs + i)) + continue; node = sn_hwperf_obj_to_cnode(objs + i); for_each_online_cpu(j) { if (node != cpu_to_node(j)) @@ -580,7 +772,7 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) case SN_HWPERF_GET_NODE_NASID: if (a.sz != sizeof(u64) || - (node = a.arg) < 0 || node >= numionodes) { + (node = a.arg) < 0 || !node_possible(node)) { r = -EINVAL; goto error; } @@ -609,6 +801,14 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) vfree(objs); goto error; } + + if (!SN_HWPERF_IS_NODE(objs + i) && + !SN_HWPERF_IS_IONODE(objs + i)) { + r = -ENOENT; + vfree(objs); + goto error; + } + *(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i); vfree(objs); } @@ -674,6 +874,7 @@ static int sn_hwperf_init(void) /* single threaded, once-only initialization */ down(&sn_hwperf_init_mutex); + if (sn_hwperf_salheap) { up(&sn_hwperf_init_mutex); return e; @@ -724,19 +925,6 @@ out: sn_hwperf_salheap = NULL; sn_hwperf_obj_cnt = 0; } - - if (!e) { - /* - * Register a dynamic misc device for ioctl. Platforms - * supporting hotplug will create /dev/sn_hwperf, else - * user can to look up the minor number in /proc/misc. - */ - if ((e = misc_register(&sn_hwperf_dev)) != 0) { - printk(KERN_ERR "sn_hwperf_init: misc register " - "for \"sn_hwperf\" failed, err %d\n", e); - } - } - up(&sn_hwperf_init_mutex); return e; } @@ -764,3 +952,41 @@ int sn_topology_release(struct inode *inode, struct file *file) vfree(seq->private); return seq_release(inode, file); } + +int sn_hwperf_get_nearest_node(cnodeid_t node, + cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + int nobj; + struct sn_hwperf_object_info *objbuf; + + if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) { + e = sn_hwperf_get_nearest_node_objdata(objbuf, nobj, + node, near_mem_node, near_cpu_node); + vfree(objbuf); + } + + return e; +} + +static int __devinit sn_hwperf_misc_register_init(void) +{ + int e; + + sn_hwperf_init(); + + /* + * Register a dynamic misc device for hwperf ioctls. Platforms + * supporting hotplug will create /dev/sn_hwperf, else user + * can to look up the minor number in /proc/misc. + */ + if ((e = misc_register(&sn_hwperf_dev)) != 0) { + printk(KERN_ERR "sn_hwperf_misc_register_init: failed to " + "register misc device for \"%s\"\n", sn_hwperf_dev.name); + } + + return e; +} + +device_initcall(sn_hwperf_misc_register_init); /* after misc_init() */ +EXPORT_SYMBOL(sn_hwperf_get_nearest_node); -- cgit v1.2.3 From 5b9021bc5800796e23e4994f8cf2dc61536be0a7 Mon Sep 17 00:00:00 2001 From: Russ Anderson <(rja@sgi.com)> Date: Wed, 17 Aug 2005 10:00:00 -0700 Subject: [IA64] SGI SN remove redundant partition SAL call Clean up of SGI SN partitioning related code. The SN_SAL_GET_SN_INFO SAL call returns the partition ID, making the SN_SAL_SYSCTL_PARTITION_GET SAL call redundant. Remove sn_partid and use sn_partition_id. Signed-off-by: Russ Anderson (rja@sgi.com) Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/setup.c | 2 -- arch/ia64/sn/kernel/sn2/sn_proc_fs.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 6648deb778a..a594aca959e 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -80,8 +80,6 @@ EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); EXPORT_PER_CPU_SYMBOL(__sn_nodepda); -partid_t sn_partid = -1; -EXPORT_SYMBOL(sn_partid); char sn_system_serial_number_string[128]; EXPORT_SYMBOL(sn_system_serial_number_string); u64 sn_partition_serial_number; diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c index 6a80fca807b..51bf82720d9 100644 --- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #include #include @@ -15,7 +15,7 @@ static int partition_id_show(struct seq_file *s, void *p) { - seq_printf(s, "%d\n", sn_local_partid()); + seq_printf(s, "%d\n", sn_partition_id); return 0; } -- cgit v1.2.3 From 8409668b561fbe464f7a392e8dc77eca225d27ac Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Thu, 25 Aug 2005 11:45:00 -0700 Subject: [IA64] altix: Abstract irq_affinity at the sn pci provider Altix patch to abstract irq_affinity down to the pci provider level since different SGI hardware implements this in different ways. Signed-off-by: Mark Maule Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/irq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 607938c288b..9fc74631ba8 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -127,6 +127,7 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) int local_widget, status; nasid_t local_nasid; struct sn_irq_info *new_irq_info; + struct sn_pcibus_provider *pci_provider; new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); if (new_irq_info == NULL) @@ -166,8 +167,9 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) new_irq_info->irq_cpuid = cpuid; register_intr_pda(new_irq_info); - if (IS_PCI_BRIDGE_ASIC(new_irq_info->irq_bridge_type)) - pcibr_change_devices_irq(new_irq_info); + pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; + if (pci_provider && pci_provider->target_interrupt) + (pci_provider->target_interrupt)(new_irq_info); spin_lock(&sn_irq_info_lock); list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); -- cgit v1.2.3 From d1e079b3fc90c7c114f46771e983a72ac8740882 Mon Sep 17 00:00:00 2001 From: Russ Anderson <(rja@sgi.com)> Date: Mon, 15 Aug 2005 14:46:00 -0700 Subject: [IA64-SGI] fix bte_copy() calling smp_processor_id() while preemptible bte_copy() calls calls smp_processor_id(), which will get flagged if preemption if enabled. raw_smp_processor_id() is used instead because we are just using it to pick a BTE interface and are not tied to a specific cpu. Signed-off-by: Russ Anderson (rja@sgi.com) Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/bte.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/ia64/sn/kernel') diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index b75814efadb..45854c637e9 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -105,7 +105,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) /* * Start with interface corresponding to cpu number */ - bte_first = get_cpu() % btes_per_node; + bte_first = raw_smp_processor_id() % btes_per_node; if (mode & BTE_USE_DEST) { /* try remote then local */ -- cgit v1.2.3