Diffstat (limited to 'arch/ppc64')
29 files changed, 628 insertions, 4313 deletions
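The largest functional change in this patch is to arch/ppc64/boot/addRamDisk.c: instead of parsing a System.map file to locate _end, the tool now reads the vmlinux ELF header directly and rounds the first program header's p_memsz up to a 4k page boundary. The sketch below is illustrative only and not part of the patch; apart from the <elf.h> constants, the names are made up, and like the original tool it assumes a big-endian PPC64 ET_EXEC image read without any endian conversion.

#include <elf.h>
#include <string.h>

/* Round addr up to the next multiple of size (size must be a power of two),
 * mirroring the patch's _ALIGN_UP() macro. */
#define ALIGN_UP(addr, size) (((addr) + ((size) - 1)) & ~((size) - 1))

/* Return the first program header's p_memsz for a big-endian PPC64 ET_EXEC
 * image held in memory, or 0 if the buffer does not look like one.
 * No byte-swapping is done, so the fields must already match the host byte
 * order, just as the original tool assumes. */
static unsigned long ppc64_kernel_memsize(const void *image, size_t len)
{
	const Elf64_Ehdr *eh = image;
	const Elf64_Phdr *ph;

	if (len < sizeof(*eh) ||
	    memcmp(eh->e_ident, ELFMAG, SELFMAG) != 0 ||
	    eh->e_ident[EI_CLASS] != ELFCLASS64 ||
	    eh->e_ident[EI_DATA] != ELFDATA2MSB ||
	    eh->e_type != ET_EXEC || eh->e_machine != EM_PPC64)
		return 0;

	if (eh->e_phoff + sizeof(Elf64_Phdr) > len)
		return 0;

	ph = (const Elf64_Phdr *)((const char *)image + eh->e_phoff);
	return (unsigned long)ph->p_memsz;
}

With such a helper, the end-of-kernel offset that previously came from System.map is simply ALIGN_UP(ppc64_kernel_memsize(buf, buflen), 4096), which matches the new offset_end computation the patch performs on vmlinux.memsize.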
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index b987164fca4..c9d32db9d76 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -47,11 +47,16 @@ config ARCH_MAY_HAVE_PC_FDC bool default y +config PPC_STD_MMU + bool + default y + # We optimistically allocate largepages from the VM, so make the limit # large enough (16MB). This badly named config option is actually # max order + 1 config FORCE_MAX_ZONEORDER int + default "9" if PPC_64K_PAGES default "13" source "init/Kconfig" @@ -169,6 +174,16 @@ config KEXEC support. As of this writing the exact hardware interface is strongly in flux, so no good recommendation can be made. +source "drivers/cpufreq/Kconfig" + +config CPU_FREQ_PMAC64 + bool "Support for some Apple G5s" + depends on CPU_FREQ && PMAC_SMU && PPC64 + select CPU_FREQ_TABLE + help + This adds support for frequency switching on Apple iMac G5, + and some of the more recent desktop G5 machines as well. + config IBMVIO depends on PPC_PSERIES || PPC_ISERIES bool @@ -282,6 +297,10 @@ config HAVE_ARCH_EARLY_PFN_TO_NID def_bool y depends on NEED_MULTIPLE_NODES +config ARCH_MEMORY_PROBE + def_bool y + depends on MEMORY_HOTPLUG + # Some NUMA nodes have memory ranges that span # other nodes. Even though a pfn is valid and # between a node's start and end pfns, it may not @@ -294,6 +313,15 @@ config NODES_SPAN_OTHER_NODES def_bool y depends on NEED_MULTIPLE_NODES +config PPC_64K_PAGES + bool "64k page size" + help + This option changes the kernel logical page size to 64k. On machines + without processor support for 64k pages, the kernel will simulate + them by loading each individual 4k page on demand transparently, + while on hardware with such support, it will be used to map + normal application pages. + config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on SMP diff --git a/arch/ppc64/Kconfig.debug b/arch/ppc64/Kconfig.debug index f16a5030527..b258c9314a1 100644 --- a/arch/ppc64/Kconfig.debug +++ b/arch/ppc64/Kconfig.debug @@ -55,10 +55,6 @@ config XMON_DEFAULT xmon is normally disabled unless booted with 'xmon=on'. Use 'xmon=off' to disable xmon init during runtime. 
-config PPCDBG - bool "Include PPCDBG realtime debugging" - depends on DEBUG_KERNEL - config IRQSTACKS bool "Use separate kernel stacks when processing interrupts" help diff --git a/arch/ppc64/boot/addRamDisk.c b/arch/ppc64/boot/addRamDisk.c index 7f2c0947339..c02a99952be 100644 --- a/arch/ppc64/boot/addRamDisk.c +++ b/arch/ppc64/boot/addRamDisk.c @@ -5,11 +5,59 @@ #include <sys/types.h> #include <sys/stat.h> #include <string.h> +#include <elf.h> #define ElfHeaderSize (64 * 1024) #define ElfPages (ElfHeaderSize / 4096) #define KERNELBASE (0xc000000000000000) +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) +struct addr_range { + unsigned long long addr; + unsigned long memsize; + unsigned long offset; +}; + +static int check_elf64(void *p, int size, struct addr_range *r) +{ + Elf64_Ehdr *elf64 = p; + Elf64_Phdr *elf64ph; + + if (elf64->e_ident[EI_MAG0] != ELFMAG0 || + elf64->e_ident[EI_MAG1] != ELFMAG1 || + elf64->e_ident[EI_MAG2] != ELFMAG2 || + elf64->e_ident[EI_MAG3] != ELFMAG3 || + elf64->e_ident[EI_CLASS] != ELFCLASS64 || + elf64->e_ident[EI_DATA] != ELFDATA2MSB || + elf64->e_type != ET_EXEC || elf64->e_machine != EM_PPC64) + return 0; + + if ((elf64->e_phoff + sizeof(Elf64_Phdr)) > size) + return 0; + + elf64ph = (Elf64_Phdr *) ((unsigned long)elf64 + + (unsigned long)elf64->e_phoff); + + r->memsize = (unsigned long)elf64ph->p_memsz; + r->offset = (unsigned long)elf64ph->p_offset; + r->addr = (unsigned long long)elf64ph->p_vaddr; + +#ifdef DEBUG + printf("PPC64 ELF file, ph:\n"); + printf("p_type 0x%08x\n", elf64ph->p_type); + printf("p_flags 0x%08x\n", elf64ph->p_flags); + printf("p_offset 0x%016llx\n", elf64ph->p_offset); + printf("p_vaddr 0x%016llx\n", elf64ph->p_vaddr); + printf("p_paddr 0x%016llx\n", elf64ph->p_paddr); + printf("p_filesz 0x%016llx\n", elf64ph->p_filesz); + printf("p_memsz 0x%016llx\n", elf64ph->p_memsz); + printf("p_align 0x%016llx\n", elf64ph->p_align); + printf("... 
skipping 0x%08lx bytes of ELF header\n", + (unsigned long)elf64ph->p_offset); +#endif + + return 64; +} void get4k(FILE *file, char *buf ) { unsigned j; @@ -34,97 +82,92 @@ void death(const char *msg, FILE *fdesc, const char *fname) int main(int argc, char **argv) { char inbuf[4096]; - FILE *ramDisk = NULL; - FILE *sysmap = NULL; - FILE *inputVmlinux = NULL; - FILE *outputVmlinux = NULL; - - unsigned i = 0; - unsigned long ramFileLen = 0; - unsigned long ramLen = 0; - unsigned long roundR = 0; - - unsigned long sysmapFileLen = 0; - unsigned long sysmapLen = 0; - unsigned long sysmapPages = 0; - char* ptr_end = NULL; - unsigned long offset_end = 0; - - unsigned long kernelLen = 0; - unsigned long actualKernelLen = 0; - unsigned long round = 0; - unsigned long roundedKernelLen = 0; - unsigned long ramStartOffs = 0; - unsigned long ramPages = 0; - unsigned long roundedKernelPages = 0; - unsigned long hvReleaseData = 0; + struct addr_range vmlinux; + FILE *ramDisk; + FILE *inputVmlinux; + FILE *outputVmlinux; + + char *rd_name, *lx_name, *out_name; + + size_t i; + unsigned long ramFileLen; + unsigned long ramLen; + unsigned long roundR; + unsigned long offset_end; + + unsigned long kernelLen; + unsigned long actualKernelLen; + unsigned long round; + unsigned long roundedKernelLen; + unsigned long ramStartOffs; + unsigned long ramPages; + unsigned long roundedKernelPages; + unsigned long hvReleaseData; u_int32_t eyeCatcher = 0xc8a5d9c4; - unsigned long naca = 0; - unsigned long xRamDisk = 0; - unsigned long xRamDiskSize = 0; - long padPages = 0; + unsigned long naca; + unsigned long xRamDisk; + unsigned long xRamDiskSize; + long padPages; if (argc < 2) { fprintf(stderr, "Name of RAM disk file missing.\n"); exit(1); } + rd_name = argv[1]; if (argc < 3) { - fprintf(stderr, "Name of System Map input file is missing.\n"); - exit(1); - } - - if (argc < 4) { fprintf(stderr, "Name of vmlinux file missing.\n"); exit(1); } + lx_name = argv[2]; - if (argc < 5) { + if (argc < 4) { fprintf(stderr, "Name of vmlinux output file missing.\n"); exit(1); } + out_name = argv[3]; - ramDisk = fopen(argv[1], "r"); + ramDisk = fopen(rd_name, "r"); if ( ! ramDisk ) { - fprintf(stderr, "RAM disk file \"%s\" failed to open.\n", argv[1]); + fprintf(stderr, "RAM disk file \"%s\" failed to open.\n", rd_name); exit(1); } - sysmap = fopen(argv[2], "r"); - if ( ! sysmap ) { - fprintf(stderr, "System Map file \"%s\" failed to open.\n", argv[2]); - exit(1); - } - - inputVmlinux = fopen(argv[3], "r"); + inputVmlinux = fopen(lx_name, "r"); if ( ! inputVmlinux ) { - fprintf(stderr, "vmlinux file \"%s\" failed to open.\n", argv[3]); + fprintf(stderr, "vmlinux file \"%s\" failed to open.\n", lx_name); exit(1); } - outputVmlinux = fopen(argv[4], "w+"); + outputVmlinux = fopen(out_name, "w+"); if ( ! 
outputVmlinux ) { - fprintf(stderr, "output vmlinux file \"%s\" failed to open.\n", argv[4]); + fprintf(stderr, "output vmlinux file \"%s\" failed to open.\n", out_name); exit(1); } - - - + + i = fread(inbuf, 1, sizeof(inbuf), inputVmlinux); + if (i != sizeof(inbuf)) { + fprintf(stderr, "can not read vmlinux file %s: %u\n", lx_name, i); + exit(1); + } + + i = check_elf64(inbuf, sizeof(inbuf), &vmlinux); + if (i == 0) { + fprintf(stderr, "You must have a linux kernel specified as argv[2]\n"); + exit(1); + } + /* Input Vmlinux file */ fseek(inputVmlinux, 0, SEEK_END); kernelLen = ftell(inputVmlinux); fseek(inputVmlinux, 0, SEEK_SET); - printf("kernel file size = %d\n", kernelLen); - if ( kernelLen == 0 ) { - fprintf(stderr, "You must have a linux kernel specified as argv[3]\n"); - exit(1); - } + printf("kernel file size = %lu\n", kernelLen); actualKernelLen = kernelLen - ElfHeaderSize; - printf("actual kernel length (minus ELF header) = %d\n", actualKernelLen); + printf("actual kernel length (minus ELF header) = %lu\n", actualKernelLen); round = actualKernelLen % 4096; roundedKernelLen = actualKernelLen; @@ -134,39 +177,7 @@ int main(int argc, char **argv) roundedKernelPages = roundedKernelLen / 4096; printf("Vmlinux pages to copy = %ld/0x%lx \n", roundedKernelPages, roundedKernelPages); - - - /* Input System Map file */ - /* (needs to be processed simply to determine if we need to add pad pages due to the static variables not being included in the vmlinux) */ - fseek(sysmap, 0, SEEK_END); - sysmapFileLen = ftell(sysmap); - fseek(sysmap, 0, SEEK_SET); - printf("%s file size = %ld/0x%lx \n", argv[2], sysmapFileLen, sysmapFileLen); - - sysmapLen = sysmapFileLen; - - roundR = 4096 - (sysmapLen % 4096); - if (roundR) { - printf("Rounding System Map file up to a multiple of 4096, adding %ld/0x%lx \n", roundR, roundR); - sysmapLen += roundR; - } - printf("Rounded System Map size is %ld/0x%lx \n", sysmapLen, sysmapLen); - - /* Process the Sysmap file to determine where _end is */ - sysmapPages = sysmapLen / 4096; - /* read the whole file line by line, expect that it doesn't fail */ - while ( fgets(inbuf, 4096, sysmap) ) ; - /* search for _end in the last page of the system map */ - ptr_end = strstr(inbuf, " _end"); - if (!ptr_end) { - fprintf(stderr, "Unable to find _end in the sysmap file \n"); - fprintf(stderr, "inbuf: \n"); - fprintf(stderr, "%s \n", inbuf); - exit(1); - } - printf("Found _end in the last page of the sysmap - backing up 10 characters it looks like %s", ptr_end-10); - /* convert address of _end in system map to hex offset. 
*/ - offset_end = (unsigned int)strtol(ptr_end-10, NULL, 16); + offset_end = _ALIGN_UP(vmlinux.memsize, 4096); /* calc how many pages we need to insert between the vmlinux and the start of the ram disk */ padPages = offset_end/4096 - roundedKernelPages; @@ -194,7 +205,7 @@ int main(int argc, char **argv) fseek(ramDisk, 0, SEEK_END); ramFileLen = ftell(ramDisk); fseek(ramDisk, 0, SEEK_SET); - printf("%s file size = %ld/0x%lx \n", argv[1], ramFileLen, ramFileLen); + printf("%s file size = %ld/0x%lx \n", rd_name, ramFileLen, ramFileLen); ramLen = ramFileLen; @@ -248,19 +259,19 @@ int main(int argc, char **argv) /* fseek to the hvReleaseData pointer */ fseek(outputVmlinux, ElfHeaderSize + 0x24, SEEK_SET); if (fread(&hvReleaseData, 4, 1, outputVmlinux) != 1) { - death("Could not read hvReleaseData pointer\n", outputVmlinux, argv[4]); + death("Could not read hvReleaseData pointer\n", outputVmlinux, out_name); } hvReleaseData = ntohl(hvReleaseData); /* Convert to native int */ - printf("hvReleaseData is at %08x\n", hvReleaseData); + printf("hvReleaseData is at %08lx\n", hvReleaseData); /* fseek to the hvReleaseData */ fseek(outputVmlinux, ElfHeaderSize + hvReleaseData, SEEK_SET); if (fread(inbuf, 0x40, 1, outputVmlinux) != 1) { - death("Could not read hvReleaseData\n", outputVmlinux, argv[4]); + death("Could not read hvReleaseData\n", outputVmlinux, out_name); } /* Check hvReleaseData sanity */ if (memcmp(inbuf, &eyeCatcher, 4) != 0) { - death("hvReleaseData is invalid\n", outputVmlinux, argv[4]); + death("hvReleaseData is invalid\n", outputVmlinux, out_name); } /* Get the naca pointer */ naca = ntohl(*((u_int32_t*) &inbuf[0x0C])) - KERNELBASE; @@ -269,13 +280,13 @@ int main(int argc, char **argv) /* fseek to the naca */ fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET); if (fread(inbuf, 0x18, 1, outputVmlinux) != 1) { - death("Could not read naca\n", outputVmlinux, argv[4]); + death("Could not read naca\n", outputVmlinux, out_name); } xRamDisk = ntohl(*((u_int32_t *) &inbuf[0x0c])); xRamDiskSize = ntohl(*((u_int32_t *) &inbuf[0x14])); /* Make sure a RAM disk isn't already present */ if ((xRamDisk != 0) || (xRamDiskSize != 0)) { - death("RAM disk is already attached to this kernel\n", outputVmlinux, argv[4]); + death("RAM disk is already attached to this kernel\n", outputVmlinux, out_name); } /* Fill in the values */ *((u_int32_t *) &inbuf[0x0c]) = htonl(ramStartOffs); @@ -285,15 +296,15 @@ int main(int argc, char **argv) fflush(outputVmlinux); fseek(outputVmlinux, ElfHeaderSize + naca, SEEK_SET); if (fwrite(inbuf, 0x18, 1, outputVmlinux) != 1) { - death("Could not write naca\n", outputVmlinux, argv[4]); + death("Could not write naca\n", outputVmlinux, out_name); } - printf("Ram Disk of 0x%lx pages is attached to the kernel at offset 0x%08x\n", + printf("Ram Disk of 0x%lx pages is attached to the kernel at offset 0x%08lx\n", ramPages, ramStartOffs); /* Done */ fclose(outputVmlinux); /* Set permission to executable */ - chmod(argv[4], S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); + chmod(out_name, S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); return 0; } diff --git a/arch/ppc64/boot/main.c b/arch/ppc64/boot/main.c index c1dc876bcca..e0dde24a72c 100644 --- a/arch/ppc64/boot/main.c +++ b/arch/ppc64/boot/main.c @@ -203,8 +203,15 @@ void start(unsigned long a1, unsigned long a2, void *promptr, void *sp) if (elf64ph->p_type == PT_LOAD && elf64ph->p_offset != 0) break; } - vmlinux.size = (unsigned long)elf64ph->p_filesz; - vmlinux.memsize = (unsigned long)elf64ph->p_memsz; + 
vmlinux.size = (unsigned long)elf64ph->p_filesz + + (unsigned long)elf64ph->p_offset; + /* We need to claim the memsize plus the file offset since gzip + * will expand the header (file offset), then the kernel, then + * possible rubbish we don't care about. But the kernel bss must + * be claimed (it will be zero'd by the kernel itself) + */ + vmlinux.memsize = (unsigned long)elf64ph->p_memsz + + (unsigned long)elf64ph->p_offset; printf("Allocating 0x%lx bytes for kernel ...\n\r", vmlinux.memsize); vmlinux.addr = try_claim(vmlinux.memsize); if (vmlinux.addr == 0) { diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index c441aebe764..58b19f10765 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -11,12 +11,11 @@ obj-y := misc.o prom.o endif -obj-y += irq.o idle.o dma.o \ - align.o pacaData.o \ - udbg.o ioctl32.o \ +obj-y += idle.o dma.o \ + align.o \ + udbg.o \ rtc.o \ - cpu_setup_power4.o \ - iommu.o sysfs.o vdso.o firmware.o + iommu.o vdso.o obj-y += vdso32/ vdso64/ pci-obj-$(CONFIG_PPC_MULTIPLATFORM) += pci_dn.o pci_direct_iommu.o @@ -31,15 +30,10 @@ endif obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o obj-$(CONFIG_KEXEC) += machine_kexec.o -obj-$(CONFIG_EEH) += eeh.o -obj-$(CONFIG_PROC_FS) += proc_ppc64.o obj-$(CONFIG_MODULES) += module.o ifneq ($(CONFIG_PPC_MERGE),y) obj-$(CONFIG_MODULES) += ppc_ksyms.o endif -obj-$(CONFIG_PPC_RTAS) += rtas_pci.o -obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o ifneq ($(CONFIG_PPC_MERGE),y) obj-$(CONFIG_BOOTX_TEXT) += btext.o @@ -52,8 +46,6 @@ obj-$(CONFIG_PPC_MAPLE) += udbg_16550.o obj-$(CONFIG_KPROBES) += kprobes.o -CFLAGS_ioctl32.o += -Ifs/ - ifneq ($(CONFIG_PPC_MERGE),y) ifeq ($(CONFIG_PPC_ISERIES),y) arch/ppc64/kernel/head.o: arch/powerpc/kernel/lparmap.s diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c index 504dee836d2..84ab5c18ef5 100644 --- a/arch/ppc64/kernel/asm-offsets.c +++ b/arch/ppc64/kernel/asm-offsets.c @@ -74,7 +74,6 @@ int main(void) DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); - DEFINE(PLATFORM, offsetof(struct systemcfg, platform)); DEFINE(PLATFORM_LPAR, PLATFORM_LPAR); /* paca */ @@ -93,6 +92,9 @@ int main(void) DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); +#ifdef CONFIG_PPC_64K_PAGES + DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir)); +#endif #ifdef CONFIG_HUGETLB_PAGE DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); diff --git a/arch/ppc64/kernel/cpu_setup_power4.S b/arch/ppc64/kernel/cpu_setup_power4.S deleted file mode 100644 index 1fb673c511f..00000000000 --- a/arch/ppc64/kernel/cpu_setup_power4.S +++ /dev/null @@ -1,233 +0,0 @@ -/* - * This file contains low level CPU setup functions. - * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- * - */ - -#include <linux/config.h> -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/cputable.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/cache.h> - -_GLOBAL(__970_cpu_preinit) - /* - * Do nothing if not running in HV mode - */ - mfmsr r0 - rldicl. r0,r0,4,63 - beqlr - - /* - * Deal only with PPC970 and PPC970FX. - */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 - beq 1f - cmpwi r0,0x3c - beq 1f - cmpwi r0,0x44 - bnelr -1: - - /* Make sure HID4:rm_ci is off before MMU is turned off, that large - * pages are enabled with HID4:61 and clear HID5:DCBZ_size and - * HID5:DCBZ32_ill - */ - li r0,0 - mfspr r3,SPRN_HID4 - rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */ - rldimi r3,r0,2,61 /* clear bit 61 (lg_pg_en) */ - sync - mtspr SPRN_HID4,r3 - isync - sync - mfspr r3,SPRN_HID5 - rldimi r3,r0,6,56 /* clear bits 56 & 57 (DCBZ*) */ - sync - mtspr SPRN_HID5,r3 - isync - sync - - /* Setup some basic HID1 features */ - mfspr r0,SPRN_HID1 - li r3,0x1200 /* enable i-fetch cacheability */ - sldi r3,r3,44 /* and prefetch */ - or r0,r0,r3 - mtspr SPRN_HID1,r0 - mtspr SPRN_HID1,r0 - isync - - /* Clear HIOR */ - li r0,0 - sync - mtspr SPRN_HIOR,0 /* Clear interrupt prefix */ - isync - blr - -_GLOBAL(__setup_cpu_power4) - blr - -_GLOBAL(__setup_cpu_be) - /* Set large page sizes LP=0: 16MB, LP=1: 64KB */ - addi r3, 0, 0 - ori r3, r3, HID6_LB - sldi r3, r3, 32 - nor r3, r3, r3 - mfspr r4, SPRN_HID6 - and r4, r4, r3 - addi r3, 0, 0x02000 - sldi r3, r3, 32 - or r4, r4, r3 - mtspr SPRN_HID6, r4 - blr - -_GLOBAL(__setup_cpu_ppc970) - mfspr r0,SPRN_HID0 - li r11,5 /* clear DOZE and SLEEP */ - rldimi r0,r11,52,8 /* set NAP and DPM */ - mtspr SPRN_HID0,r0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - mfspr r0,SPRN_HID0 - sync - isync - blr - -/* Definitions for the table use to save CPU states */ -#define CS_HID0 0 -#define CS_HID1 8 -#define CS_HID4 16 -#define CS_HID5 24 -#define CS_SIZE 32 - - .data - .balign L1_CACHE_BYTES,0 -cpu_state_storage: - .space CS_SIZE - .balign L1_CACHE_BYTES,0 - .text - -/* Called in normal context to backup CPU 0 state. This - * does not include cache settings. This function is also - * called for machine sleep. This does not include the MMU - * setup, BATs, etc... but rather the "special" registers - * like HID0, HID1, HID4, etc... - */ -_GLOBAL(__save_cpu_setup) - /* Some CR fields are volatile, we back it up all */ - mfcr r7 - - /* Get storage ptr */ - LOADADDR(r5,cpu_state_storage) - - /* We only deal with 970 for now */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 - beq 1f - cmpwi r0,0x3c - beq 1f - cmpwi r0,0x44 - bne 2f - -1: /* Save HID0,1,4 and 5 */ - mfspr r3,SPRN_HID0 - std r3,CS_HID0(r5) - mfspr r3,SPRN_HID1 - std r3,CS_HID1(r5) - mfspr r3,SPRN_HID4 - std r3,CS_HID4(r5) - mfspr r3,SPRN_HID5 - std r3,CS_HID5(r5) - -2: - mtcr r7 - blr - -/* Called with no MMU context (typically MSR:IR/DR off) to - * restore CPU state as backed up by the previous - * function. 
This does not include cache setting - */ -_GLOBAL(__restore_cpu_setup) - /* Get storage ptr (FIXME when using anton reloc as we - * are running with translation disabled here - */ - LOADADDR(r5,cpu_state_storage) - - /* We only deal with 970 for now */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 - beq 1f - cmpwi r0,0x3c - beq 1f - cmpwi r0,0x44 - bnelr - -1: /* Before accessing memory, we make sure rm_ci is clear */ - li r0,0 - mfspr r3,SPRN_HID4 - rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */ - sync - mtspr SPRN_HID4,r3 - isync - sync - - /* Clear interrupt prefix */ - li r0,0 - sync - mtspr SPRN_HIOR,0 - isync - - /* Restore HID0 */ - ld r3,CS_HID0(r5) - sync - isync - mtspr SPRN_HID0,r3 - mfspr r3,SPRN_HID0 - mfspr r3,SPRN_HID0 - mfspr r3,SPRN_HID0 - mfspr r3,SPRN_HID0 - mfspr r3,SPRN_HID0 - mfspr r3,SPRN_HID0 - sync - isync - - /* Restore HID1 */ - ld r3,CS_HID1(r5) - sync - isync - mtspr SPRN_HID1,r3 - mtspr SPRN_HID1,r3 - sync - isync - - /* Restore HID4 */ - ld r3,CS_HID4(r5) - sync - isync - mtspr SPRN_HID4,r3 - sync - isync - - /* Restore HID5 */ - ld r3,CS_HID5(r5) - sync - isync - mtspr SPRN_HID5,r3 - sync - isync - blr - diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c deleted file mode 100644 index 035d1b14a20..00000000000 --- a/arch/ppc64/kernel/eeh.c +++ /dev/null @@ -1,943 +0,0 @@ -/* - * eeh.c - * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/bootmem.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/mm.h> -#include <linux/notifier.h> -#include <linux/pci.h> -#include <linux/proc_fs.h> -#include <linux/rbtree.h> -#include <linux/seq_file.h> -#include <linux/spinlock.h> -#include <asm/eeh.h> -#include <asm/io.h> -#include <asm/machdep.h> -#include <asm/rtas.h> -#include <asm/atomic.h> -#include <asm/systemcfg.h> -#include <asm/ppc-pci.h> - -#undef DEBUG - -/** Overview: - * EEH, or "Extended Error Handling" is a PCI bridge technology for - * dealing with PCI bus errors that can't be dealt with within the - * usual PCI framework, except by check-stopping the CPU. Systems - * that are designed for high-availability/reliability cannot afford - * to crash due to a "mere" PCI error, thus the need for EEH. - * An EEH-capable bridge operates by converting a detected error - * into a "slot freeze", taking the PCI adapter off-line, making - * the slot behave, from the OS'es point of view, as if the slot - * were "empty": all reads return 0xff's and all writes are silently - * ignored. EEH slot isolation events can be triggered by parity - * errors on the address or data busses (e.g. during posted writes), - * which in turn might be caused by dust, vibration, humidity, - * radioactivity or plain-old failed hardware. 
- * - * Note, however, that one of the leading causes of EEH slot - * freeze events are buggy device drivers, buggy device microcode, - * or buggy device hardware. This is because any attempt by the - * device to bus-master data to a memory address that is not - * assigned to the device will trigger a slot freeze. (The idea - * is to prevent devices-gone-wild from corrupting system memory). - * Buggy hardware/drivers will have a miserable time co-existing - * with EEH. - * - * Ideally, a PCI device driver, when suspecting that an isolation - * event has occured (e.g. by reading 0xff's), will then ask EEH - * whether this is the case, and then take appropriate steps to - * reset the PCI slot, the PCI device, and then resume operations. - * However, until that day, the checking is done here, with the - * eeh_check_failure() routine embedded in the MMIO macros. If - * the slot is found to be isolated, an "EEH Event" is synthesized - * and sent out for processing. - */ - -/** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */ -#define BUID_HI(buid) ((buid) >> 32) -#define BUID_LO(buid) ((buid) & 0xffffffff) - -/* EEH event workqueue setup. */ -static DEFINE_SPINLOCK(eeh_eventlist_lock); -LIST_HEAD(eeh_eventlist); -static void eeh_event_handler(void *); -DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL); - -static struct notifier_block *eeh_notifier_chain; - -/* - * If a device driver keeps reading an MMIO register in an interrupt - * handler after a slot isolation event has occurred, we assume it - * is broken and panic. This sets the threshold for how many read - * attempts we allow before panicking. - */ -#define EEH_MAX_FAILS 1000 -static atomic_t eeh_fail_count; - -/* RTAS tokens */ -static int ibm_set_eeh_option; -static int ibm_set_slot_reset; -static int ibm_read_slot_reset_state; -static int ibm_read_slot_reset_state2; -static int ibm_slot_error_detail; - -static int eeh_subsystem_enabled; - -/* Buffer for reporting slot-error-detail rtas calls */ -static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; -static DEFINE_SPINLOCK(slot_errbuf_lock); -static int eeh_error_buf_size; - -/* System monitoring statistics */ -static DEFINE_PER_CPU(unsigned long, total_mmio_ffs); -static DEFINE_PER_CPU(unsigned long, false_positives); -static DEFINE_PER_CPU(unsigned long, ignored_failures); -static DEFINE_PER_CPU(unsigned long, slot_resets); - -/** - * The pci address cache subsystem. This subsystem places - * PCI device address resources into a red-black tree, sorted - * according to the address range, so that given only an i/o - * address, the corresponding PCI device can be **quickly** - * found. It is safe to perform an address lookup in an interrupt - * context; this ability is an important feature. - * - * Currently, the only customer of this code is the EEH subsystem; - * thus, this code has been somewhat tailored to suit EEH better. - * In particular, the cache does *not* hold the addresses of devices - * for which EEH is not enabled. - * - * (Implementation Note: The RB tree seems to be better/faster - * than any hash algo I could think of for this problem, even - * with the penalty of slow pointer chases for d-cache misses). 
- */ -struct pci_io_addr_range -{ - struct rb_node rb_node; - unsigned long addr_lo; - unsigned long addr_hi; - struct pci_dev *pcidev; - unsigned int flags; -}; - -static struct pci_io_addr_cache -{ - struct rb_root rb_root; - spinlock_t piar_lock; -} pci_io_addr_cache_root; - -static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) -{ - struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; - - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - - if (addr < piar->addr_lo) { - n = n->rb_left; - } else { - if (addr > piar->addr_hi) { - n = n->rb_right; - } else { - pci_dev_get(piar->pcidev); - return piar->pcidev; - } - } - } - - return NULL; -} - -/** - * pci_get_device_by_addr - Get device, given only address - * @addr: mmio (PIO) phys address or i/o port number - * - * Given an mmio phys address, or a port number, find a pci device - * that implements this address. Be sure to pci_dev_put the device - * when finished. I/O port numbers are assumed to be offset - * from zero (that is, they do *not* have pci_io_addr added in). - * It is safe to call this function within an interrupt. - */ -static struct pci_dev *pci_get_device_by_addr(unsigned long addr) -{ - struct pci_dev *dev; - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - dev = __pci_get_device_by_addr(addr); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); - return dev; -} - -#ifdef DEBUG -/* - * Handy-dandy debug print routine, does nothing more - * than print out the contents of our addr cache. - */ -static void pci_addr_cache_print(struct pci_io_addr_cache *cache) -{ - struct rb_node *n; - int cnt = 0; - - n = rb_first(&cache->rb_root); - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n", - (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, - piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); - cnt++; - n = rb_next(n); - } -} -#endif - -/* Insert address range into the rb tree. */ -static struct pci_io_addr_range * -pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, - unsigned long ahi, unsigned int flags) -{ - struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; - struct rb_node *parent = NULL; - struct pci_io_addr_range *piar; - - /* Walk tree, find a place to insert into tree */ - while (*p) { - parent = *p; - piar = rb_entry(parent, struct pci_io_addr_range, rb_node); - if (alo < piar->addr_lo) { - p = &parent->rb_left; - } else if (ahi > piar->addr_hi) { - p = &parent->rb_right; - } else { - if (dev != piar->pcidev || - alo != piar->addr_lo || ahi != piar->addr_hi) { - printk(KERN_WARNING "PIAR: overlapping address range\n"); - } - return piar; - } - } - piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); - if (!piar) - return NULL; - - piar->addr_lo = alo; - piar->addr_hi = ahi; - piar->pcidev = dev; - piar->flags = flags; - - rb_link_node(&piar->rb_node, parent, p); - rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root); - - return piar; -} - -static void __pci_addr_cache_insert_device(struct pci_dev *dev) -{ - struct device_node *dn; - struct pci_dn *pdn; - int i; - int inserted = 0; - - dn = pci_device_to_OF_node(dev); - if (!dn) { - printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", - pci_name(dev)); - return; - } - - /* Skip any devices for which EEH is not enabled. 
*/ - pdn = dn->data; - if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || - pdn->eeh_mode & EEH_MODE_NOCHECK) { -#ifdef DEBUG - printk(KERN_INFO "PCI: skip building address cache for=%s\n", - pci_name(dev)); -#endif - return; - } - - /* The cache holds a reference to the device... */ - pci_dev_get(dev); - - /* Walk resources on this device, poke them into the tree */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - unsigned long start = pci_resource_start(dev,i); - unsigned long end = pci_resource_end(dev,i); - unsigned int flags = pci_resource_flags(dev,i); - - /* We are interested only bus addresses, not dma or other stuff */ - if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) - continue; - if (start == 0 || ~start == 0 || end == 0 || ~end == 0) - continue; - pci_addr_cache_insert(dev, start, end, flags); - inserted = 1; - } - - /* If there was nothing to add, the cache has no reference... */ - if (!inserted) - pci_dev_put(dev); -} - -/** - * pci_addr_cache_insert_device - Add a device to the address cache - * @dev: PCI device whose I/O addresses we are interested in. - * - * In order to support the fast lookup of devices based on addresses, - * we maintain a cache of devices that can be quickly searched. - * This routine adds a device to that cache. - */ -void pci_addr_cache_insert_device(struct pci_dev *dev) -{ - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __pci_addr_cache_insert_device(dev); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); -} - -static inline void __pci_addr_cache_remove_device(struct pci_dev *dev) -{ - struct rb_node *n; - int removed = 0; - -restart: - n = rb_first(&pci_io_addr_cache_root.rb_root); - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - - if (piar->pcidev == dev) { - rb_erase(n, &pci_io_addr_cache_root.rb_root); - removed = 1; - kfree(piar); - goto restart; - } - n = rb_next(n); - } - - /* The cache no longer holds its reference to this device... */ - if (removed) - pci_dev_put(dev); -} - -/** - * pci_addr_cache_remove_device - remove pci device from addr cache - * @dev: device to remove - * - * Remove a device from the addr-cache tree. - * This is potentially expensive, since it will walk - * the tree multiple times (once per resource). - * But so what; device removal doesn't need to be that fast. - */ -void pci_addr_cache_remove_device(struct pci_dev *dev) -{ - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __pci_addr_cache_remove_device(dev); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); -} - -/** - * pci_addr_cache_build - Build a cache of I/O addresses - * - * Build a cache of pci i/o addresses. This cache will be used to - * find the pci device that corresponds to a given address. - * This routine scans all pci busses to build the cache. - * Must be run late in boot process, after the pci controllers - * have been scaned for devices (after all device resources are known). - */ -void __init pci_addr_cache_build(void) -{ - struct pci_dev *dev = NULL; - - spin_lock_init(&pci_io_addr_cache_root.piar_lock); - - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - /* Ignore PCI bridges ( XXX why ??) */ - if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) { - continue; - } - pci_addr_cache_insert_device(dev); - } - -#ifdef DEBUG - /* Verify tree built up above, echo back the list of addrs. 
*/ - pci_addr_cache_print(&pci_io_addr_cache_root); -#endif -} - -/* --------------------------------------------------------------- */ -/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */ - -/** - * eeh_register_notifier - Register to find out about EEH events. - * @nb: notifier block to callback on events - */ -int eeh_register_notifier(struct notifier_block *nb) -{ - return notifier_chain_register(&eeh_notifier_chain, nb); -} - -/** - * eeh_unregister_notifier - Unregister to an EEH event notifier. - * @nb: notifier block to callback on events - */ -int eeh_unregister_notifier(struct notifier_block *nb) -{ - return notifier_chain_unregister(&eeh_notifier_chain, nb); -} - -/** - * read_slot_reset_state - Read the reset state of a device node's slot - * @dn: device node to read - * @rets: array to return results in - */ -static int read_slot_reset_state(struct device_node *dn, int rets[]) -{ - int token, outputs; - struct pci_dn *pdn = dn->data; - - if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { - token = ibm_read_slot_reset_state2; - outputs = 4; - } else { - token = ibm_read_slot_reset_state; - outputs = 3; - } - - return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr, - BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); -} - -/** - * eeh_panic - call panic() for an eeh event that cannot be handled. - * The philosophy of this routine is that it is better to panic and - * halt the OS than it is to risk possible data corruption by - * oblivious device drivers that don't know better. - * - * @dev pci device that had an eeh event - * @reset_state current reset state of the device slot - */ -static void eeh_panic(struct pci_dev *dev, int reset_state) -{ - /* - * XXX We should create a separate sysctl for this. - * - * Since the panic_on_oops sysctl is used to halt the system - * in light of potential corruption, we can use it here. - */ - if (panic_on_oops) - panic("EEH: MMIO failure (%d) on device:%s\n", reset_state, - pci_name(dev)); - else { - __get_cpu_var(ignored_failures)++; - printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n", - reset_state, pci_name(dev)); - } -} - -/** - * eeh_event_handler - dispatch EEH events. The detection of a frozen - * slot can occur inside an interrupt, where it can be hard to do - * anything about it. The goal of this routine is to pull these - * detection events out of the context of the interrupt handler, and - * re-dispatch them for processing at a later time in a normal context. - * - * @dummy - unused - */ -static void eeh_event_handler(void *dummy) -{ - unsigned long flags; - struct eeh_event *event; - - while (1) { - spin_lock_irqsave(&eeh_eventlist_lock, flags); - event = NULL; - if (!list_empty(&eeh_eventlist)) { - event = list_entry(eeh_eventlist.next, struct eeh_event, list); - list_del(&event->list); - } - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - if (event == NULL) - break; - - printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device " - "%s\n", event->reset_state, - pci_name(event->dev)); - - atomic_set(&eeh_fail_count, 0); - notifier_call_chain (&eeh_notifier_chain, - EEH_NOTIFY_FREEZE, event); - - __get_cpu_var(slot_resets)++; - - pci_dev_put(event->dev); - kfree(event); - } -} - -/** - * eeh_token_to_phys - convert EEH address token to phys address - * @token i/o token, should be address in the form 0xE.... 
- */ -static inline unsigned long eeh_token_to_phys(unsigned long token) -{ - pte_t *ptep; - unsigned long pa; - - ptep = find_linux_pte(init_mm.pgd, token); - if (!ptep) - return token; - pa = pte_pfn(*ptep) << PAGE_SHIFT; - - return pa | (token & (PAGE_SIZE-1)); -} - -/** - * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze - * @dn device node - * @dev pci device, if known - * - * Check for an EEH failure for the given device node. Call this - * routine if the result of a read was all 0xff's and you want to - * find out if this is due to an EEH slot freeze. This routine - * will query firmware for the EEH status. - * - * Returns 0 if there has not been an EEH error; otherwise returns - * a non-zero value and queues up a solt isolation event notification. - * - * It is safe to call this routine in an interrupt context. - */ -int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) -{ - int ret; - int rets[3]; - unsigned long flags; - int rc, reset_state; - struct eeh_event *event; - struct pci_dn *pdn; - - __get_cpu_var(total_mmio_ffs)++; - - if (!eeh_subsystem_enabled) - return 0; - - if (!dn) - return 0; - pdn = dn->data; - - /* Access to IO BARs might get this far and still not want checking. */ - if (!pdn->eeh_capable || !(pdn->eeh_mode & EEH_MODE_SUPPORTED) || - pdn->eeh_mode & EEH_MODE_NOCHECK) { - return 0; - } - - if (!pdn->eeh_config_addr) { - return 0; - } - - /* - * If we already have a pending isolation event for this - * slot, we know it's bad already, we don't need to check... - */ - if (pdn->eeh_mode & EEH_MODE_ISOLATED) { - atomic_inc(&eeh_fail_count); - if (atomic_read(&eeh_fail_count) >= EEH_MAX_FAILS) { - /* re-read the slot reset state */ - if (read_slot_reset_state(dn, rets) != 0) - rets[0] = -1; /* reset state unknown */ - eeh_panic(dev, rets[0]); - } - return 0; - } - - /* - * Now test for an EEH failure. This is VERY expensive. - * Note that the eeh_config_addr may be a parent device - * in the case of a device behind a bridge, or it may be - * function zero of a multi-function device. - * In any case they must share a common PHB. - */ - ret = read_slot_reset_state(dn, rets); - if (!(ret == 0 && rets[1] == 1 && (rets[0] == 2 || rets[0] == 4))) { - __get_cpu_var(false_positives)++; - return 0; - } - - /* prevent repeated reports of this failure */ - pdn->eeh_mode |= EEH_MODE_ISOLATED; - - reset_state = rets[0]; - - spin_lock_irqsave(&slot_errbuf_lock, flags); - memset(slot_errbuf, 0, eeh_error_buf_size); - - rc = rtas_call(ibm_slot_error_detail, - 8, 1, NULL, pdn->eeh_config_addr, - BUID_HI(pdn->phb->buid), - BUID_LO(pdn->phb->buid), NULL, 0, - virt_to_phys(slot_errbuf), - eeh_error_buf_size, - 1 /* Temporary Error */); - - if (rc == 0) - log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0); - spin_unlock_irqrestore(&slot_errbuf_lock, flags); - - printk(KERN_INFO "EEH: MMIO failure (%d) on device: %s %s\n", - rets[0], dn->name, dn->full_name); - event = kmalloc(sizeof(*event), GFP_ATOMIC); - if (event == NULL) { - eeh_panic(dev, reset_state); - return 1; - } - - event->dev = dev; - event->dn = dn; - event->reset_state = reset_state; - - /* We may or may not be called in an interrupt context */ - spin_lock_irqsave(&eeh_eventlist_lock, flags); - list_add(&event->list, &eeh_eventlist); - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - - /* Most EEH events are due to device driver bugs. Having - * a stack trace will help the device-driver authors figure - * out what happened. So print that out. 
*/ - dump_stack(); - schedule_work(&eeh_event_wq); - - return 0; -} - -EXPORT_SYMBOL(eeh_dn_check_failure); - -/** - * eeh_check_failure - check if all 1's data is due to EEH slot freeze - * @token i/o token, should be address in the form 0xA.... - * @val value, should be all 1's (XXX why do we need this arg??) - * - * Check for an eeh failure at the given token address. - * Check for an EEH failure at the given token address. Call this - * routine if the result of a read was all 0xff's and you want to - * find out if this is due to an EEH slot freeze event. This routine - * will query firmware for the EEH status. - * - * Note this routine is safe to call in an interrupt context. - */ -unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) -{ - unsigned long addr; - struct pci_dev *dev; - struct device_node *dn; - - /* Finding the phys addr + pci device; this is pretty quick. */ - addr = eeh_token_to_phys((unsigned long __force) token); - dev = pci_get_device_by_addr(addr); - if (!dev) - return val; - - dn = pci_device_to_OF_node(dev); - eeh_dn_check_failure (dn, dev); - - pci_dev_put(dev); - return val; -} - -EXPORT_SYMBOL(eeh_check_failure); - -struct eeh_early_enable_info { - unsigned int buid_hi; - unsigned int buid_lo; -}; - -/* Enable eeh for the given device node. */ -static void *early_enable_eeh(struct device_node *dn, void *data) -{ - struct eeh_early_enable_info *info = data; - int ret; - char *status = get_property(dn, "status", NULL); - u32 *class_code = (u32 *)get_property(dn, "class-code", NULL); - u32 *vendor_id = (u32 *)get_property(dn, "vendor-id", NULL); - u32 *device_id = (u32 *)get_property(dn, "device-id", NULL); - u32 *regs; - int enable; - struct pci_dn *pdn = dn->data; - - pdn->eeh_mode = 0; - - if (status && strcmp(status, "ok") != 0) - return NULL; /* ignore devices with bad status */ - - /* Ignore bad nodes. */ - if (!class_code || !vendor_id || !device_id) - return NULL; - - /* There is nothing to check on PCI to ISA bridges */ - if (dn->type && !strcmp(dn->type, "isa")) { - pdn->eeh_mode |= EEH_MODE_NOCHECK; - return NULL; - } - - /* - * Now decide if we are going to "Disable" EEH checking - * for this device. We still run with the EEH hardware active, - * but we won't be checking for ff's. This means a driver - * could return bad data (very bad!), an interrupt handler could - * hang waiting on status bits that won't change, etc. - * But there are a few cases like display devices that make sense. - */ - enable = 1; /* i.e. we will do checking */ - if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY) - enable = 0; - - if (!enable) - pdn->eeh_mode |= EEH_MODE_NOCHECK; - - /* Ok... see if this device supports EEH. Some do, some don't, - * and the only way to find out is to check each and every one. */ - regs = (u32 *)get_property(dn, "reg", NULL); - if (regs) { - /* First register entry is addr (00BBSS00) */ - /* Try to enable eeh */ - ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, - regs[0], info->buid_hi, info->buid_lo, - EEH_ENABLE); - if (ret == 0) { - eeh_subsystem_enabled = 1; - pdn->eeh_mode |= EEH_MODE_SUPPORTED; - pdn->eeh_config_addr = regs[0]; -#ifdef DEBUG - printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name); -#endif - } else { - - /* This device doesn't support EEH, but it may have an - * EEH parent, in which case we mark it as supported. */ - if (dn->parent && dn->parent->data - && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { - /* Parent supports EEH. 
*/ - pdn->eeh_mode |= EEH_MODE_SUPPORTED; - pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr; - return NULL; - } - } - } else { - printk(KERN_WARNING "EEH: %s: unable to get reg property.\n", - dn->full_name); - } - - return NULL; -} - -/* - * Initialize EEH by trying to enable it for all of the adapters in the system. - * As a side effect we can determine here if eeh is supported at all. - * Note that we leave EEH on so failed config cycles won't cause a machine - * check. If a user turns off EEH for a particular adapter they are really - * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't - * grant access to a slot if EEH isn't enabled, and so we always enable - * EEH for all slots/all devices. - * - * The eeh-force-off option disables EEH checking globally, for all slots. - * Even if force-off is set, the EEH hardware is still enabled, so that - * newer systems can boot. - */ -void __init eeh_init(void) -{ - struct device_node *phb, *np; - struct eeh_early_enable_info info; - - np = of_find_node_by_path("/rtas"); - if (np == NULL) - return; - - ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); - ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); - ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); - ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); - ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); - - if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) - return; - - eeh_error_buf_size = rtas_token("rtas-error-log-max"); - if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { - eeh_error_buf_size = 1024; - } - if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { - printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated " - "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX); - eeh_error_buf_size = RTAS_ERROR_LOG_MAX; - } - - /* Enable EEH for all adapters. Note that eeh requires buid's */ - for (phb = of_find_node_by_name(NULL, "pci"); phb; - phb = of_find_node_by_name(phb, "pci")) { - unsigned long buid; - struct pci_dn *pci; - - buid = get_phb_buid(phb); - if (buid == 0 || phb->data == NULL) - continue; - - pci = phb->data; - info.buid_lo = BUID_LO(buid); - info.buid_hi = BUID_HI(buid); - traverse_pci_devices(phb, early_enable_eeh, &info); - } - - if (eeh_subsystem_enabled) - printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n"); - else - printk(KERN_WARNING "EEH: No capable adapters found\n"); -} - -/** - * eeh_add_device_early - enable EEH for the indicated device_node - * @dn: device node for which to set up EEH - * - * This routine must be used to perform EEH initialization for PCI - * devices that were added after system boot (e.g. hotplug, dlpar). - * This routine must be called before any i/o is performed to the - * adapter (inluding any config-space i/o). - * Whether this actually enables EEH or not for this device depends - * on the CEC architecture, type of the device, on earlier boot - * command-line arguments & etc. 
- */ -void eeh_add_device_early(struct device_node *dn) -{ - struct pci_controller *phb; - struct eeh_early_enable_info info; - - if (!dn || !dn->data) - return; - phb = PCI_DN(dn)->phb; - if (NULL == phb || 0 == phb->buid) { - printk(KERN_WARNING "EEH: Expected buid but found none\n"); - return; - } - - info.buid_hi = BUID_HI(phb->buid); - info.buid_lo = BUID_LO(phb->buid); - early_enable_eeh(dn, &info); -} -EXPORT_SYMBOL(eeh_add_device_early); - -/** - * eeh_add_device_late - perform EEH initialization for the indicated pci device - * @dev: pci device for which to set up EEH - * - * This routine must be used to complete EEH initialization for PCI - * devices that were added after system boot (e.g. hotplug, dlpar). - */ -void eeh_add_device_late(struct pci_dev *dev) -{ - if (!dev || !eeh_subsystem_enabled) - return; - -#ifdef DEBUG - printk(KERN_DEBUG "EEH: adding device %s\n", pci_name(dev)); -#endif - - pci_addr_cache_insert_device (dev); -} -EXPORT_SYMBOL(eeh_add_device_late); - -/** - * eeh_remove_device - undo EEH setup for the indicated pci device - * @dev: pci device to be removed - * - * This routine should be when a device is removed from a running - * system (e.g. by hotplug or dlpar). - */ -void eeh_remove_device(struct pci_dev *dev) -{ - if (!dev || !eeh_subsystem_enabled) - return; - - /* Unregister the device with the EEH/PCI address search system */ -#ifdef DEBUG - printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev)); -#endif - pci_addr_cache_remove_device(dev); -} -EXPORT_SYMBOL(eeh_remove_device); - -static int proc_eeh_show(struct seq_file *m, void *v) -{ - unsigned int cpu; - unsigned long ffs = 0, positives = 0, failures = 0; - unsigned long resets = 0; - - for_each_cpu(cpu) { - ffs += per_cpu(total_mmio_ffs, cpu); - positives += per_cpu(false_positives, cpu); - failures += per_cpu(ignored_failures, cpu); - resets += per_cpu(slot_resets, cpu); - } - - if (0 == eeh_subsystem_enabled) { - seq_printf(m, "EEH Subsystem is globally disabled\n"); - seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs); - } else { - seq_printf(m, "EEH Subsystem is enabled\n"); - seq_printf(m, "eeh_total_mmio_ffs=%ld\n" - "eeh_false_positives=%ld\n" - "eeh_ignored_failures=%ld\n" - "eeh_slot_resets=%ld\n" - "eeh_fail_count=%d\n", - ffs, positives, failures, resets, - eeh_fail_count.counter); - } - - return 0; -} - -static int proc_eeh_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_eeh_show, NULL); -} - -static struct file_operations proc_eeh_operations = { - .open = proc_eeh_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init eeh_init_proc(void) -{ - struct proc_dir_entry *e; - - if (systemcfg->platform & PLATFORM_PSERIES) { - e = create_proc_entry("ppc64/eeh", 0, NULL); - if (e) - e->proc_fops = &proc_eeh_operations; - } - - return 0; -} -__initcall(eeh_init_proc); diff --git a/arch/ppc64/kernel/firmware.c b/arch/ppc64/kernel/firmware.c deleted file mode 100644 index d8432c0fb27..00000000000 --- a/arch/ppc64/kernel/firmware.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * arch/ppc64/kernel/firmware.c - * - * Extracted from cputable.c - * - * Copyright (C) 2001 Ben. 
Herrenschmidt (benh@kernel.crashing.org) - * - * Modifications for ppc64: - * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> - * Copyright (C) 2005 Stephen Rothwell, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/config.h> - -#include <asm/firmware.h> - -unsigned long ppc64_firmware_features; - -#ifdef CONFIG_PPC_PSERIES -firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { - {FW_FEATURE_PFT, "hcall-pft"}, - {FW_FEATURE_TCE, "hcall-tce"}, - {FW_FEATURE_SPRG0, "hcall-sprg0"}, - {FW_FEATURE_DABR, "hcall-dabr"}, - {FW_FEATURE_COPY, "hcall-copy"}, - {FW_FEATURE_ASR, "hcall-asr"}, - {FW_FEATURE_DEBUG, "hcall-debug"}, - {FW_FEATURE_PERF, "hcall-perf"}, - {FW_FEATURE_DUMP, "hcall-dump"}, - {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, - {FW_FEATURE_MIGRATE, "hcall-migrate"}, - {FW_FEATURE_PERFMON, "hcall-perfmon"}, - {FW_FEATURE_CRQ, "hcall-crq"}, - {FW_FEATURE_VIO, "hcall-vio"}, - {FW_FEATURE_RDMA, "hcall-rdma"}, - {FW_FEATURE_LLAN, "hcall-lLAN"}, - {FW_FEATURE_BULK, "hcall-bulk"}, - {FW_FEATURE_XDABR, "hcall-xdabr"}, - {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, - {FW_FEATURE_SPLPAR, "hcall-splpar"}, -}; -#endif diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index db1cf397be2..1c869ea72d2 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -28,7 +28,6 @@ #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> -#include <asm/systemcfg.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/bug.h> @@ -195,11 +194,11 @@ exception_marker: #define EX_R12 24 #define EX_R13 32 #define EX_SRR0 40 -#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */ #define EX_DAR 48 -#define EX_LR 48 /* SLB miss saves LR, but not DAR */ #define EX_DSISR 56 #define EX_CCR 60 +#define EX_R3 64 +#define EX_LR 72 #define EXCEPTION_PROLOG_PSERIES(area, label) \ mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \ @@ -419,17 +418,22 @@ data_access_slb_pSeries: mtspr SPRN_SPRG1,r13 RUNLATCH_ON(r13) mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_DAR std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + mfcr r9 +#ifdef __DISABLED__ + /* Keep that around for when we re-implement dynamic VSIDs */ + cmpdi r3,0 + bge slb_miss_user_pseries +#endif /* __DISABLED__ */ std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r9,SPRN_SPRG1 - std r9,PACA_EXSLB+EX_R13(r13) - mfcr r9 + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ - mfspr r3,SPRN_DAR - b .do_slb_miss /* Rel. branch works in real mode */ + b .slb_miss_realmode /* Rel. 
branch works in real mode */ STD_EXCEPTION_PSERIES(0x400, instruction_access) @@ -440,17 +444,22 @@ instruction_access_slb_pSeries: mtspr SPRN_SPRG1,r13 RUNLATCH_ON(r13) mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + mfcr r9 +#ifdef __DISABLED__ + /* Keep that around for when we re-implement dynamic VSIDs */ + cmpdi r3,0 + bge slb_miss_user_pseries +#endif /* __DISABLED__ */ std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r9,SPRN_SPRG1 - std r9,PACA_EXSLB+EX_R13(r13) - mfcr r9 + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ - b .do_slb_miss /* Rel. branch works in real mode */ + b .slb_miss_realmode /* Rel. branch works in real mode */ STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) STD_EXCEPTION_PSERIES(0x600, alignment) @@ -509,6 +518,38 @@ _GLOBAL(do_stab_bolted_pSeries) EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) /* + * We have some room here we use that to put + * the peries slb miss user trampoline code so it's reasonably + * away from slb_miss_user_common to avoid problems with rfid + * + * This is used for when the SLB miss handler has to go virtual, + * which doesn't happen for now anymore but will once we re-implement + * dynamic VSIDs for shared page tables + */ +#ifdef __DISABLED__ +slb_miss_user_pseries: + std r10,PACA_EXGEN+EX_R10(r13) + std r11,PACA_EXGEN+EX_R11(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfspr r10,SPRG1 + ld r11,PACA_EXSLB+EX_R9(r13) + ld r12,PACA_EXSLB+EX_R3(r13) + std r10,PACA_EXGEN+EX_R13(r13) + std r11,PACA_EXGEN+EX_R9(r13) + std r12,PACA_EXGEN+EX_R3(r13) + clrrdi r12,r13,32 + mfmsr r10 + mfspr r11,SRR0 /* save SRR0 */ + ori r12,r12,slb_miss_user_common@l /* virt addr of handler */ + ori r10,r10,MSR_IR|MSR_DR|MSR_RI + mtspr SRR0,r12 + mfspr r12,SRR1 /* and SRR1 */ + mtspr SRR1,r10 + rfid + b . /* prevent spec. execution */ +#endif /* __DISABLED__ */ + +/* * Vectors for the FWNMI option. Share common code. 
*/ .globl system_reset_fwnmi @@ -559,22 +600,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) .globl data_access_slb_iSeries data_access_slb_iSeries: mtspr SPRN_SPRG1,r13 /* save r13 */ - EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) - ld r12,PACALPPACA+LPPACASRR1(r13) mfspr r3,SPRN_DAR - b .do_slb_miss + std r9,PACA_EXSLB+EX_R9(r13) + mfcr r9 +#ifdef __DISABLED__ + cmpdi r3,0 + bge slb_miss_user_iseries +#endif + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) + ld r12,PACALPPACA+LPPACASRR1(r13); + b .slb_miss_realmode STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) .globl instruction_access_slb_iSeries instruction_access_slb_iSeries: mtspr SPRN_SPRG1,r13 /* save r13 */ - EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) - ld r12,PACALPPACA+LPPACASRR1(r13) - ld r3,PACALPPACA+LPPACASRR0(r13) - b .do_slb_miss + ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ + std r9,PACA_EXSLB+EX_R9(r13) + mfcr r9 +#ifdef __DISABLED__ + cmpdi r3,0 + bge .slb_miss_user_iseries +#endif + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) + ld r12,PACALPPACA+LPPACASRR1(r13); + b .slb_miss_realmode + +#ifdef __DISABLED__ +slb_miss_user_iseries: + std r10,PACA_EXGEN+EX_R10(r13) + std r11,PACA_EXGEN+EX_R11(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfspr r10,SPRG1 + ld r11,PACA_EXSLB+EX_R9(r13) + ld r12,PACA_EXSLB+EX_R3(r13) + std r10,PACA_EXGEN+EX_R13(r13) + std r11,PACA_EXGEN+EX_R9(r13) + std r12,PACA_EXGEN+EX_R3(r13) + EXCEPTION_PROLOG_ISERIES_2 + b slb_miss_user_common +#endif MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt) STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN) @@ -809,6 +887,126 @@ instruction_access_common: li r5,0x400 b .do_hash_page /* Try to handle as hpte fault */ +/* + * Here is the common SLB miss user that is used when going to virtual + * mode for SLB misses, that is currently not used + */ +#ifdef __DISABLED__ + .align 7 + .globl slb_miss_user_common +slb_miss_user_common: + mflr r10 + std r3,PACA_EXGEN+EX_DAR(r13) + stw r9,PACA_EXGEN+EX_CCR(r13) + std r10,PACA_EXGEN+EX_LR(r13) + std r11,PACA_EXGEN+EX_SRR0(r13) + bl .slb_allocate_user + + ld r10,PACA_EXGEN+EX_LR(r13) + ld r3,PACA_EXGEN+EX_R3(r13) + lwz r9,PACA_EXGEN+EX_CCR(r13) + ld r11,PACA_EXGEN+EX_SRR0(r13) + mtlr r10 + beq- slb_miss_fault + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_user_slb + mfmsr r10 + +.machine push +.machine "power4" + mtcrf 0x80,r9 +.machine pop + + clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */ + mtmsrd r10,1 + + mtspr SRR0,r11 + mtspr SRR1,r12 + + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + rfid + b . 
+ +slb_miss_fault: + EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN) + ld r4,PACA_EXGEN+EX_DAR(r13) + li r5,0 + std r4,_DAR(r1) + std r5,_DSISR(r1) + b .handle_page_fault + +unrecov_user_slb: + EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + +#endif /* __DISABLED__ */ + + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r12 contains the saved SRR1; SRR0 is still ready for return + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(slb_miss_realmode) + mflr r10 + + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + bl .slb_allocate_realmode + + /* All done -- return from exception. */ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ +#ifdef CONFIG_PPC_ISERIES + ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ +#endif /* CONFIG_PPC_ISERIES */ + + mtlr r10 + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_slb + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + +#ifdef CONFIG_PPC_ISERIES + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +#endif /* CONFIG_PPC_ISERIES */ + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + .align 7 .globl hardware_interrupt_common .globl hardware_interrupt_entry @@ -1139,62 +1337,6 @@ _GLOBAL(do_stab_bolted) b . /* prevent speculative execution */ /* - * r13 points to the PACA, r9 contains the saved CR, - * r11 and r12 contain the saved SRR0 and SRR1. - * r3 has the faulting address - * r9 - r13 are saved in paca->exslb. - * r3 is saved in paca->slb_r3 - * We assume we aren't going to take any exceptions during this procedure. - */ -_GLOBAL(do_slb_miss) - mflr r10 - - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - - bl .slb_allocate /* handle it */ - - /* All done -- return from exception. */ - - ld r10,PACA_EXSLB+EX_LR(r13) - ld r3,PACA_EXSLB+EX_R3(r13) - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ -#ifdef CONFIG_PPC_ISERIES - ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ -#endif /* CONFIG_PPC_ISERIES */ - - mtlr r10 - - andi. r10,r12,MSR_RI /* check for unrecoverable exception */ - beq- unrecov_slb - -.machine push -.machine "power4" - mtcrf 0x80,r9 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop - -#ifdef CONFIG_PPC_ISERIES - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 -#endif /* CONFIG_PPC_ISERIES */ - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ - -unrecov_slb: - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - DISABLE_INTS - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - -/* * Space for CPU0's segment table.
* * On iSeries, the hypervisor must fill in at least one entry before @@ -1558,18 +1700,9 @@ _GLOBAL(__secondary_start) HMT_MEDIUM /* Set thread priority to MEDIUM */ ld r2,PACATOC(r13) - li r6,0 - stb r6,PACAPROCENABLED(r13) - -#ifndef CONFIG_PPC_ISERIES - /* Initialize the page table pointer register. */ - LOADADDR(r6,_SDR1) - ld r6,0(r6) /* get the value of _SDR1 */ - mtspr SPRN_SDR1,r6 /* set the htab location */ -#endif - /* Initialize the first segment table (or SLB) entry */ - ld r3,PACASTABVIRT(r13) /* get addr of segment table */ - bl .stab_initialize + + /* Do early setup for that CPU */ + bl .early_setup_secondary /* Initialize the kernel stack. Just a repeat for iSeries. */ LOADADDR(r3,current_set) @@ -1578,37 +1711,6 @@ _GLOBAL(__secondary_start) addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD std r1,PACAKSAVE(r13) - ld r3,PACASTABREAL(r13) /* get raddr of segment table */ - ori r4,r3,1 /* turn on valid bit */ - -#ifdef CONFIG_PPC_ISERIES - li r0,-1 /* hypervisor call */ - li r3,1 - sldi r3,r3,63 /* 0x8000000000000000 */ - ori r3,r3,4 /* 0x8000000000000004 */ - sc /* HvCall_setASR */ -#else - /* set the ASR */ - ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ - ld r3,0(r3) - lwz r3,PLATFORM(r3) /* r3 = platform flags */ - andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ - beq 98f /* branch if result is 0 */ - mfspr r3,SPRN_PVR - srwi r3,r3,16 - cmpwi r3,0x37 /* SStar */ - beq 97f - cmpwi r3,0x36 /* IStar */ - beq 97f - cmpwi r3,0x34 /* Pulsar */ - bne 98f -97: li r3,H_SET_ASR /* hcall = H_SET_ASR */ - HVSC /* Invoking hcall */ - b 99f -98: /* !(rpa hypervisor) || !(star) */ - mtasr r4 /* set the stab location */ -99: -#endif li r7,0 mtlr r7 @@ -1750,40 +1852,6 @@ _STATIC(start_here_multiplatform) mr r3,r31 bl .early_setup - /* set the ASR */ - ld r3,PACASTABREAL(r13) - ori r4,r3,1 /* turn on valid bit */ - ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ - ld r3,0(r3) - lwz r3,PLATFORM(r3) /* r3 = platform flags */ - andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ - beq 98f /* branch if result is 0 */ - mfspr r3,SPRN_PVR - srwi r3,r3,16 - cmpwi r3,0x37 /* SStar */ - beq 97f - cmpwi r3,0x36 /* IStar */ - beq 97f - cmpwi r3,0x34 /* Pulsar */ - bne 98f -97: li r3,H_SET_ASR /* hcall = H_SET_ASR */ - HVSC /* Invoking hcall */ - b 99f -98: /* !(rpa hypervisor) || !(star) */ - mtasr r4 /* set the stab location */ -99: - /* Set SDR1 (hash table pointer) */ - ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ - ld r3,0(r3) - lwz r3,PLATFORM(r3) /* r3 = platform flags */ - /* Test if bit 0 is set (LPAR bit) */ - andi. 
r3,r3,PLATFORM_LPAR - bne 98f /* branch if result is !0 */ - LOADADDR(r6,_SDR1) /* Only if NOT LPAR */ - sub r6,r6,r26 - ld r6,0(r6) /* get the value of _SDR1 */ - mtspr SPRN_SDR1,r6 /* set the htab location */ -98: LOADADDR(r3,.start_here_common) SET_REG_TO_CONST(r4, MSR_KERNEL) mtspr SPRN_SRR0,r3 diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index 8abd2ad9283..b879d3057ef 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -26,22 +26,18 @@ #include <asm/processor.h> #include <asm/cputable.h> #include <asm/time.h> -#include <asm/systemcfg.h> #include <asm/machdep.h> +#include <asm/smp.h> extern void power4_idle(void); void default_idle(void) { - long oldval; unsigned int cpu = smp_processor_id(); + set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - + if (!need_resched()) { while (!need_resched() && !cpu_is_offline(cpu)) { ppc64_runlatch_off(); @@ -54,13 +50,12 @@ void default_idle(void) } HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); } ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } @@ -76,7 +71,9 @@ void native_idle(void) if (need_resched()) { ppc64_runlatch_on(); + preempt_enable_no_resched(); schedule(); + preempt_disable(); } if (cpu_is_offline(smp_processor_id()) && diff --git a/arch/ppc64/kernel/ioctl32.c b/arch/ppc64/kernel/ioctl32.c deleted file mode 100644 index ba4a899045c..00000000000 --- a/arch/ppc64/kernel/ioctl32.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * ioctl32.c: Conversion between 32bit and 64bit native ioctls. - * - * Based on sparc64 ioctl32.c by: - * - * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) - * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) - * - * ppc64 changes: - * - * Copyright (C) 2000 Ken Aaker (kdaaker@rchland.vnet.ibm.com) - * Copyright (C) 2001 Anton Blanchard (antonb@au.ibm.com) - * - * These routines maintain argument size conversion between 32bit and 64bit - * ioctls. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define INCLUDES -#include "compat_ioctl.c" -#include <linux/syscalls.h> - -#define CODE -#include "compat_ioctl.c" - -#define HANDLE_IOCTL(cmd,handler) { cmd, (ioctl_trans_handler_t)handler, NULL }, -#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl) - -#define IOCTL_TABLE_START \ - struct ioctl_trans ioctl_start[] = { -#define IOCTL_TABLE_END \ - }; - -IOCTL_TABLE_START -#include <linux/compat_ioctl.h> -#define DECLARES -#include "compat_ioctl.c" - -/* Little p (/dev/rtc, /dev/envctrl, etc.) 
*/ -COMPATIBLE_IOCTL(_IOR('p', 20, int[7])) /* RTCGET */ -COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */ - -IOCTL_TABLE_END - -int ioctl_table_size = ARRAY_SIZE(ioctl_start); diff --git a/arch/ppc64/kernel/irq.c b/arch/ppc64/kernel/irq.c deleted file mode 100644 index 87474584033..00000000000 --- a/arch/ppc64/kernel/irq.c +++ /dev/null @@ -1,519 +0,0 @@ -/* - * arch/ppc/kernel/irq.c - * - * Derived from arch/i386/kernel/irq.c - * Copyright (C) 1992 Linus Torvalds - * Adapted from arch/i386 by Gary Thomas - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * Updated and modified by Cort Dougan (cort@cs.nmt.edu) - * Copyright (C) 1996 Cort Dougan - * Adapted for Power Macintosh by Paul Mackerras - * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) - * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * This file contains the code used by various IRQ handling routines: - * asking for different IRQ's should be done through these routines - * instead of just grabbing them. Thus setups with different IRQ numbers - * shouldn't result in any weird surprises, and installing new handlers - * should be easier. - */ - -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/threads.h> -#include <linux/kernel_stat.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/timex.h> -#include <linux/config.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/irq.h> -#include <linux/proc_fs.h> -#include <linux/random.h> -#include <linux/kallsyms.h> -#include <linux/profile.h> -#include <linux/bitops.h> - -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/io.h> -#include <asm/pgtable.h> -#include <asm/irq.h> -#include <asm/cache.h> -#include <asm/prom.h> -#include <asm/ptrace.h> -#include <asm/iseries/it_lp_queue.h> -#include <asm/machdep.h> -#include <asm/paca.h> - -#ifdef CONFIG_SMP -extern void iSeries_smp_message_recv( struct pt_regs * ); -#endif - -extern irq_desc_t irq_desc[NR_IRQS]; -EXPORT_SYMBOL(irq_desc); - -int distribute_irqs = 1; -int __irq_offset_value; -int ppc_spurious_interrupts; -u64 ppc64_interrupt_controller; - -int show_interrupts(struct seq_file *p, void *v) -{ - int i = *(loff_t *) v, j; - struct irqaction * action; - irq_desc_t *desc; - unsigned long flags; - - if (i == 0) { - seq_printf(p, " "); - for (j=0; j<NR_CPUS; j++) { - if (cpu_online(j)) - seq_printf(p, "CPU%d ",j); - } - seq_putc(p, '\n'); - } - - if (i < NR_IRQS) { - desc = get_irq_desc(i); - spin_lock_irqsave(&desc->lock, flags); - action = desc->action; - if (!action || !action->handler) - goto skip; - seq_printf(p, "%3d: ", i); -#ifdef CONFIG_SMP - for (j = 0; j < NR_CPUS; j++) { - if (cpu_online(j)) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); - } -#else - seq_printf(p, "%10u ", kstat_irqs(i)); -#endif /* CONFIG_SMP */ - if (desc->handler) - seq_printf(p, " %s ", desc->handler->typename ); - else - seq_printf(p, " None "); - seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? 
"Level " : "Edge "); - seq_printf(p, " %s",action->name); - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&desc->lock, flags); - } else if (i == NR_IRQS) - seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts); - return 0; -} - -#ifdef CONFIG_HOTPLUG_CPU -void fixup_irqs(cpumask_t map) -{ - unsigned int irq; - static int warned; - - for_each_irq(irq) { - cpumask_t mask; - - if (irq_desc[irq].status & IRQ_PER_CPU) - continue; - - cpus_and(mask, irq_affinity[irq], map); - if (any_online_cpu(mask) == NR_CPUS) { - printk("Breaking affinity for irq %i\n", irq); - mask = map; - } - if (irq_desc[irq].handler->set_affinity) - irq_desc[irq].handler->set_affinity(irq, mask); - else if (irq_desc[irq].action && !(warned++)) - printk("Cannot set affinity for irq %i\n", irq); - } - - local_irq_enable(); - mdelay(1); - local_irq_disable(); -} -#endif - -extern int noirqdebug; - -/* - * Eventually, this should take an array of interrupts and an array size - * so it can dispatch multiple interrupts. - */ -void ppc_irq_dispatch_handler(struct pt_regs *regs, int irq) -{ - int status; - struct irqaction *action; - int cpu = smp_processor_id(); - irq_desc_t *desc = get_irq_desc(irq); - irqreturn_t action_ret; -#ifdef CONFIG_IRQSTACKS - struct thread_info *curtp, *irqtp; -#endif - - kstat_cpu(cpu).irqs[irq]++; - - if (desc->status & IRQ_PER_CPU) { - /* no locking required for CPU-local interrupts: */ - ack_irq(irq); - action_ret = handle_IRQ_event(irq, regs, desc->action); - desc->handler->end(irq); - return; - } - - spin_lock(&desc->lock); - ack_irq(irq); - /* - REPLAY is when Linux resends an IRQ that was dropped earlier - WAITING is used by probe to mark irqs that are being tested - */ - status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); - status |= IRQ_PENDING; /* we _want_ to handle it */ - - /* - * If the IRQ is disabled for whatever reason, we cannot - * use the action we have. - */ - action = NULL; - if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { - action = desc->action; - if (!action || !action->handler) { - ppc_spurious_interrupts++; - printk(KERN_DEBUG "Unhandled interrupt %x, disabled\n", irq); - /* We can't call disable_irq here, it would deadlock */ - if (!desc->depth) - desc->depth = 1; - desc->status |= IRQ_DISABLED; - /* This is not a real spurrious interrupt, we - * have to eoi it, so we jump to out - */ - mask_irq(irq); - goto out; - } - status &= ~IRQ_PENDING; /* we commit to handling */ - status |= IRQ_INPROGRESS; /* we are handling it */ - } - desc->status = status; - - /* - * If there is no IRQ handler or it was disabled, exit early. - Since we set PENDING, if another processor is handling - a different instance of this same irq, the other processor - will take care of it. - */ - if (unlikely(!action)) - goto out; - - /* - * Edge triggered interrupts need to remember - * pending events. - * This applies to any hw interrupts that allow a second - * instance of the same irq to arrive while we are in do_IRQ - * or in the handler. But the code here only handles the _second_ - * instance of the irq, not the third or fourth. So it is mostly - * useful for irq hardware that does not mask cleanly in an - * SMP environment. 
- */ - for (;;) { - spin_unlock(&desc->lock); - -#ifdef CONFIG_IRQSTACKS - /* Switch to the irq stack to handle this */ - curtp = current_thread_info(); - irqtp = hardirq_ctx[smp_processor_id()]; - if (curtp != irqtp) { - irqtp->task = curtp->task; - irqtp->flags = 0; - action_ret = call_handle_IRQ_event(irq, regs, action, irqtp); - irqtp->task = NULL; - if (irqtp->flags) - set_bits(irqtp->flags, &curtp->flags); - } else -#endif - action_ret = handle_IRQ_event(irq, regs, action); - - spin_lock(&desc->lock); - if (!noirqdebug) - note_interrupt(irq, desc, action_ret, regs); - if (likely(!(desc->status & IRQ_PENDING))) - break; - desc->status &= ~IRQ_PENDING; - } -out: - desc->status &= ~IRQ_INPROGRESS; - /* - * The ->end() handler has to deal with interrupts which got - * disabled while the handler was running. - */ - if (desc->handler) { - if (desc->handler->end) - desc->handler->end(irq); - else if (desc->handler->enable) - desc->handler->enable(irq); - } - spin_unlock(&desc->lock); -} - -#ifdef CONFIG_PPC_ISERIES -void do_IRQ(struct pt_regs *regs) -{ - struct paca_struct *lpaca; - - irq_enter(); - -#ifdef CONFIG_DEBUG_STACKOVERFLOW - /* Debugging check for stack overflow: is there less than 2KB free? */ - { - long sp; - - sp = __get_SP() & (THREAD_SIZE-1); - - if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { - printk("do_IRQ: stack overflow: %ld\n", - sp - sizeof(struct thread_info)); - dump_stack(); - } - } -#endif - - lpaca = get_paca(); -#ifdef CONFIG_SMP - if (lpaca->lppaca.int_dword.fields.ipi_cnt) { - lpaca->lppaca.int_dword.fields.ipi_cnt = 0; - iSeries_smp_message_recv(regs); - } -#endif /* CONFIG_SMP */ - if (hvlpevent_is_pending()) - process_hvlpevents(regs); - - irq_exit(); - - if (lpaca->lppaca.int_dword.fields.decr_int) { - lpaca->lppaca.int_dword.fields.decr_int = 0; - /* Signal a fake decrementer interrupt */ - timer_interrupt(regs); - } -} - -#else /* CONFIG_PPC_ISERIES */ - -void do_IRQ(struct pt_regs *regs) -{ - int irq; - - irq_enter(); - -#ifdef CONFIG_DEBUG_STACKOVERFLOW - /* Debugging check for stack overflow: is there less than 2KB free? */ - { - long sp; - - sp = __get_SP() & (THREAD_SIZE-1); - - if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { - printk("do_IRQ: stack overflow: %ld\n", - sp - sizeof(struct thread_info)); - dump_stack(); - } - } -#endif - - irq = ppc_md.get_irq(regs); - - if (irq >= 0) - ppc_irq_dispatch_handler(regs, irq); - else - /* That's not SMP safe ... but who cares ? */ - ppc_spurious_interrupts++; - - irq_exit(); -} -#endif /* CONFIG_PPC_ISERIES */ - -void __init init_IRQ(void) -{ - static int once = 0; - - if (once) - return; - - once++; - - ppc_md.init_IRQ(); - irq_ctx_init(); -} - -#ifndef CONFIG_PPC_ISERIES -/* - * Virtual IRQ mapping code, used on systems with XICS interrupt controllers. - */ - -#define UNDEFINED_IRQ 0xffffffff -unsigned int virt_irq_to_real_map[NR_IRQS]; - -/* - * Don't use virtual irqs 0, 1, 2 for devices. - * The pcnet32 driver considers interrupt numbers < 2 to be invalid, - * and 2 is the XICS IPI interrupt. - * We limit virtual irqs to 17 less than NR_IRQS so that when we - * offset them by 16 (to reserve the first 16 for ISA interrupts) - * we don't end up with an interrupt number >= NR_IRQS. 
- */ -#define MIN_VIRT_IRQ 3 -#define MAX_VIRT_IRQ (NR_IRQS - NUM_ISA_INTERRUPTS - 1) -#define NR_VIRT_IRQS (MAX_VIRT_IRQ - MIN_VIRT_IRQ + 1) - -void -virt_irq_init(void) -{ - int i; - for (i = 0; i < NR_IRQS; i++) - virt_irq_to_real_map[i] = UNDEFINED_IRQ; -} - -/* Create a mapping for a real_irq if it doesn't already exist. - * Return the virtual irq as a convenience. - */ -int virt_irq_create_mapping(unsigned int real_irq) -{ - unsigned int virq, first_virq; - static int warned; - - if (ppc64_interrupt_controller == IC_OPEN_PIC) - return real_irq; /* no mapping for openpic (for now) */ - - if (ppc64_interrupt_controller == IC_CELL_PIC) - return real_irq; /* no mapping for iic either */ - - /* don't map interrupts < MIN_VIRT_IRQ */ - if (real_irq < MIN_VIRT_IRQ) { - virt_irq_to_real_map[real_irq] = real_irq; - return real_irq; - } - - /* map to a number between MIN_VIRT_IRQ and MAX_VIRT_IRQ */ - virq = real_irq; - if (virq > MAX_VIRT_IRQ) - virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ; - - /* search for this number or a free slot */ - first_virq = virq; - while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) { - if (virt_irq_to_real_map[virq] == real_irq) - return virq; - if (++virq > MAX_VIRT_IRQ) - virq = MIN_VIRT_IRQ; - if (virq == first_virq) - goto nospace; /* oops, no free slots */ - } - - virt_irq_to_real_map[virq] = real_irq; - return virq; - - nospace: - if (!warned) { - printk(KERN_CRIT "Interrupt table is full\n"); - printk(KERN_CRIT "Increase NR_IRQS (currently %d) " - "in your kernel sources and rebuild.\n", NR_IRQS); - warned = 1; - } - return NO_IRQ; -} - -/* - * In most cases will get a hit on the very first slot checked in the - * virt_irq_to_real_map. Only when there are a large number of - * IRQs will this be expensive. - */ -unsigned int real_irq_to_virt_slowpath(unsigned int real_irq) -{ - unsigned int virq; - unsigned int first_virq; - - virq = real_irq; - - if (virq > MAX_VIRT_IRQ) - virq = (virq % NR_VIRT_IRQS) + MIN_VIRT_IRQ; - - first_virq = virq; - - do { - if (virt_irq_to_real_map[virq] == real_irq) - return virq; - - virq++; - - if (virq >= MAX_VIRT_IRQ) - virq = 0; - - } while (first_virq != virq); - - return NO_IRQ; - -} - -#endif /* CONFIG_PPC_ISERIES */ - -#ifdef CONFIG_IRQSTACKS -struct thread_info *softirq_ctx[NR_CPUS]; -struct thread_info *hardirq_ctx[NR_CPUS]; - -void irq_ctx_init(void) -{ - struct thread_info *tp; - int i; - - for_each_cpu(i) { - memset((void *)softirq_ctx[i], 0, THREAD_SIZE); - tp = softirq_ctx[i]; - tp->cpu = i; - tp->preempt_count = SOFTIRQ_OFFSET; - - memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); - tp = hardirq_ctx[i]; - tp->cpu = i; - tp->preempt_count = HARDIRQ_OFFSET; - } -} - -void do_softirq(void) -{ - unsigned long flags; - struct thread_info *curtp, *irqtp; - - if (in_interrupt()) - return; - - local_irq_save(flags); - - if (local_softirq_pending()) { - curtp = current_thread_info(); - irqtp = softirq_ctx[smp_processor_id()]; - irqtp->task = curtp->task; - call_do_softirq(irqtp); - irqtp->task = NULL; - } - - local_irq_restore(flags); -} -EXPORT_SYMBOL(do_softirq); - -#endif /* CONFIG_IRQSTACKS */ - -static int __init setup_noirqdistrib(char *str) -{ - distribute_irqs = 0; - return 1; -} - -__setup("noirqdistrib", setup_noirqdistrib); diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index ed876a5178a..511af54e623 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -30,19 +30,14 @@ #include <linux/config.h> #include <linux/kprobes.h> #include <linux/ptrace.h> -#include 
<linux/spinlock.h> #include <linux/preempt.h> #include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/sstep.h> static DECLARE_MUTEX(kprobe_mutex); - -static struct kprobe *current_kprobe; -static unsigned long kprobe_status, kprobe_saved_msr; -static struct kprobe *kprobe_prev; -static unsigned long kprobe_status_prev, kprobe_saved_msr_prev; -static struct pt_regs jprobe_saved_regs; +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); int __kprobes arch_prepare_kprobe(struct kprobe *p) { @@ -108,20 +103,28 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->nip = (unsigned long)p->ainsn.insn; } -static inline void save_previous_kprobe(void) +static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr; +} + +static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - kprobe_prev = current_kprobe; - kprobe_status_prev = kprobe_status; - kprobe_saved_msr_prev = kprobe_saved_msr; + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; } -static inline void restore_previous_kprobe(void) +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { - current_kprobe = kprobe_prev; - kprobe_status = kprobe_status_prev; - kprobe_saved_msr = kprobe_saved_msr_prev; + __get_cpu_var(current_kprobe) = p; + kcb->kprobe_saved_msr = regs->msr; } +/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { @@ -145,19 +148,24 @@ static inline int kprobe_handler(struct pt_regs *regs) struct kprobe *p; int ret = 0; unsigned int *addr = (unsigned int *)regs->nip; + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); /* Check we're not actually recursing */ if (kprobe_running()) { - /* We *are* holding lock here, so this is safe. - Disarm the probe we just hit, and ignore it. */ p = get_kprobe(addr); if (p) { kprobe_opcode_t insn = *p->ainsn.insn; - if (kprobe_status == KPROBE_HIT_SS && + if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) { regs->msr &= ~MSR_SE; - regs->msr |= kprobe_saved_msr; - unlock_kprobes(); + regs->msr |= kcb->kprobe_saved_msr; goto no_kprobe; } /* We have reentered the kprobe_handler(), since @@ -166,27 +174,24 @@ static inline int kprobe_handler(struct pt_regs *regs) * just single step on the instruction of the new probe * without calling any user handlers. */ - save_previous_kprobe(); - current_kprobe = p; - kprobe_saved_msr = regs->msr; + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kcb->kprobe_saved_msr = regs->msr; p->nmissed++; prepare_singlestep(p, regs); - kprobe_status = KPROBE_REENTER; + kcb->kprobe_status = KPROBE_REENTER; return 1; } else { - p = current_kprobe; + p = __get_cpu_var(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } } - /* If it's not ours, can't be delete race, (we hold lock). 
*/ goto no_kprobe; } - lock_kprobes(); p = get_kprobe(addr); if (!p) { - unlock_kprobes(); if (*addr != BREAKPOINT_INSTRUCTION) { /* * PowerPC has multiple variants of the "trap" @@ -209,24 +214,19 @@ static inline int kprobe_handler(struct pt_regs *regs) goto no_kprobe; } - kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; - kprobe_saved_msr = regs->msr; + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + set_current_kprobe(p, regs, kcb); if (p->pre_handler && p->pre_handler(p, regs)) /* handler has already set things up, so skip ss setup */ return 1; ss_probe: prepare_singlestep(p, regs); - kprobe_status = KPROBE_HIT_SS; - /* - * This preempt_disable() matches the preempt_enable_no_resched() - * in post_kprobe_handler(). - */ - preempt_disable(); + kcb->kprobe_status = KPROBE_HIT_SS; return 1; no_kprobe: + preempt_enable_no_resched(); return ret; } @@ -251,9 +251,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) struct kretprobe_instance *ri = NULL; struct hlist_head *head; struct hlist_node *node, *tmp; - unsigned long orig_ret_address = 0; + unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; + spin_lock_irqsave(&kretprobe_lock, flags); head = kretprobe_inst_table_head(current); /* @@ -292,12 +293,14 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); regs->nip = orig_ret_address; - unlock_kprobes(); + reset_current_kprobe(); + spin_unlock_irqrestore(&kretprobe_lock, flags); + preempt_enable_no_resched(); /* * By returning a non-zero value, we are telling - * kprobe_handler() that we have handled unlocking - * and re-enabling preemption. + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) */ return 1; } @@ -323,23 +326,26 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) static inline int post_kprobe_handler(struct pt_regs *regs) { - if (!kprobe_running()) + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) return 0; - if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { - kprobe_status = KPROBE_HIT_SSDONE; - current_kprobe->post_handler(current_kprobe, regs, 0); + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); } - resume_execution(current_kprobe, regs); - regs->msr |= kprobe_saved_msr; + resume_execution(cur, regs); + regs->msr |= kcb->kprobe_saved_msr; /*Restore back the original saved kprobes variables and continue. */ - if (kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); goto out; } - unlock_kprobes(); + reset_current_kprobe(); out: preempt_enable_no_resched(); @@ -354,19 +360,20 @@ out: return 1; } -/* Interrupts disabled, kprobe_lock held. 
*/ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { - if (current_kprobe->fault_handler - && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) return 1; - if (kprobe_status & KPROBE_HIT_SS) { - resume_execution(current_kprobe, regs); + if (kcb->kprobe_status & KPROBE_HIT_SS) { + resume_execution(cur, regs); regs->msr &= ~MSR_SE; - regs->msr |= kprobe_saved_msr; + regs->msr |= kcb->kprobe_saved_msr; - unlock_kprobes(); + reset_current_kprobe(); preempt_enable_no_resched(); } return 0; @@ -381,11 +388,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - /* - * Interrupts are not disabled here. We need to disable - * preemption, because kprobe_running() uses smp_processor_id(). - */ - preempt_disable(); switch (val) { case DIE_BPT: if (kprobe_handler(args->regs)) @@ -396,22 +398,25 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); if (kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; + preempt_enable(); break; default: break; } - preempt_enable_no_resched(); return ret; } int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - memcpy(&jprobe_saved_regs, regs, sizeof(struct pt_regs)); + memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs)); /* setup return addr to the jprobe handler routine */ regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry); @@ -431,12 +436,15 @@ void __kprobes jprobe_return_end(void) int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + /* * FIXME - we should ideally be validating that we got here 'cos * of the "trap" in jprobe_return() above, before restoring the * saved regs... */ - memcpy(regs, &jprobe_saved_regs, sizeof(struct pt_regs)); + memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + preempt_enable_no_resched(); return 1; } diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c deleted file mode 100644 index e86155770bb..00000000000 --- a/arch/ppc64/kernel/lparcfg.c +++ /dev/null @@ -1,613 +0,0 @@ -/* - * PowerPC64 LPAR Configuration Information Driver - * - * Dave Engebretsen engebret@us.ibm.com - * Copyright (c) 2003 Dave Engebretsen - * Will Schmidt willschm@us.ibm.com - * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation. - * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation. - * Nathan Lynch nathanl@austin.ibm.com - * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * This driver creates a proc file at /proc/ppc64/lparcfg which contains - * keyword - value pairs that specify the configuration of the partition. 
- */ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/seq_file.h> -#include <asm/uaccess.h> -#include <asm/iseries/hv_lp_config.h> -#include <asm/lppaca.h> -#include <asm/hvcall.h> -#include <asm/firmware.h> -#include <asm/rtas.h> -#include <asm/system.h> -#include <asm/time.h> -#include <asm/iseries/it_exp_vpd_panel.h> -#include <asm/prom.h> - -#define MODULE_VERS "1.6" -#define MODULE_NAME "lparcfg" - -/* #define LPARCFG_DEBUG */ - -/* find a better place for this function... */ -void log_plpar_hcall_return(unsigned long rc, char *tag) -{ - if (rc == 0) /* success, return */ - return; -/* check for null tag ? */ - if (rc == H_Hardware) - printk(KERN_INFO - "plpar-hcall (%s) failed with hardware fault\n", tag); - else if (rc == H_Function) - printk(KERN_INFO - "plpar-hcall (%s) failed; function not allowed\n", tag); - else if (rc == H_Authority) - printk(KERN_INFO - "plpar-hcall (%s) failed; not authorized to this function\n", - tag); - else if (rc == H_Parameter) - printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n", - tag); - else - printk(KERN_INFO - "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n", - tag, rc); - -} - -static struct proc_dir_entry *proc_ppc64_lparcfg; -#define LPARCFG_BUFF_SIZE 4096 - -#ifdef CONFIG_PPC_ISERIES - -/* - * For iSeries legacy systems, the PPA purr function is available from the - * emulated_time_base field in the paca. - */ -static unsigned long get_purr(void) -{ - unsigned long sum_purr = 0; - int cpu; - struct paca_struct *lpaca; - - for_each_cpu(cpu) { - lpaca = paca + cpu; - sum_purr += lpaca->lppaca.emulated_time_base; - -#ifdef PURR_DEBUG - printk(KERN_INFO "get_purr for cpu (%d) has value (%ld) \n", - cpu, lpaca->lppaca.emulated_time_base); -#endif - } - return sum_purr; -} - -#define lparcfg_write NULL - -/* - * Methods used to fetch LPAR data when running on an iSeries platform. 
- */ -static int lparcfg_data(struct seq_file *m, void *v) -{ - unsigned long pool_id, lp_index; - int shared, entitled_capacity, max_entitled_capacity; - int processors, max_processors; - struct paca_struct *lpaca = get_paca(); - unsigned long purr = get_purr(); - - seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); - - shared = (int)(lpaca->lppaca_ptr->shared_proc); - seq_printf(m, "serial_number=%c%c%c%c%c%c%c\n", - e2a(xItExtVpdPanel.mfgID[2]), - e2a(xItExtVpdPanel.mfgID[3]), - e2a(xItExtVpdPanel.systemSerial[1]), - e2a(xItExtVpdPanel.systemSerial[2]), - e2a(xItExtVpdPanel.systemSerial[3]), - e2a(xItExtVpdPanel.systemSerial[4]), - e2a(xItExtVpdPanel.systemSerial[5])); - - seq_printf(m, "system_type=%c%c%c%c\n", - e2a(xItExtVpdPanel.machineType[0]), - e2a(xItExtVpdPanel.machineType[1]), - e2a(xItExtVpdPanel.machineType[2]), - e2a(xItExtVpdPanel.machineType[3])); - - lp_index = HvLpConfig_getLpIndex(); - seq_printf(m, "partition_id=%d\n", (int)lp_index); - - seq_printf(m, "system_active_processors=%d\n", - (int)HvLpConfig_getSystemPhysicalProcessors()); - - seq_printf(m, "system_potential_processors=%d\n", - (int)HvLpConfig_getSystemPhysicalProcessors()); - - processors = (int)HvLpConfig_getPhysicalProcessors(); - seq_printf(m, "partition_active_processors=%d\n", processors); - - max_processors = (int)HvLpConfig_getMaxPhysicalProcessors(); - seq_printf(m, "partition_potential_processors=%d\n", max_processors); - - if (shared) { - entitled_capacity = HvLpConfig_getSharedProcUnits(); - max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits(); - } else { - entitled_capacity = processors * 100; - max_entitled_capacity = max_processors * 100; - } - seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity); - - seq_printf(m, "partition_max_entitled_capacity=%d\n", - max_entitled_capacity); - - if (shared) { - pool_id = HvLpConfig_getSharedPoolIndex(); - seq_printf(m, "pool=%d\n", (int)pool_id); - seq_printf(m, "pool_capacity=%d\n", - (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) * - 100)); - seq_printf(m, "purr=%ld\n", purr); - } - - seq_printf(m, "shared_processor_mode=%d\n", shared); - - return 0; -} -#endif /* CONFIG_PPC_ISERIES */ - -#ifdef CONFIG_PPC_PSERIES -/* - * Methods used to fetch LPAR data when running on a pSeries platform. - */ - -/* - * H_GET_PPP hcall returns info in 4 parms. - * entitled_capacity,unallocated_capacity, - * aggregation, resource_capability). - * - * R4 = Entitled Processor Capacity Percentage. - * R5 = Unallocated Processor Capacity Percentage. - * R6 (AABBCCDDEEFFGGHH). - * XXXX - reserved (0) - * XXXX - reserved (0) - * XXXX - Group Number - * XXXX - Pool Number. - * R7 (IIJJKKLLMMNNOOPP). - * XX - reserved. (0) - * XX - bit 0-6 reserved (0). bit 7 is Capped indicator. - * XX - variable processor Capacity Weight - * XX - Unallocated Variable Processor Capacity Weight. - * XXXX - Active processors in Physical Processor Pool. - * XXXX - Processors active on platform. 
- */ -static unsigned int h_get_ppp(unsigned long *entitled, - unsigned long *unallocated, - unsigned long *aggregation, - unsigned long *resource) -{ - unsigned long rc; - rc = plpar_hcall_4out(H_GET_PPP, 0, 0, 0, 0, entitled, unallocated, - aggregation, resource); - - log_plpar_hcall_return(rc, "H_GET_PPP"); - - return rc; -} - -static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs) -{ - unsigned long rc; - unsigned long dummy; - rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy); - - log_plpar_hcall_return(rc, "H_PIC"); -} - -static unsigned long get_purr(void); - -/* Track sum of all purrs across all processors. This is used to further */ -/* calculate usage values by different applications */ - -static unsigned long get_purr(void) -{ - unsigned long sum_purr = 0; - int cpu; - struct cpu_usage *cu; - - for_each_cpu(cpu) { - cu = &per_cpu(cpu_usage_array, cpu); - sum_purr += cu->current_tb; - } - return sum_purr; -} - -#define SPLPAR_CHARACTERISTICS_TOKEN 20 -#define SPLPAR_MAXLENGTH 1026*(sizeof(char)) - -/* - * parse_system_parameter_string() - * Retrieve the potential_processors, max_entitled_capacity and friends - * through the get-system-parameter rtas call. Replace keyword strings as - * necessary. - */ -static void parse_system_parameter_string(struct seq_file *m) -{ - int call_status; - - char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); - if (!local_buffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d \n", - __FILE__, __FUNCTION__, __LINE__); - return; - } - - spin_lock(&rtas_data_buf_lock); - memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH); - call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, - NULL, - SPLPAR_CHARACTERISTICS_TOKEN, - __pa(rtas_data_buf)); - memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH); - spin_unlock(&rtas_data_buf_lock); - - if (call_status != 0) { - printk(KERN_INFO - "%s %s Error calling get-system-parameter (0x%x)\n", - __FILE__, __FUNCTION__, call_status); - } else { - int splpar_strlen; - int idx, w_idx; - char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); - if (!workbuffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d \n", - __FILE__, __FUNCTION__, __LINE__); - kfree(local_buffer); - return; - } -#ifdef LPARCFG_DEBUG - printk(KERN_INFO "success calling get-system-parameter \n"); -#endif - splpar_strlen = local_buffer[0] * 16 + local_buffer[1]; - local_buffer += 2; /* step over strlen value */ - - memset(workbuffer, 0, SPLPAR_MAXLENGTH); - w_idx = 0; - idx = 0; - while ((*local_buffer) && (idx < splpar_strlen)) { - workbuffer[w_idx++] = local_buffer[idx++]; - if ((local_buffer[idx] == ',') - || (local_buffer[idx] == '\0')) { - workbuffer[w_idx] = '\0'; - if (w_idx) { - /* avoid the empty string */ - seq_printf(m, "%s\n", workbuffer); - } - memset(workbuffer, 0, SPLPAR_MAXLENGTH); - idx++; /* skip the comma */ - w_idx = 0; - } else if (local_buffer[idx] == '=') { - /* code here to replace workbuffer contents - with different keyword strings */ - if (0 == strcmp(workbuffer, "MaxEntCap")) { - strcpy(workbuffer, - "partition_max_entitled_capacity"); - w_idx = strlen(workbuffer); - } - if (0 == strcmp(workbuffer, "MaxPlatProcs")) { - strcpy(workbuffer, - "system_potential_processors"); - w_idx = strlen(workbuffer); - } - } - } - kfree(workbuffer); - local_buffer -= 2; /* back up over strlen value */ - } - kfree(local_buffer); -} - -static int lparcfg_count_active_processors(void); - -/* Return the number of processors in the system. 
- * This function reads through the device tree and counts - * the virtual processors, this does not include threads. - */ -static int lparcfg_count_active_processors(void) -{ - struct device_node *cpus_dn = NULL; - int count = 0; - - while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { -#ifdef LPARCFG_DEBUG - printk(KERN_ERR "cpus_dn %p \n", cpus_dn); -#endif - count++; - } - return count; -} - -static int lparcfg_data(struct seq_file *m, void *v) -{ - int partition_potential_processors; - int partition_active_processors; - struct device_node *rootdn; - const char *model = ""; - const char *system_id = ""; - unsigned int *lp_index_ptr, lp_index = 0; - struct device_node *rtas_node; - int *lrdrp; - - rootdn = find_path_device("/"); - if (rootdn) { - model = get_property(rootdn, "model", NULL); - system_id = get_property(rootdn, "system-id", NULL); - lp_index_ptr = (unsigned int *) - get_property(rootdn, "ibm,partition-no", NULL); - if (lp_index_ptr) - lp_index = *lp_index_ptr; - } - - seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); - - seq_printf(m, "serial_number=%s\n", system_id); - - seq_printf(m, "system_type=%s\n", model); - - seq_printf(m, "partition_id=%d\n", (int)lp_index); - - rtas_node = find_path_device("/rtas"); - lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL); - - if (lrdrp == NULL) { - partition_potential_processors = systemcfg->processorCount; - } else { - partition_potential_processors = *(lrdrp + 4); - } - - partition_active_processors = lparcfg_count_active_processors(); - - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - unsigned long h_entitled, h_unallocated; - unsigned long h_aggregation, h_resource; - unsigned long pool_idle_time, pool_procs; - unsigned long purr; - - h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation, - &h_resource); - - seq_printf(m, "R4=0x%lx\n", h_entitled); - seq_printf(m, "R5=0x%lx\n", h_unallocated); - seq_printf(m, "R6=0x%lx\n", h_aggregation); - seq_printf(m, "R7=0x%lx\n", h_resource); - - purr = get_purr(); - - /* this call handles the ibm,get-system-parameter contents */ - parse_system_parameter_string(m); - - seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled); - - seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff); - - seq_printf(m, "system_active_processors=%ld\n", - (h_resource >> 0 * 8) & 0xffff); - - /* pool related entries are apropriate for shared configs */ - if (paca[0].lppaca.shared_proc) { - - h_pic(&pool_idle_time, &pool_procs); - - seq_printf(m, "pool=%ld\n", - (h_aggregation >> 0 * 8) & 0xffff); - - /* report pool_capacity in percentage */ - seq_printf(m, "pool_capacity=%ld\n", - ((h_resource >> 2 * 8) & 0xffff) * 100); - - seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); - - seq_printf(m, "pool_num_procs=%ld\n", pool_procs); - } - - seq_printf(m, "unallocated_capacity_weight=%ld\n", - (h_resource >> 4 * 8) & 0xFF); - - seq_printf(m, "capacity_weight=%ld\n", - (h_resource >> 5 * 8) & 0xFF); - - seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01); - - seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated); - - seq_printf(m, "purr=%ld\n", purr); - - } else { /* non SPLPAR case */ - - seq_printf(m, "system_active_processors=%d\n", - partition_potential_processors); - - seq_printf(m, "system_potential_processors=%d\n", - partition_potential_processors); - - seq_printf(m, "partition_max_entitled_capacity=%d\n", - partition_potential_processors * 100); - - seq_printf(m, "partition_entitled_capacity=%d\n", - partition_active_processors * 100); - } - - 
seq_printf(m, "partition_active_processors=%d\n", - partition_active_processors); - - seq_printf(m, "partition_potential_processors=%d\n", - partition_potential_processors); - - seq_printf(m, "shared_processor_mode=%d\n", paca[0].lppaca.shared_proc); - - return 0; -} - -/* - * Interface for changing system parameters (variable capacity weight - * and entitled capacity). Format of input is "param_name=value"; - * anything after value is ignored. Valid parameters at this time are - * "partition_entitled_capacity" and "capacity_weight". We use - * H_SET_PPP to alter parameters. - * - * This function should be invoked only on systems with - * FW_FEATURE_SPLPAR. - */ -static ssize_t lparcfg_write(struct file *file, const char __user * buf, - size_t count, loff_t * off) -{ - char *kbuf; - char *tmp; - u64 new_entitled, *new_entitled_ptr = &new_entitled; - u8 new_weight, *new_weight_ptr = &new_weight; - - unsigned long current_entitled; /* parameters for h_get_ppp */ - unsigned long dummy; - unsigned long resource; - u8 current_weight; - - ssize_t retval = -ENOMEM; - - kbuf = kmalloc(count, GFP_KERNEL); - if (!kbuf) - goto out; - - retval = -EFAULT; - if (copy_from_user(kbuf, buf, count)) - goto out; - - retval = -EINVAL; - kbuf[count - 1] = '\0'; - tmp = strchr(kbuf, '='); - if (!tmp) - goto out; - - *tmp++ = '\0'; - - if (!strcmp(kbuf, "partition_entitled_capacity")) { - char *endp; - *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); - if (endp == tmp) - goto out; - new_weight_ptr = ¤t_weight; - } else if (!strcmp(kbuf, "capacity_weight")) { - char *endp; - *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); - if (endp == tmp) - goto out; - new_entitled_ptr = ¤t_entitled; - } else - goto out; - - /* Get our current parameters */ - retval = h_get_ppp(¤t_entitled, &dummy, &dummy, &resource); - if (retval) { - retval = -EIO; - goto out; - } - - current_weight = (resource >> 5 * 8) & 0xFF; - - pr_debug("%s: current_entitled = %lu, current_weight = %lu\n", - __FUNCTION__, current_entitled, current_weight); - - pr_debug("%s: new_entitled = %lu, new_weight = %lu\n", - __FUNCTION__, *new_entitled_ptr, *new_weight_ptr); - - retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr, - *new_weight_ptr); - - if (retval == H_Success || retval == H_Constrained) { - retval = count; - } else if (retval == H_Busy) { - retval = -EBUSY; - } else if (retval == H_Hardware) { - retval = -EIO; - } else if (retval == H_Parameter) { - retval = -EINVAL; - } else { - printk(KERN_WARNING "%s: received unknown hv return code %ld", - __FUNCTION__, retval); - retval = -EIO; - } - - out: - kfree(kbuf); - return retval; -} - -#endif /* CONFIG_PPC_PSERIES */ - -static int lparcfg_open(struct inode *inode, struct file *file) -{ - return single_open(file, lparcfg_data, NULL); -} - -struct file_operations lparcfg_fops = { - .owner = THIS_MODULE, - .read = seq_read, - .open = lparcfg_open, - .release = single_release, -}; - -int __init lparcfg_init(void) -{ - struct proc_dir_entry *ent; - mode_t mode = S_IRUSR | S_IRGRP | S_IROTH; - - /* Allow writing if we have FW_FEATURE_SPLPAR */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - lparcfg_fops.write = lparcfg_write; - mode |= S_IWUSR; - } - - ent = create_proc_entry("ppc64/lparcfg", mode, NULL); - if (ent) { - ent->proc_fops = &lparcfg_fops; - ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL); - if (!ent->data) { - printk(KERN_ERR - "Failed to allocate buffer for lparcfg\n"); - remove_proc_entry("lparcfg", ent->parent); - return -ENOMEM; - } - } else { - 
printk(KERN_ERR "Failed to create ppc64/lparcfg\n"); - return -EIO; - } - - proc_ppc64_lparcfg = ent; - return 0; -} - -void __exit lparcfg_cleanup(void) -{ - if (proc_ppc64_lparcfg) { - if (proc_ppc64_lparcfg->data) { - kfree(proc_ppc64_lparcfg->data); - } - remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent); - } -} - -module_init(lparcfg_init); -module_exit(lparcfg_cleanup); -MODULE_DESCRIPTION("Interface for LPAR configuration data"); -MODULE_AUTHOR("Dave Engebretsen"); -MODULE_LICENSE("GPL"); diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/ppc64/kernel/machine_kexec.c index ff8679f260f..07ea03598c0 100644 --- a/arch/ppc64/kernel/machine_kexec.c +++ b/arch/ppc64/kernel/machine_kexec.c @@ -24,6 +24,7 @@ #include <asm/mmu.h> #include <asm/sections.h> /* _end */ #include <asm/prom.h> +#include <asm/smp.h> #define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */ diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index 077507ffbab..492bca6137e 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -78,12 +78,12 @@ _GLOBAL(call_do_softirq) mtlr r0 blr -_GLOBAL(call_handle_IRQ_event) +_GLOBAL(call___do_IRQ) mflr r0 std r0,16(r1) - stdu r1,THREAD_SIZE-112(r6) - mr r1,r6 - bl .handle_IRQ_event + stdu r1,THREAD_SIZE-112(r5) + mr r1,r5 + bl .__do_IRQ ld r1,0(r1) ld r0,16(r1) mtlr r0 @@ -560,7 +560,7 @@ _GLOBAL(real_readb) isync blr - /* +/* * Do an IO access in real mode */ _GLOBAL(real_writeb) @@ -593,6 +593,76 @@ _GLOBAL(real_writeb) #endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ /* + * SCOM access functions for 970 (FX only for now) + * + * unsigned long scom970_read(unsigned int address); + * void scom970_write(unsigned int address, unsigned long value); + * + * The address passed in is the 24 bits register address. This code + * is 970 specific and will not check the status bits, so you should + * know what you are doing. + */ +_GLOBAL(scom970_read) + /* interrupts off */ + mfmsr r4 + ori r0,r4,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits + * (including parity). On current CPUs they must be 0'd, + * and finally or in RW bit + */ + rlwinm r3,r3,8,0,15 + ori r3,r3,0x8000 + + /* do the actual scom read */ + sync + mtspr SPRN_SCOMC,r3 + isync + mfspr r3,SPRN_SCOMD + isync + mfspr r0,SPRN_SCOMC + isync + + /* XXX: fixup result on some buggy 970's (ouch ! we lost a bit, bah + * that's the best we can do). Not implemented yet as we don't use + * the scom on any of the bogus CPUs yet, but may have to be done + * ultimately + */ + + /* restore interrupts */ + mtmsrd r4,1 + blr + + +_GLOBAL(scom970_write) + /* interrupts off */ + mfmsr r5 + ori r0,r5,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits + * (including parity). On current CPUs they must be 0'd.
+ */ + + rlwinm r3,r3,8,0,15 + + sync + mtspr SPRN_SCOMD,r4 /* write data */ + isync + mtspr SPRN_SCOMC,r3 /* write command */ + isync + mfspr 3,SPRN_SCOMC + isync + + /* restore interrupts */ + mtmsrd r5,1 + blr + + +/* * Create a kernel thread * kernel_thread(fn, arg, flags) */ diff --git a/arch/ppc64/kernel/nvram.c b/arch/ppc64/kernel/nvram.c index 4fb1a9f5060..c0fcd29918c 100644 --- a/arch/ppc64/kernel/nvram.c +++ b/arch/ppc64/kernel/nvram.c @@ -31,7 +31,6 @@ #include <asm/rtas.h> #include <asm/prom.h> #include <asm/machdep.h> -#include <asm/systemcfg.h> #undef DEBUG_NVRAM @@ -167,7 +166,7 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file, case IOC_NVRAM_GET_OFFSET: { int part, offset; - if (systemcfg->platform != PLATFORM_POWERMAC) + if (_machine != PLATFORM_POWERMAC) return -EINVAL; if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0) return -EFAULT; @@ -450,7 +449,7 @@ static int nvram_setup_partition(void) * in our nvram, as Apple defined partitions use pretty much * all of the space */ - if (systemcfg->platform == PLATFORM_POWERMAC) + if (_machine == PLATFORM_POWERMAC) return -ENOSPC; /* see if we have an OS partition that meets our needs. diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c deleted file mode 100644 index 5e27e5a6a35..00000000000 --- a/arch/ppc64/kernel/pacaData.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * c 2001 PPC 64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/threads.h> -#include <linux/module.h> - -#include <asm/processor.h> -#include <asm/ptrace.h> -#include <asm/page.h> - -#include <asm/lppaca.h> -#include <asm/iseries/it_lp_queue.h> -#include <asm/paca.h> - -static union { - struct systemcfg data; - u8 page[PAGE_SIZE]; -} systemcfg_store __page_aligned; -struct systemcfg *systemcfg = &systemcfg_store.data; -EXPORT_SYMBOL(systemcfg); - - -/* This symbol is provided by the linker - let it fill in the paca - * field correctly */ -extern unsigned long __toc_start; - -/* The Paca is an array with one entry per processor. Each contains an - * lppaca, which contains the information shared between the - * hypervisor and Linux. Each also contains an ItLpRegSave area which - * is used by the hypervisor to save registers. - * On systems with hardware multi-threading, there are two threads - * per processor. The Paca array must contain an entry for each thread. - * The VPD Areas will give a max logical processors = 2 * max physical - * processors. The processor VPD array needs one entry per physical - * processor (not thread). 
- */ -#define PACA_INIT_COMMON(number, start, asrr, asrv) \ - .lock_token = 0x8000, \ - .paca_index = (number), /* Paca Index */ \ - .default_decr = 0x00ff0000, /* Initial Decr */ \ - .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \ - .stab_real = (asrr), /* Real pointer to segment table */ \ - .stab_addr = (asrv), /* Virt pointer to segment table */ \ - .cpu_start = (start), /* Processor start */ \ - .hw_cpu_id = 0xffff, \ - .lppaca = { \ - .desc = 0xd397d781, /* "LpPa" */ \ - .size = sizeof(struct lppaca), \ - .dyn_proc_status = 2, \ - .decr_val = 0x00ff0000, \ - .fpregs_in_use = 1, \ - .end_of_quantum = 0xfffffffffffffffful, \ - .slb_count = 64, \ - .vmxregs_in_use = 0, \ - }, \ - -#ifdef CONFIG_PPC_ISERIES -#define PACA_INIT_ISERIES(number) \ - .lppaca_ptr = &paca[number].lppaca, \ - .reg_save_ptr = &paca[number].reg_save, \ - .reg_save = { \ - .xDesc = 0xd397d9e2, /* "LpRS" */ \ - .xSize = sizeof(struct ItLpRegSave) \ - } - -#define PACA_INIT(number) \ -{ \ - PACA_INIT_COMMON(number, 0, 0, 0) \ - PACA_INIT_ISERIES(number) \ -} - -#define BOOTCPU_PACA_INIT(number) \ -{ \ - PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \ - PACA_INIT_ISERIES(number) \ -} - -#else -#define PACA_INIT(number) \ -{ \ - PACA_INIT_COMMON(number, 0, 0, 0) \ -} - -#define BOOTCPU_PACA_INIT(number) \ -{ \ - PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \ -} -#endif - -struct paca_struct paca[] = { - BOOTCPU_PACA_INIT(0), -#if NR_CPUS > 1 - PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3), -#if NR_CPUS > 4 - PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7), -#if NR_CPUS > 8 - PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11), - PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15), - PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19), - PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23), - PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27), - PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31), -#if NR_CPUS > 32 - PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35), - PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39), - PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43), - PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47), - PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51), - PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55), - PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59), - PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63), -#if NR_CPUS > 64 - PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67), - PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71), - PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75), - PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79), - PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83), - PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87), - PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91), - PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95), - PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99), - PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103), - PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107), - PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111), - PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115), - PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119), - PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), 
PACA_INIT(123), - PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127), -#endif -#endif -#endif -#endif -#endif -}; -EXPORT_SYMBOL(paca); diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index 3d2106b022a..3cef1b8f57f 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -295,8 +295,8 @@ static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev) } } -static struct pci_dev *of_create_pci_dev(struct device_node *node, - struct pci_bus *bus, int devfn) +struct pci_dev *of_create_pci_dev(struct device_node *node, + struct pci_bus *bus, int devfn) { struct pci_dev *dev; const char *type; @@ -354,10 +354,9 @@ static struct pci_dev *of_create_pci_dev(struct device_node *node, return dev; } +EXPORT_SYMBOL(of_create_pci_dev); -static void of_scan_pci_bridge(struct device_node *node, struct pci_dev *dev); - -static void __devinit of_scan_bus(struct device_node *node, +void __devinit of_scan_bus(struct device_node *node, struct pci_bus *bus) { struct device_node *child = NULL; @@ -381,9 +380,10 @@ static void __devinit of_scan_bus(struct device_node *node, do_bus_setup(bus); } +EXPORT_SYMBOL(of_scan_bus); -static void __devinit of_scan_pci_bridge(struct device_node *node, - struct pci_dev *dev) +void __devinit of_scan_pci_bridge(struct device_node *node, + struct pci_dev *dev) { struct pci_bus *bus; u32 *busrange, *ranges; @@ -464,9 +464,10 @@ static void __devinit of_scan_pci_bridge(struct device_node *node, else if (mode == PCI_PROBE_NORMAL) pci_scan_child_bus(bus); } +EXPORT_SYMBOL(of_scan_pci_bridge); #endif /* CONFIG_PPC_MULTIPLATFORM */ -static void __devinit scan_phb(struct pci_controller *hose) +void __devinit scan_phb(struct pci_controller *hose) { struct pci_bus *bus; struct device_node *node = hose->arch_data; @@ -547,6 +548,11 @@ static int __init pcibios_init(void) if (ppc64_isabridge_dev != NULL) printk("ISA bridge at %s\n", pci_name(ppc64_isabridge_dev)); +#ifdef CONFIG_PPC_MULTIPLATFORM + /* map in PCI I/O space */ + phbs_remap_io(); +#endif + printk("PCI: Probing PCI hardware done\n"); return 0; @@ -1276,12 +1282,9 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, * G5 machines... So when something asks for bus 0 io base * (bus 0 is HT root), we return the AGP one instead. */ -#ifdef CONFIG_PPC_PMAC - if (systemcfg->platform == PLATFORM_POWERMAC && - machine_is_compatible("MacRISC4")) + if (machine_is_compatible("MacRISC4")) if (in_bus == 0) in_bus = 0xf0; -#endif /* CONFIG_PPC_PMAC */ /* That syscall isn't quite compatible with PCI domains, but it's * used on pre-domains setup. We return the first match diff --git a/arch/ppc64/kernel/pci_dn.c b/arch/ppc64/kernel/pci_dn.c index 1a443a7ada4..12c4c9e9bbc 100644 --- a/arch/ppc64/kernel/pci_dn.c +++ b/arch/ppc64/kernel/pci_dn.c @@ -43,7 +43,7 @@ static void * __devinit update_dn_pci_info(struct device_node *dn, void *data) u32 *regs; struct pci_dn *pdn; - if (phb->is_dynamic) + if (mem_init_done) pdn = kmalloc(sizeof(*pdn), GFP_KERNEL); else pdn = alloc_bootmem(sizeof(*pdn)); @@ -120,6 +120,14 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre, return NULL; } +/** + * pci_devs_phb_init_dynamic - setup pci devices under this PHB + * phb: pci-to-host bridge (top-level bridge connecting to cpu) + * + * This routine is called both during boot, (before the memory + * subsystem is set up, before kmalloc is valid) and during the + * dynamic lpar operation of adding a PHB to a running system. 
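The pci_dn.c hunk above switches the allocator test from phb->is_dynamic to mem_init_done, which is what lets pci_devs_phb_init_dynamic() serve both of the call sites this comment describes. A minimal sketch of the idiom, with an invented helper name, assuming the usual bootmem/slab interfaces of this kernel:

	#include <linux/slab.h>
	#include <linux/bootmem.h>

	extern int mem_init_done;	/* set once the normal page/slab allocators are usable */

	static void *alloc_early_or_late(size_t size)	/* hypothetical helper */
	{
		if (mem_init_done)
			return kmalloc(size, GFP_KERNEL);	/* runtime path, e.g. DLPAR PHB add */
		return alloc_bootmem(size);			/* early boot, before mem_init() */
	}

Keying on mem_init_done rather than on a per-PHB flag means the same traversal code works regardless of which PHB it is handed.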
+ */ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb) { struct device_node * dn = (struct device_node *) phb->arch_data; @@ -201,9 +209,14 @@ static struct notifier_block pci_dn_reconfig_nb = { .notifier_call = pci_dn_reconfig_notifier, }; -/* - * Actually initialize the phbs. - * The buswalk on this phb has not happened yet. +/** + * pci_devs_phb_init - Initialize phbs and pci devs under them. + * + * This routine walks over all phb's (pci-host bridges) on the + * system, and sets up assorted pci-related structures + * (including pci info in the device node structs) for each + * pci device found underneath. This routine runs once, + * early in the boot sequence. */ void __init pci_devs_phb_init(void) { diff --git a/arch/ppc64/kernel/proc_ppc64.c b/arch/ppc64/kernel/proc_ppc64.c deleted file mode 100644 index 24e955ee948..00000000000 --- a/arch/ppc64/kernel/proc_ppc64.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * arch/ppc64/kernel/proc_ppc64.c - * - * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/config.h> -#include <linux/init.h> -#include <linux/mm.h> -#include <linux/proc_fs.h> -#include <linux/slab.h> -#include <linux/kernel.h> - -#include <asm/systemcfg.h> -#include <asm/rtas.h> -#include <asm/uaccess.h> -#include <asm/prom.h> - -static loff_t page_map_seek( struct file *file, loff_t off, int whence); -static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos); -static int page_map_mmap( struct file *file, struct vm_area_struct *vma ); - -static struct file_operations page_map_fops = { - .llseek = page_map_seek, - .read = page_map_read, - .mmap = page_map_mmap -}; - -/* - * Create the ppc64 and ppc64/rtas directories early. This allows us to - * assume that they have been previously created in drivers. 
- */ -static int __init proc_ppc64_create(void) -{ - struct proc_dir_entry *root; - - root = proc_mkdir("ppc64", NULL); - if (!root) - return 1; - - if (!(systemcfg->platform & (PLATFORM_PSERIES | PLATFORM_CELL))) - return 0; - - if (!proc_mkdir("rtas", root)) - return 1; - - if (!proc_symlink("rtas", NULL, "ppc64/rtas")) - return 1; - - return 0; -} -core_initcall(proc_ppc64_create); - -static int __init proc_ppc64_init(void) -{ - struct proc_dir_entry *pde; - - pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL); - if (!pde) - return 1; - pde->nlink = 1; - pde->data = systemcfg; - pde->size = PAGE_SIZE; - pde->proc_fops = &page_map_fops; - - return 0; -} -__initcall(proc_ppc64_init); - -static loff_t page_map_seek( struct file *file, loff_t off, int whence) -{ - loff_t new; - struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); - - switch(whence) { - case 0: - new = off; - break; - case 1: - new = file->f_pos + off; - break; - case 2: - new = dp->size + off; - break; - default: - return -EINVAL; - } - if ( new < 0 || new > dp->size ) - return -EINVAL; - return (file->f_pos = new); -} - -static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) -{ - struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); - return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size); -} - -static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) -{ - struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); - - vma->vm_flags |= VM_SHM | VM_LOCKED; - - if ((vma->vm_end - vma->vm_start) > dp->size) - return -EINVAL; - - remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT, - dp->size, vma->vm_page_prot); - return 0; -} - diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index 97bfceb5353..fbad2c36078 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -31,6 +31,7 @@ #include <linux/initrd.h> #include <linux/bitops.h> #include <linux/module.h> +#include <linux/module.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -46,7 +47,6 @@ #include <asm/pgtable.h> #include <asm/pci.h> #include <asm/iommu.h> -#include <asm/ppcdebug.h> #include <asm/btext.h> #include <asm/sections.h> #include <asm/machdep.h> @@ -318,7 +318,7 @@ static int __devinit finish_node_interrupts(struct device_node *np, } /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */ - if (systemcfg->platform == PLATFORM_POWERMAC && ic && ic->parent) { + if (_machine == PLATFORM_POWERMAC && ic && ic->parent) { char *name = get_property(ic->parent, "name", NULL); if (name && !strcmp(name, "u3")) np->intrs[intrcount].line += 128; @@ -635,10 +635,10 @@ static inline char *find_flat_dt_string(u32 offset) * used to extract the memory informations at boot before we can * unflatten the tree */ -static int __init scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data) +int __init of_scan_flat_dt(int (*it)(unsigned long node, + const char *uname, int depth, + void *data), + void *data) { unsigned long p = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; @@ -695,8 +695,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node, * This function can be used within scan_flattened_dt callback to get * access to properties */ -static void* __init get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size) +void* __init of_get_flat_dt_prop(unsigned long node, const char *name, + unsigned long *size) { unsigned long p = node; @@ 
-996,7 +996,7 @@ void __init unflatten_device_tree(void) static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - char *type = get_flat_dt_prop(node, "device_type", NULL); + char *type = of_get_flat_dt_prop(node, "device_type", NULL); u32 *prop; unsigned long size; @@ -1004,17 +1004,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; - /* On LPAR, look for the first ibm,pft-size property for the hash table size - */ - if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) { - u32 *pft_size; - pft_size = (u32 *)get_flat_dt_prop(node, "ibm,pft-size", NULL); - if (pft_size != NULL) { - /* pft_size[0] is the NUMA CEC cookie */ - ppc64_pft_size = pft_size[1]; - } - } - if (initial_boot_params && initial_boot_params->version >= 2) { /* version 2 of the kexec param format adds the phys cpuid * of booted proc. @@ -1023,8 +1012,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, boot_cpuid = 0; } else { /* Check if it's the boot-cpu, set it's hw index in paca now */ - if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) { - u32 *prop = get_flat_dt_prop(node, "reg", NULL); + if (of_get_flat_dt_prop(node, "linux,boot-cpu", NULL) + != NULL) { + u32 *prop = of_get_flat_dt_prop(node, "reg", NULL); set_hard_smp_processor_id(0, prop == NULL ? 0 : *prop); boot_cpuid_phys = get_hard_smp_processor_id(0); } @@ -1032,14 +1022,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node, #ifdef CONFIG_ALTIVEC /* Check if we have a VMX and eventually update CPU features */ - prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL); if (prop && (*prop) > 0) { cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; } /* Same goes for Apple's "altivec" property */ - prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL); if (prop) { cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; @@ -1051,7 +1041,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * this by looking at the size of the ibm,ppc-interrupt-server#s * property */ - prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", + prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &size); cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; if (prop && ((size / sizeof(u32)) > 1)) @@ -1072,26 +1062,26 @@ static int __init early_init_dt_scan_chosen(unsigned long node, return 0; /* get platform type */ - prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL); if (prop == NULL) return 0; - systemcfg->platform = *prop; + _machine = *prop; /* check if iommu is forced on or off */ - if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) + if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) iommu_is_off = 1; - if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) + if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) iommu_force_on = 1; - prop64 = (u64*)get_flat_dt_prop(node, "linux,memory-limit", NULL); + prop64 = (u64*)of_get_flat_dt_prop(node, "linux,memory-limit", NULL); if (prop64) memory_limit = *prop64; - prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); + prop64 = (u64*)of_get_flat_dt_prop(node, 
"linux,tce-alloc-start",NULL); if (prop64) tce_alloc_start = *prop64; - prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); + prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); if (prop64) tce_alloc_end = *prop64; @@ -1102,9 +1092,12 @@ static int __init early_init_dt_scan_chosen(unsigned long node, { u64 *basep, *entryp; - basep = (u64*)get_flat_dt_prop(node, "linux,rtas-base", NULL); - entryp = (u64*)get_flat_dt_prop(node, "linux,rtas-entry", NULL); - prop = (u32*)get_flat_dt_prop(node, "linux,rtas-size", NULL); + basep = (u64*)of_get_flat_dt_prop(node, + "linux,rtas-base", NULL); + entryp = (u64*)of_get_flat_dt_prop(node, + "linux,rtas-entry", NULL); + prop = (u32*)of_get_flat_dt_prop(node, + "linux,rtas-size", NULL); if (basep && entryp && prop) { rtas.base = *basep; rtas.entry = *entryp; @@ -1125,11 +1118,11 @@ static int __init early_init_dt_scan_root(unsigned long node, if (depth != 0) return 0; - prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "#size-cells", NULL); dt_root_size_cells = (prop == NULL) ? 1 : *prop; DBG("dt_root_size_cells = %x\n", dt_root_size_cells); - prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "#address-cells", NULL); dt_root_addr_cells = (prop == NULL) ? 2 : *prop; DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); @@ -1161,7 +1154,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) static int __init early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data) { - char *type = get_flat_dt_prop(node, "device_type", NULL); + char *type = of_get_flat_dt_prop(node, "device_type", NULL); cell_t *reg, *endp; unsigned long l; @@ -1169,7 +1162,7 @@ static int __init early_init_dt_scan_memory(unsigned long node, if (type == NULL || strcmp(type, "memory") != 0) return 0; - reg = (cell_t *)get_flat_dt_prop(node, "reg", &l); + reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l); if (reg == NULL) return 0; @@ -1225,26 +1218,20 @@ void __init early_init_devtree(void *params) /* Setup flat device-tree pointer */ initial_boot_params = params; - /* By default, hash size is not set */ - ppc64_pft_size = 0; - /* Retreive various informations from the /chosen node of the * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... */ - scan_flat_dt(early_init_dt_scan_chosen, NULL); + of_scan_flat_dt(early_init_dt_scan_chosen, NULL); /* Scan memory nodes and rebuild LMBs */ lmb_init(); - scan_flat_dt(early_init_dt_scan_root, NULL); - scan_flat_dt(early_init_dt_scan_memory, NULL); + of_scan_flat_dt(early_init_dt_scan_root, NULL); + of_scan_flat_dt(early_init_dt_scan_memory, NULL); lmb_enforce_memory_limit(memory_limit); lmb_analyze(); - systemcfg->physicalMemorySize = lmb_phys_mem_size(); lmb_reserve(0, __pa(klimit)); - DBG("Phys. mem: %lx\n", systemcfg->physicalMemorySize); - /* Reserve LMB regions used by kernel, initrd, dt, etc... */ early_reserve_mem(); @@ -1253,26 +1240,8 @@ void __init early_init_devtree(void *params) /* Retreive hash table size from flattened tree plus other * CPU related informations (altivec support, boot CPU ID, ...) 
*/ - scan_flat_dt(early_init_dt_scan_cpus, NULL); - - /* If hash size wasn't obtained above, we calculate it now based on - * the total RAM size - */ - if (ppc64_pft_size == 0) { - unsigned long rnd_mem_size, pteg_count; - - /* round mem_size up to next power of 2 */ - rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize); - if (rnd_mem_size < systemcfg->physicalMemorySize) - rnd_mem_size <<= 1; - - /* # pages / 2 */ - pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11); + of_scan_flat_dt(early_init_dt_scan_cpus, NULL); - ppc64_pft_size = __ilog2(pteg_count << 7); - } - - DBG("Hash pftSize: %x\n", (int)ppc64_pft_size); DBG(" <- early_init_devtree()\n"); } @@ -1781,7 +1750,7 @@ static int of_finish_dynamic_node(struct device_node *node, /* We don't support that function on PowerMac, at least * not yet */ - if (systemcfg->platform == PLATFORM_POWERMAC) + if (_machine == PLATFORM_POWERMAC) return -ENODEV; /* fix up new node's linux_phandle field */ @@ -1894,17 +1863,32 @@ get_property(struct device_node *np, const char *name, int *lenp) EXPORT_SYMBOL(get_property); /* - * Add a property to a node + * Add a property to a node. */ -void +int prom_add_property(struct device_node* np, struct property* prop) { - struct property **next = &np->properties; + struct property **next; prop->next = NULL; - while (*next) + write_lock(&devtree_lock); + next = &np->properties; + while (*next) { + if (strcmp(prop->name, (*next)->name) == 0) { + /* duplicate ! don't insert it */ + write_unlock(&devtree_lock); + return -1; + } next = &(*next)->next; + } *next = prop; + write_unlock(&devtree_lock); + + /* try to add to proc as well if it was initialized */ + if (np->pde) + proc_device_tree_add_prop(np->pde, prop); + + return 0; } #if 0 diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index a4bbca6dbb8..6375f40b23d 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -44,7 +44,6 @@ #include <asm/pgtable.h> #include <asm/pci.h> #include <asm/iommu.h> -#include <asm/ppcdebug.h> #include <asm/btext.h> #include <asm/sections.h> #include <asm/machdep.h> @@ -1825,7 +1824,7 @@ static void __init fixup_device_tree(void) if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev)) == PROM_ERROR) return; - if (u3_rev != 0x35 && u3_rev != 0x37) + if (u3_rev < 0x35 || u3_rev > 0x39) return; /* does it need fixup ? */ if (prom_getproplen(i2c, "interrupts") > 0) @@ -1935,7 +1934,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long /* * On pSeries, inform the firmware about our capabilities */ - if (RELOC(of_platform) & PLATFORM_PSERIES) + if (RELOC(of_platform) == PLATFORM_PSERIES || + RELOC(of_platform) == PLATFORM_PSERIES_LPAR) prom_send_capabilities(); /* diff --git a/arch/ppc64/kernel/rtas_pci.c b/arch/ppc64/kernel/rtas_pci.c deleted file mode 100644 index 3ad15c90fbb..00000000000 --- a/arch/ppc64/kernel/rtas_pci.c +++ /dev/null @@ -1,516 +0,0 @@ -/* - * arch/ppc64/kernel/rtas_pci.c - * - * Copyright (C) 2001 Dave Engebretsen, IBM Corporation - * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM - * - * RTAS specific routines for PCI. - * - * Based on code from pci.c, chrp_pci.c and pSeries_pci.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
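A side note on the prom_add_property() change above: it now takes devtree_lock, refuses duplicate names (returning -1, with 0 on success) and mirrors the new property into the node's procfs entry when one exists, so callers can check the result. A hedged sketch with an invented property name:

	#include <linux/kernel.h>
	#include <asm/prom.h>

	static u32 example_value = 1;				/* hypothetical payload */
	static struct property example_prop = {
		.name   = "linux,example-prop",			/* hypothetical name */
		.length = sizeof(example_value),
		.value  = (unsigned char *)&example_value,
	};

	static void add_example_property(struct device_node *np)
	{
		if (prom_add_property(np, &example_prop) != 0)	/* -1: already present */
			printk(KERN_WARNING "%s: linux,example-prop already exists\n",
			       np->full_name);
	}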
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/kernel.h> -#include <linux/threads.h> -#include <linux/pci.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/bootmem.h> - -#include <asm/io.h> -#include <asm/pgtable.h> -#include <asm/irq.h> -#include <asm/prom.h> -#include <asm/machdep.h> -#include <asm/pci-bridge.h> -#include <asm/iommu.h> -#include <asm/rtas.h> -#include <asm/mpic.h> -#include <asm/ppc-pci.h> - -/* RTAS tokens */ -static int read_pci_config; -static int write_pci_config; -static int ibm_read_pci_config; -static int ibm_write_pci_config; - -static int config_access_valid(struct pci_dn *dn, int where) -{ - if (where < 256) - return 1; - if (where < 4096 && dn->pci_ext_config_space) - return 1; - - return 0; -} - -static int of_device_available(struct device_node * dn) -{ - char * status; - - status = get_property(dn, "status", NULL); - - if (!status) - return 1; - - if (!strcmp(status, "okay")) - return 1; - - return 0; -} - -static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val) -{ - int returnval = -1; - unsigned long buid, addr; - int ret; - struct pci_dn *pdn; - - if (!dn || !dn->data) - return PCIBIOS_DEVICE_NOT_FOUND; - pdn = dn->data; - if (!config_access_valid(pdn, where)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = ((where & 0xf00) << 20) | (pdn->busno << 16) | - (pdn->devfn << 8) | (where & 0xff); - buid = pdn->phb->buid; - if (buid) { - ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval, - addr, buid >> 32, buid & 0xffffffff, size); - } else { - ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size); - } - *val = returnval; - - if (ret) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (returnval == EEH_IO_ERROR_VALUE(size) && - eeh_dn_check_failure (dn, NULL)) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -static int rtas_pci_read_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 *val) -{ - struct device_node *busdn, *dn; - - if (bus->self) - busdn = pci_device_to_OF_node(bus->self); - else - busdn = bus->sysdata; /* must be a phb */ - - /* Search only direct children of the bus */ - for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->data && PCI_DN(dn)->devfn == devfn - && of_device_available(dn)) - return rtas_read_config(dn, where, size, val); - - return PCIBIOS_DEVICE_NOT_FOUND; -} - -int rtas_write_config(struct device_node *dn, int where, int size, u32 val) -{ - unsigned long buid, addr; - int ret; - struct pci_dn *pdn; - - if (!dn || !dn->data) - return PCIBIOS_DEVICE_NOT_FOUND; - pdn = dn->data; - if (!config_access_valid(pdn, where)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = ((where & 0xf00) << 20) | (pdn->busno << 16) | - (pdn->devfn << 8) | (where & 0xff); - buid = pdn->phb->buid; - if (buid) { - ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, buid >> 32, buid & 0xffffffff, size, (ulong) val); - } else { - ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val); - } - - if (ret) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -static int 
rtas_pci_write_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 val) -{ - struct device_node *busdn, *dn; - - if (bus->self) - busdn = pci_device_to_OF_node(bus->self); - else - busdn = bus->sysdata; /* must be a phb */ - - /* Search only direct children of the bus */ - for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->data && PCI_DN(dn)->devfn == devfn - && of_device_available(dn)) - return rtas_write_config(dn, where, size, val); - return PCIBIOS_DEVICE_NOT_FOUND; -} - -struct pci_ops rtas_pci_ops = { - rtas_pci_read_config, - rtas_pci_write_config -}; - -int is_python(struct device_node *dev) -{ - char *model = (char *)get_property(dev, "model", NULL); - - if (model && strstr(model, "Python")) - return 1; - - return 0; -} - -static int get_phb_reg_prop(struct device_node *dev, - unsigned int addr_size_words, - struct reg_property64 *reg) -{ - unsigned int *ui_ptr = NULL, len; - - /* Found a PHB, now figure out where his registers are mapped. */ - ui_ptr = (unsigned int *)get_property(dev, "reg", &len); - if (ui_ptr == NULL) - return 1; - - if (addr_size_words == 1) { - reg->address = ((struct reg_property32 *)ui_ptr)->address; - reg->size = ((struct reg_property32 *)ui_ptr)->size; - } else { - *reg = *((struct reg_property64 *)ui_ptr); - } - - return 0; -} - -static void python_countermeasures(struct device_node *dev, - unsigned int addr_size_words) -{ - struct reg_property64 reg_struct; - void __iomem *chip_regs; - volatile u32 val; - - if (get_phb_reg_prop(dev, addr_size_words, ®_struct)) - return; - - /* Python's register file is 1 MB in size. */ - chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000); - - /* - * Firmware doesn't always clear this bit which is critical - * for good performance - Anton - */ - -#define PRG_CL_RESET_VALID 0x00010000 - - val = in_be32(chip_regs + 0xf6030); - if (val & PRG_CL_RESET_VALID) { - printk(KERN_INFO "Python workaround: "); - val &= ~PRG_CL_RESET_VALID; - out_be32(chip_regs + 0xf6030, val); - /* - * We must read it back for changes to - * take effect - */ - val = in_be32(chip_regs + 0xf6030); - printk("reg0: %x\n", val); - } - - iounmap(chip_regs); -} - -void __init init_pci_config_tokens (void) -{ - read_pci_config = rtas_token("read-pci-config"); - write_pci_config = rtas_token("write-pci-config"); - ibm_read_pci_config = rtas_token("ibm,read-pci-config"); - ibm_write_pci_config = rtas_token("ibm,write-pci-config"); -} - -unsigned long __devinit get_phb_buid (struct device_node *phb) -{ - int addr_cells; - unsigned int *buid_vals; - unsigned int len; - unsigned long buid; - - if (ibm_read_pci_config == -1) return 0; - - /* PHB's will always be children of the root node, - * or so it is promised by the current firmware. 
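For the BUID assembly in get_phb_buid() that follows, a short worked example with invented values: given two address cells and a PHB "reg" property whose first cells are 0x00000008 0x00020000, the function returns 0x0000000800020000; with a single address cell the first cell is returned as-is.

	/* sketch of the two-cell case handled below */
	unsigned int buid_vals[2] = { 0x00000008, 0x00020000 };	/* hypothetical */
	unsigned long buid = (((unsigned long)buid_vals[0]) << 32UL) |
			     (((unsigned long)buid_vals[1]) & 0xffffffff);
	/* buid == 0x0000000800020000 */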
*/ - if (phb->parent == NULL) - return 0; - if (phb->parent->parent) - return 0; - - buid_vals = (unsigned int *) get_property(phb, "reg", &len); - if (buid_vals == NULL) - return 0; - - addr_cells = prom_n_addr_cells(phb); - if (addr_cells == 1) { - buid = (unsigned long) buid_vals[0]; - } else { - buid = (((unsigned long)buid_vals[0]) << 32UL) | - (((unsigned long)buid_vals[1]) & 0xffffffff); - } - return buid; -} - -static int phb_set_bus_ranges(struct device_node *dev, - struct pci_controller *phb) -{ - int *bus_range; - unsigned int len; - - bus_range = (int *) get_property(dev, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) { - return 1; - } - - phb->first_busno = bus_range[0]; - phb->last_busno = bus_range[1]; - - return 0; -} - -static int __devinit setup_phb(struct device_node *dev, - struct pci_controller *phb, - unsigned int addr_size_words) -{ - pci_setup_pci_controller(phb); - - if (is_python(dev)) - python_countermeasures(dev, addr_size_words); - - if (phb_set_bus_ranges(dev, phb)) - return 1; - - phb->arch_data = dev; - phb->ops = &rtas_pci_ops; - phb->buid = get_phb_buid(dev); - - return 0; -} - -static void __devinit add_linux_pci_domain(struct device_node *dev, - struct pci_controller *phb, - struct property *of_prop) -{ - memset(of_prop, 0, sizeof(struct property)); - of_prop->name = "linux,pci-domain"; - of_prop->length = sizeof(phb->global_number); - of_prop->value = (unsigned char *)&of_prop[1]; - memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number)); - prom_add_property(dev, of_prop); -} - -static struct pci_controller * __init alloc_phb(struct device_node *dev, - unsigned int addr_size_words) -{ - struct pci_controller *phb; - struct property *of_prop; - - phb = alloc_bootmem(sizeof(struct pci_controller)); - if (phb == NULL) - return NULL; - - of_prop = alloc_bootmem(sizeof(struct property) + - sizeof(phb->global_number)); - if (!of_prop) - return NULL; - - if (setup_phb(dev, phb, addr_size_words)) - return NULL; - - add_linux_pci_domain(dev, phb, of_prop); - - return phb; -} - -static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words) -{ - struct pci_controller *phb; - - phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), - GFP_KERNEL); - if (phb == NULL) - return NULL; - - if (setup_phb(dev, phb, addr_size_words)) - return NULL; - - phb->is_dynamic = 1; - - /* TODO: linux,pci-domain? 
*/ - - return phb; -} - -unsigned long __init find_and_init_phbs(void) -{ - struct device_node *node; - struct pci_controller *phb; - unsigned int root_size_cells = 0; - unsigned int index; - unsigned int *opprop = NULL; - struct device_node *root = of_find_node_by_path("/"); - - if (ppc64_interrupt_controller == IC_OPEN_PIC) { - opprop = (unsigned int *)get_property(root, - "platform-open-pic", NULL); - } - - root_size_cells = prom_n_size_cells(root); - - index = 0; - - for (node = of_get_next_child(root, NULL); - node != NULL; - node = of_get_next_child(root, node)) { - if (node->type == NULL || strcmp(node->type, "pci") != 0) - continue; - - phb = alloc_phb(node, root_size_cells); - if (!phb) - continue; - - pci_process_bridge_OF_ranges(phb, node, 0); - pci_setup_phb_io(phb, index == 0); -#ifdef CONFIG_PPC_PSERIES - if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) { - int addr = root_size_cells * (index + 2) - 1; - mpic_assign_isu(pSeries_mpic, index, opprop[addr]); - } -#endif - index++; - } - - of_node_put(root); - pci_devs_phb_init(); - - /* - * pci_probe_only and pci_assign_all_buses can be set via properties - * in chosen. - */ - if (of_chosen) { - int *prop; - - prop = (int *)get_property(of_chosen, "linux,pci-probe-only", - NULL); - if (prop) - pci_probe_only = *prop; - - prop = (int *)get_property(of_chosen, - "linux,pci-assign-all-buses", NULL); - if (prop) - pci_assign_all_buses = *prop; - } - - return 0; -} - -struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) -{ - struct device_node *root = of_find_node_by_path("/"); - unsigned int root_size_cells = 0; - struct pci_controller *phb; - struct pci_bus *bus; - int primary; - - root_size_cells = prom_n_size_cells(root); - - primary = list_empty(&hose_list); - phb = alloc_phb_dynamic(dn, root_size_cells); - if (!phb) - return NULL; - - pci_process_bridge_OF_ranges(phb, dn, primary); - - pci_setup_phb_io_dynamic(phb, primary); - of_node_put(root); - - pci_devs_phb_init_dynamic(phb); - phb->last_busno = 0xff; - bus = pci_scan_bus(phb->first_busno, phb->ops, phb->arch_data); - phb->bus = bus; - phb->last_busno = bus->subordinate; - - return phb; -} -EXPORT_SYMBOL(init_phb_dynamic); - -/* RPA-specific bits for removing PHBs */ -int pcibios_remove_root_bus(struct pci_controller *phb) -{ - struct pci_bus *b = phb->bus; - struct resource *res; - int rc, i; - - res = b->resource[0]; - if (!res->flags) { - printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__, - b->name); - return 1; - } - - rc = unmap_bus_range(b); - if (rc) { - printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", - __FUNCTION__, b->name); - return 1; - } - - if (release_resource(res)) { - printk(KERN_ERR "%s: failed to release IO on bus %s\n", - __FUNCTION__, b->name); - return 1; - } - - for (i = 1; i < 3; ++i) { - res = b->resource[i]; - if (!res->flags && i == 0) { - printk(KERN_ERR "%s: no MEM resource for PHB %s\n", - __FUNCTION__, b->name); - return 1; - } - if (res->flags && release_resource(res)) { - printk(KERN_ERR - "%s: failed to release IO %d on bus %s\n", - __FUNCTION__, i, b->name); - return 1; - } - } - - list_del(&phb->list_node); - if (phb->is_dynamic) - kfree(phb); - - return 0; -} -EXPORT_SYMBOL(pcibios_remove_root_bus); diff --git a/arch/ppc64/kernel/scanlog.c b/arch/ppc64/kernel/scanlog.c deleted file mode 100644 index 215bf890030..00000000000 --- a/arch/ppc64/kernel/scanlog.c +++ /dev/null @@ -1,236 +0,0 @@ -/* - * c 2001 PPC 64 Team, IBM Corp - * - * This program is free software; you can 
redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com> - * - * When ppc64 hardware fails the service processor dumps internal state - * of the system. After a reboot the operating system can access a dump - * of this data using this driver. A dump exists if the device-tree - * /chosen/ibm,scan-log-data property exists. - * - * This driver exports /proc/ppc64/scan-log-dump which can be read. - * The driver supports only sequential reads. - * - * The driver looks at a write to the driver for the single word "reset". - * If given, the driver will reset the scanlog so the platform can free it. - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <asm/uaccess.h> -#include <asm/rtas.h> -#include <asm/prom.h> - -#define MODULE_VERS "1.0" -#define MODULE_NAME "scanlog" - -/* Status returns from ibm,scan-log-dump */ -#define SCANLOG_COMPLETE 0 -#define SCANLOG_HWERROR -1 -#define SCANLOG_CONTINUE 1 - -#define DEBUG(A...) do { if (scanlog_debug) printk(KERN_ERR "scanlog: " A); } while (0) - -static int scanlog_debug; -static unsigned int ibm_scan_log_dump; /* RTAS token */ -static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */ - -static ssize_t scanlog_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct inode * inode = file->f_dentry->d_inode; - struct proc_dir_entry *dp; - unsigned int *data; - int status; - unsigned long len, off; - unsigned int wait_time; - - dp = PDE(inode); - data = (unsigned int *)dp->data; - - if (!data) { - printk(KERN_ERR "scanlog: read failed no data\n"); - return -EIO; - } - - if (count > RTAS_DATA_BUF_SIZE) - count = RTAS_DATA_BUF_SIZE; - - if (count < 1024) { - /* This is the min supported by this RTAS call. Rather - * than do all the buffering we insist the user code handle - * larger reads. As long as cp works... :) - */ - printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count); - return -EINVAL; - } - - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - - for (;;) { - wait_time = 500; /* default wait if no data */ - spin_lock(&rtas_data_buf_lock); - memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE); - status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, - (u32) __pa(rtas_data_buf), (u32) count); - memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE); - spin_unlock(&rtas_data_buf_lock); - - DEBUG("status=%d, data[0]=%x, data[1]=%x, data[2]=%x\n", - status, data[0], data[1], data[2]); - switch (status) { - case SCANLOG_COMPLETE: - DEBUG("hit eof\n"); - return 0; - case SCANLOG_HWERROR: - DEBUG("hardware error reading scan log data\n"); - return -EIO; - case SCANLOG_CONTINUE: - /* We may or may not have data yet */ - len = data[1]; - off = data[2]; - if (len > 0) { - if (copy_to_user(buf, ((char *)data)+off, len)) - return -EFAULT; - return len; - } - /* Break to sleep default time */ - break; - default: - if (status > 9900 && status <= 9905) { - wait_time = rtas_extended_busy_delay_time(status); - } else { - printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status); - return -EIO; - } - } - /* Apparently no data yet. Wait and try again. 
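The header comment of this deleted driver spells out the userspace contract: sequential reads of the dump, and a write of "reset" to let the platform free it. A sketch of a consumer, assuming the proc path created by scanlog_init() further down and reads of at least 1K as scanlog_read() requires:

	#include <stdio.h>

	int dump_scanlog(void)				/* hypothetical userspace helper */
	{
		char buf[4096];
		size_t n;
		FILE *f = fopen("/proc/ppc64/rtas/scan-log-dump", "rb");

		if (!f)
			return 1;
		while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
			fwrite(buf, 1, n, stdout);	/* sequential reads only */
		fclose(f);
		/* afterwards, writing "reset" to the same file lets firmware free the dump */
		return 0;
	}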
*/ - msleep_interruptible(wait_time); - } - /*NOTREACHED*/ -} - -static ssize_t scanlog_write(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) -{ - char stkbuf[20]; - int status; - - if (count > 19) count = 19; - if (copy_from_user (stkbuf, buf, count)) { - return -EFAULT; - } - stkbuf[count] = 0; - - if (buf) { - if (strncmp(stkbuf, "reset", 5) == 0) { - DEBUG("reset scanlog\n"); - status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0); - DEBUG("rtas returns %d\n", status); - } else if (strncmp(stkbuf, "debugon", 7) == 0) { - printk(KERN_ERR "scanlog: debug on\n"); - scanlog_debug = 1; - } else if (strncmp(stkbuf, "debugoff", 8) == 0) { - printk(KERN_ERR "scanlog: debug off\n"); - scanlog_debug = 0; - } - } - return count; -} - -static int scanlog_open(struct inode * inode, struct file * file) -{ - struct proc_dir_entry *dp = PDE(inode); - unsigned int *data = (unsigned int *)dp->data; - - if (!data) { - printk(KERN_ERR "scanlog: open failed no data\n"); - return -EIO; - } - - if (data[0] != 0) { - /* This imperfect test stops a second copy of the - * data (or a reset while data is being copied) - */ - return -EBUSY; - } - - data[0] = 0; /* re-init so we restart the scan */ - - return 0; -} - -static int scanlog_release(struct inode * inode, struct file * file) -{ - struct proc_dir_entry *dp = PDE(inode); - unsigned int *data = (unsigned int *)dp->data; - - if (!data) { - printk(KERN_ERR "scanlog: release failed no data\n"); - return -EIO; - } - data[0] = 0; - - return 0; -} - -struct file_operations scanlog_fops = { - .owner = THIS_MODULE, - .read = scanlog_read, - .write = scanlog_write, - .open = scanlog_open, - .release = scanlog_release, -}; - -int __init scanlog_init(void) -{ - struct proc_dir_entry *ent; - - ibm_scan_log_dump = rtas_token("ibm,scan-log-dump"); - if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) { - printk(KERN_ERR "scan-log-dump not implemented on this system\n"); - return -EIO; - } - - ent = create_proc_entry("ppc64/rtas/scan-log-dump", S_IRUSR, NULL); - if (ent) { - ent->proc_fops = &scanlog_fops; - /* Ideally we could allocate a buffer < 4G */ - ent->data = kmalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); - if (!ent->data) { - printk(KERN_ERR "Failed to allocate a buffer\n"); - remove_proc_entry("scan-log-dump", ent->parent); - return -ENOMEM; - } - ((unsigned int *)ent->data)[0] = 0; - } else { - printk(KERN_ERR "Failed to create ppc64/scan-log-dump proc entry\n"); - return -EIO; - } - proc_ppc64_scan_log_dump = ent; - - return 0; -} - -void __exit scanlog_cleanup(void) -{ - if (proc_ppc64_scan_log_dump) { - if (proc_ppc64_scan_log_dump->data) - kfree(proc_ppc64_scan_log_dump->data); - remove_proc_entry("scan-log-dump", proc_ppc64_scan_log_dump->parent); - } -} - -module_init(scanlog_init); -module_exit(scanlog_cleanup); -MODULE_LICENSE("GPL"); diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c deleted file mode 100644 index 6654b350979..00000000000 --- a/arch/ppc64/kernel/sysfs.c +++ /dev/null @@ -1,383 +0,0 @@ -#include <linux/config.h> -#include <linux/sysdev.h> -#include <linux/cpu.h> -#include <linux/smp.h> -#include <linux/percpu.h> -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/module.h> -#include <linux/nodemask.h> -#include <linux/cpumask.h> -#include <linux/notifier.h> - -#include <asm/current.h> -#include <asm/processor.h> -#include <asm/cputable.h> -#include <asm/firmware.h> -#include <asm/hvcall.h> -#include <asm/prom.h> -#include <asm/systemcfg.h> -#include <asm/paca.h> -#include <asm/lppaca.h> 
-#include <asm/machdep.h> - -static DEFINE_PER_CPU(struct cpu, cpu_devices); - -/* SMT stuff */ - -#ifdef CONFIG_PPC_MULTIPLATFORM -/* default to snooze disabled */ -DEFINE_PER_CPU(unsigned long, smt_snooze_delay); - -static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf, - size_t count) -{ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); - ssize_t ret; - unsigned long snooze; - - ret = sscanf(buf, "%lu", &snooze); - if (ret != 1) - return -EINVAL; - - per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze; - - return count; -} - -static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf) -{ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); - - return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); -} - -static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, - store_smt_snooze_delay); - -/* Only parse OF options if the matching cmdline option was not specified */ -static int smt_snooze_cmdline; - -static int __init smt_setup(void) -{ - struct device_node *options; - unsigned int *val; - unsigned int cpu; - - if (!cpu_has_feature(CPU_FTR_SMT)) - return 1; - - options = find_path_device("/options"); - if (!options) - return 1; - - val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay", - NULL); - if (!smt_snooze_cmdline && val) { - for_each_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = *val; - } - - return 1; -} -__initcall(smt_setup); - -static int __init setup_smt_snooze_delay(char *str) -{ - unsigned int cpu; - int snooze; - - if (!cpu_has_feature(CPU_FTR_SMT)) - return 1; - - smt_snooze_cmdline = 1; - - if (get_option(&str, &snooze)) { - for_each_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = snooze; - } - - return 1; -} -__setup("smt-snooze-delay=", setup_smt_snooze_delay); - -#endif /* CONFIG_PPC_MULTIPLATFORM */ - -/* - * Enabling PMCs will slow partition context switch times so we only do - * it the first time we write to the PMCs. 
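The per-SPR sysfs attributes removed below are all generated by the SYSFS_PMCSETUP() macro together with run_on_cpu(). Expanded by hand for mmcr0 (a sketch of roughly what the preprocessor emits, attribute annotations dropped), the read/write pair is:

	static unsigned long read_mmcr0(unsigned long junk)
	{
		return mfspr(SPRN_MMCR0);
	}

	static unsigned long write_mmcr0(unsigned long val)
	{
		ppc64_enable_pmcs();			/* first write turns the PMCs on */
		mtspr(SPRN_MMCR0, val);
		return 0;
	}

	static ssize_t show_mmcr0(struct sys_device *dev, char *buf)
	{
		struct cpu *cpu = container_of(dev, struct cpu, sysdev);
		unsigned long val = run_on_cpu(cpu->sysdev.id, read_mmcr0, 0);

		return sprintf(buf, "%lx\n", val);
	}

	static ssize_t store_mmcr0(struct sys_device *dev, const char *buf, size_t count)
	{
		struct cpu *cpu = container_of(dev, struct cpu, sysdev);
		unsigned long val;
		int ret = sscanf(buf, "%lx", &val);

		if (ret != 1)
			return -EINVAL;
		run_on_cpu(cpu->sysdev.id, write_mmcr0, val);
		return count;
	}

run_on_cpu() pins the calling task to the target CPU for the duration of the SPR access, so the value shown always comes from the CPU whose sysdev the attribute hangs off.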
- */ - -static DEFINE_PER_CPU(char, pmcs_enabled); - -void ppc64_enable_pmcs(void) -{ - /* Only need to enable them once */ - if (__get_cpu_var(pmcs_enabled)) - return; - - __get_cpu_var(pmcs_enabled) = 1; - - if (ppc_md.enable_pmcs) - ppc_md.enable_pmcs(); -} -EXPORT_SYMBOL(ppc64_enable_pmcs); - -/* XXX convert to rusty's on_one_cpu */ -static unsigned long run_on_cpu(unsigned long cpu, - unsigned long (*func)(unsigned long), - unsigned long arg) -{ - cpumask_t old_affinity = current->cpus_allowed; - unsigned long ret; - - /* should return -EINVAL to userspace */ - if (set_cpus_allowed(current, cpumask_of_cpu(cpu))) - return 0; - - ret = func(arg); - - set_cpus_allowed(current, old_affinity); - - return ret; -} - -#define SYSFS_PMCSETUP(NAME, ADDRESS) \ -static unsigned long read_##NAME(unsigned long junk) \ -{ \ - return mfspr(ADDRESS); \ -} \ -static unsigned long write_##NAME(unsigned long val) \ -{ \ - ppc64_enable_pmcs(); \ - mtspr(ADDRESS, val); \ - return 0; \ -} \ -static ssize_t show_##NAME(struct sys_device *dev, char *buf) \ -{ \ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ - unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \ - return sprintf(buf, "%lx\n", val); \ -} \ -static ssize_t __attribute_used__ \ - store_##NAME(struct sys_device *dev, const char *buf, size_t count) \ -{ \ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); \ - unsigned long val; \ - int ret = sscanf(buf, "%lx", &val); \ - if (ret != 1) \ - return -EINVAL; \ - run_on_cpu(cpu->sysdev.id, write_##NAME, val); \ - return count; \ -} - -SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0); -SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1); -SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); -SYSFS_PMCSETUP(pmc1, SPRN_PMC1); -SYSFS_PMCSETUP(pmc2, SPRN_PMC2); -SYSFS_PMCSETUP(pmc3, SPRN_PMC3); -SYSFS_PMCSETUP(pmc4, SPRN_PMC4); -SYSFS_PMCSETUP(pmc5, SPRN_PMC5); -SYSFS_PMCSETUP(pmc6, SPRN_PMC6); -SYSFS_PMCSETUP(pmc7, SPRN_PMC7); -SYSFS_PMCSETUP(pmc8, SPRN_PMC8); -SYSFS_PMCSETUP(purr, SPRN_PURR); - -static SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0); -static SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1); -static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); -static SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1); -static SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2); -static SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3); -static SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4); -static SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5); -static SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6); -static SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7); -static SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8); -static SYSDEV_ATTR(purr, 0600, show_purr, NULL); - -static void register_cpu_online(unsigned int cpu) -{ - struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - -#ifndef CONFIG_PPC_ISERIES - if (cpu_has_feature(CPU_FTR_SMT)) - sysdev_create_file(s, &attr_smt_snooze_delay); -#endif - - /* PMC stuff */ - - sysdev_create_file(s, &attr_mmcr0); - sysdev_create_file(s, &attr_mmcr1); - - if (cpu_has_feature(CPU_FTR_MMCRA)) - sysdev_create_file(s, &attr_mmcra); - - if (cur_cpu_spec->num_pmcs >= 1) - sysdev_create_file(s, &attr_pmc1); - if (cur_cpu_spec->num_pmcs >= 2) - sysdev_create_file(s, &attr_pmc2); - if (cur_cpu_spec->num_pmcs >= 3) - sysdev_create_file(s, &attr_pmc3); - if (cur_cpu_spec->num_pmcs >= 4) - sysdev_create_file(s, &attr_pmc4); - if (cur_cpu_spec->num_pmcs >= 5) - sysdev_create_file(s, &attr_pmc5); - if (cur_cpu_spec->num_pmcs >= 6) - sysdev_create_file(s, &attr_pmc6); - if 
(cur_cpu_spec->num_pmcs >= 7) - sysdev_create_file(s, &attr_pmc7); - if (cur_cpu_spec->num_pmcs >= 8) - sysdev_create_file(s, &attr_pmc8); - - if (cpu_has_feature(CPU_FTR_SMT)) - sysdev_create_file(s, &attr_purr); -} - -#ifdef CONFIG_HOTPLUG_CPU -static void unregister_cpu_online(unsigned int cpu) -{ - struct cpu *c = &per_cpu(cpu_devices, cpu); - struct sys_device *s = &c->sysdev; - - BUG_ON(c->no_control); - -#ifndef CONFIG_PPC_ISERIES - if (cpu_has_feature(CPU_FTR_SMT)) - sysdev_remove_file(s, &attr_smt_snooze_delay); -#endif - - /* PMC stuff */ - - sysdev_remove_file(s, &attr_mmcr0); - sysdev_remove_file(s, &attr_mmcr1); - - if (cpu_has_feature(CPU_FTR_MMCRA)) - sysdev_remove_file(s, &attr_mmcra); - - if (cur_cpu_spec->num_pmcs >= 1) - sysdev_remove_file(s, &attr_pmc1); - if (cur_cpu_spec->num_pmcs >= 2) - sysdev_remove_file(s, &attr_pmc2); - if (cur_cpu_spec->num_pmcs >= 3) - sysdev_remove_file(s, &attr_pmc3); - if (cur_cpu_spec->num_pmcs >= 4) - sysdev_remove_file(s, &attr_pmc4); - if (cur_cpu_spec->num_pmcs >= 5) - sysdev_remove_file(s, &attr_pmc5); - if (cur_cpu_spec->num_pmcs >= 6) - sysdev_remove_file(s, &attr_pmc6); - if (cur_cpu_spec->num_pmcs >= 7) - sysdev_remove_file(s, &attr_pmc7); - if (cur_cpu_spec->num_pmcs >= 8) - sysdev_remove_file(s, &attr_pmc8); - - if (cpu_has_feature(CPU_FTR_SMT)) - sysdev_remove_file(s, &attr_purr); -} -#endif /* CONFIG_HOTPLUG_CPU */ - -static int __devinit sysfs_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned int)(long)hcpu; - - switch (action) { - case CPU_ONLINE: - register_cpu_online(cpu); - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - unregister_cpu_online(cpu); - break; -#endif - } - return NOTIFY_OK; -} - -static struct notifier_block __devinitdata sysfs_cpu_nb = { - .notifier_call = sysfs_cpu_notify, -}; - -/* NUMA stuff */ - -#ifdef CONFIG_NUMA -static struct node node_devices[MAX_NUMNODES]; - -static void register_nodes(void) -{ - int i; - - for (i = 0; i < MAX_NUMNODES; i++) { - if (node_online(i)) { - int p_node = parent_node(i); - struct node *parent = NULL; - - if (p_node != i) - parent = &node_devices[p_node]; - - register_node(&node_devices[i], i, parent); - } - } -} -#else -static void register_nodes(void) -{ - return; -} -#endif - -/* Only valid if CPU is present. */ -static ssize_t show_physical_id(struct sys_device *dev, char *buf) -{ - struct cpu *cpu = container_of(dev, struct cpu, sysdev); - - return sprintf(buf, "%d\n", get_hard_smp_processor_id(cpu->sysdev.id)); -} -static SYSDEV_ATTR(physical_id, 0444, show_physical_id, NULL); - -static int __init topology_init(void) -{ - int cpu; - struct node *parent = NULL; - - register_nodes(); - - register_cpu_notifier(&sysfs_cpu_nb); - - for_each_cpu(cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); - -#ifdef CONFIG_NUMA - /* The node to which a cpu belongs can't be known - * until the cpu is made present. - */ - parent = NULL; - if (cpu_present(cpu)) - parent = &node_devices[cpu_to_node(cpu)]; -#endif - /* - * For now, we just see if the system supports making - * the RTAS calls for CPU hotplug. But, there may be a - * more comprehensive way to do this for an individual - * CPU. For instance, the boot cpu might never be valid - * for hotplugging. 
- */ - if (!ppc_md.cpu_die) - c->no_control = 1; - - if (cpu_online(cpu) || (c->no_control == 0)) { - register_cpu(c, cpu, parent); - - sysdev_create_file(&c->sysdev, &attr_physical_id); - } - - if (cpu_online(cpu)) - register_cpu_online(cpu); - } - - return 0; -} -__initcall(topology_init); diff --git a/arch/ppc64/kernel/udbg.c b/arch/ppc64/kernel/udbg.c index d49c3613c8e..0d878e72fc4 100644 --- a/arch/ppc64/kernel/udbg.c +++ b/arch/ppc64/kernel/udbg.c @@ -10,12 +10,10 @@ */ #include <stdarg.h> -#define WANT_PPCDBG_TAB /* Only defined here */ #include <linux/config.h> #include <linux/types.h> #include <linux/sched.h> #include <linux/console.h> -#include <asm/ppcdebug.h> #include <asm/processor.h> void (*udbg_putc)(unsigned char c); @@ -89,59 +87,6 @@ void udbg_printf(const char *fmt, ...) va_end(args); } -/* PPCDBG stuff */ - -u64 ppc64_debug_switch; - -/* Special print used by PPCDBG() macro */ -void udbg_ppcdbg(unsigned long debug_flags, const char *fmt, ...) -{ - unsigned long active_debugs = debug_flags & ppc64_debug_switch; - - if (active_debugs) { - va_list ap; - unsigned char buf[UDBG_BUFSIZE]; - unsigned long i, len = 0; - - for (i=0; i < PPCDBG_NUM_FLAGS; i++) { - if (((1U << i) & active_debugs) && - trace_names[i]) { - len += strlen(trace_names[i]); - udbg_puts(trace_names[i]); - break; - } - } - - snprintf(buf, UDBG_BUFSIZE, " [%s]: ", current->comm); - len += strlen(buf); - udbg_puts(buf); - - while (len < 18) { - udbg_puts(" "); - len++; - } - - va_start(ap, fmt); - vsnprintf(buf, UDBG_BUFSIZE, fmt, ap); - udbg_puts(buf); - va_end(ap); - } -} - -unsigned long udbg_ifdebug(unsigned long flags) -{ - return (flags & ppc64_debug_switch); -} - -/* - * Initialize the PPCDBG state. Called before relocation has been enabled. - */ -void __init ppcdbg_initialize(void) -{ - ppc64_debug_switch = PPC_DEBUG_DEFAULT; /* | PPCDBG_BUSWALK | */ - /* PPCDBG_PHBINIT | PPCDBG_MM | PPCDBG_MMINIT | PPCDBG_TCEINIT | PPCDBG_TCE */; -} - /* * Early boot console based on udbg */ diff --git a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c index 4aacf521e3e..1bbacac4498 100644 --- a/arch/ppc64/kernel/vdso.c +++ b/arch/ppc64/kernel/vdso.c @@ -34,6 +34,7 @@ #include <asm/machdep.h> #include <asm/cputable.h> #include <asm/sections.h> +#include <asm/systemcfg.h> #include <asm/vdso.h> #undef DEBUG @@ -179,7 +180,7 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma, * Last page is systemcfg. */ if ((vma->vm_end - address) <= PAGE_SIZE) - pg = virt_to_page(systemcfg); + pg = virt_to_page(_systemcfg); else pg = virt_to_page(vbase + offset); @@ -604,7 +605,7 @@ void __init vdso_init(void) get_page(pg); } - get_page(virt_to_page(systemcfg)); + get_page(virt_to_page(_systemcfg)); } int in_gate_area_no_task(unsigned long addr) |