From 9142e0c8396b25ed4cb549b5efa636065768ebe0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:26 +0200 Subject: [PATCH] i386: Update defconfig This is based on the x86-64 defconfig which works on a wide range of systems. Signed-off-by: Andi Kleen --- arch/i386/defconfig | 1063 ++++++++++++++++++++++++--------------------------- 1 file changed, 501 insertions(+), 562 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 89ebb7a316a..1a29bfa26d0 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -1,41 +1,51 @@ # # Automatically generated make config: don't edit +# Linux kernel version: 2.6.18-git5 +# Tue Sep 26 09:30:47 2006 # CONFIG_X86_32=y +CONFIG_GENERIC_TIME=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y CONFIG_SEMAPHORE_SLEEPERS=y CONFIG_X86=y CONFIG_MMU=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y +CONFIG_GENERIC_HWEIGHT=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_DMI=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # # Code maturity level options # CONFIG_EXPERIMENTAL=y -CONFIG_BROKEN_ON_SMP=y +CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 # # General setup # CONFIG_LOCALVERSION="" -# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y -# CONFIG_POSIX_MQUEUE is not set +CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y +# CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +# CONFIG_CPUSETS is not set +# CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" -CONFIG_UID16=y -CONFIG_VM86=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL=y CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y @@ -45,11 +55,9 @@ CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 CONFIG_SLAB=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 # CONFIG_SLOB is not set @@ -60,41 +68,45 @@ CONFIG_BASE_SMALL=0 CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_OBSOLETE_MODPARM=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set # CONFIG_KMOD is not set +CONFIG_STOP_MACHINE=y # # Block layer # -# CONFIG_LBD is not set +CONFIG_LBD=y +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set # # IO Schedulers # CONFIG_IOSCHED_NOOP=y -# CONFIG_IOSCHED_AS is not set -# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y -# CONFIG_DEFAULT_AS is not set +CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y +# CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="cfq" +CONFIG_DEFAULT_IOSCHED="anticipatory" # # Processor type and features # -CONFIG_X86_PC=y +CONFIG_SMP=y +# CONFIG_X86_PC is not set # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set # CONFIG_X86_NUMAQ is not set # CONFIG_X86_SUMMIT is not set # CONFIG_X86_BIGSMP is not set # CONFIG_X86_VISWS is not set -# CONFIG_X86_GENERICARCH is not set +CONFIG_X86_GENERICARCH=y # CONFIG_X86_ES7000 is not set +CONFIG_X86_CYCLONE_TIMER=y # CONFIG_M386 is not set # CONFIG_M486 is not set # CONFIG_M586 is not set @@ -102,11 +114,11 @@ CONFIG_X86_PC=y # CONFIG_M586MMX is not set # CONFIG_M686 is not set # CONFIG_MPENTIUMII is not set -# CONFIG_MPENTIUMIII is not set +CONFIG_MPENTIUMIII=y # CONFIG_MPENTIUMM is not set # CONFIG_MPENTIUM4 is not set # CONFIG_MK6 is not set -CONFIG_MK7=y +# CONFIG_MK7 is not set # CONFIG_MK8 is not set # CONFIG_MCRUSOE is not set # CONFIG_MEFFICEON is not set @@ -117,10 +129,10 @@ CONFIG_MK7=y # CONFIG_MGEODE_LX is not set # CONFIG_MCYRIXIII is not set # CONFIG_MVIAC3_2 is not set -# CONFIG_X86_GENERIC is not set +CONFIG_X86_GENERIC=y CONFIG_X86_CMPXCHG=y CONFIG_X86_XADD=y -CONFIG_X86_L1_CACHE_SHIFT=6 +CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_X86_WP_WORKS_OK=y @@ -131,26 +143,28 @@ CONFIG_X86_CMPXCHG64=y CONFIG_X86_GOOD_APIC=y CONFIG_X86_INTEL_USERCOPY=y CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_USE_3DNOW=y CONFIG_X86_TSC=y -# CONFIG_HPET_TIMER is not set -# CONFIG_SMP is not set -CONFIG_PREEMPT_NONE=y -# CONFIG_PREEMPT_VOLUNTARY is not set +CONFIG_HPET_TIMER=y +CONFIG_HPET_EMULATE_RTC=y +CONFIG_NR_CPUS=32 +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set -CONFIG_X86_UP_APIC=y -CONFIG_X86_UP_IOAPIC=y +CONFIG_PREEMPT_BKL=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y CONFIG_X86_MCE=y CONFIG_X86_MCE_NONFATAL=y -# CONFIG_X86_MCE_P4THERMAL is not set +CONFIG_X86_MCE_P4THERMAL=y +CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set # CONFIG_X86_REBOOTFIXUPS is not set -# CONFIG_MICROCODE is not set -# CONFIG_X86_MSR is not set -# CONFIG_X86_CPUID is not set +CONFIG_MICROCODE=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y # # Firmware Drivers @@ -158,68 +172,67 @@ CONFIG_X86_MCE_NONFATAL=y # CONFIG_EDD is not set # CONFIG_DELL_RBU is not set # CONFIG_DCDBAS is not set -CONFIG_NOHIGHMEM=y -# CONFIG_HIGHMEM4G is not set +# CONFIG_NOHIGHMEM is not set +CONFIG_HIGHMEM4G=y # CONFIG_HIGHMEM64G is not set -CONFIG_VMSPLIT_3G=y -# CONFIG_VMSPLIT_3G_OPT is not set -# CONFIG_VMSPLIT_2G is not set -# CONFIG_VMSPLIT_1G is not set CONFIG_PAGE_OFFSET=0xC0000000 -CONFIG_ARCH_FLATMEM_ENABLE=y -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_HIGHMEM=y CONFIG_SELECT_MEMORY_MODEL=y CONFIG_FLATMEM_MANUAL=y # CONFIG_DISCONTIGMEM_MANUAL is not set # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y -CONFIG_SPARSEMEM_STATIC=y +# CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_RESOURCES_64BIT=y +# CONFIG_HIGHPTE is not set # CONFIG_MATH_EMULATION is not set CONFIG_MTRR=y # CONFIG_EFI is not set +# CONFIG_IRQBALANCE is not set CONFIG_REGPARM=y -# CONFIG_SECCOMP is not set -CONFIG_HZ_100=y -# CONFIG_HZ_250 is not set +CONFIG_SECCOMP=y +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y # CONFIG_HZ_1000 is not set -CONFIG_HZ=100 +CONFIG_HZ=250 # CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x100000 -CONFIG_DOUBLEFAULT=y +# CONFIG_HOTPLUG_CPU is not set +CONFIG_COMPAT_VDSO=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # # Power management options (ACPI, APM) # CONFIG_PM=y -# CONFIG_PM_LEGACY is not set +CONFIG_PM_LEGACY=y # CONFIG_PM_DEBUG is not set -CONFIG_SOFTWARE_SUSPEND=y -CONFIG_PM_STD_PARTITION="" # # ACPI (Advanced Configuration and Power Interface) Support # CONFIG_ACPI=y -# CONFIG_ACPI_SLEEP is not set -# CONFIG_ACPI_AC is not set -# CONFIG_ACPI_BATTERY is not set -# CONFIG_ACPI_BUTTON is not set +CONFIG_ACPI_AC=y +CONFIG_ACPI_BATTERY=y +CONFIG_ACPI_BUTTON=y # CONFIG_ACPI_VIDEO is not set # CONFIG_ACPI_HOTKEY is not set -# CONFIG_ACPI_FAN is not set -# CONFIG_ACPI_PROCESSOR is not set +CONFIG_ACPI_FAN=y +# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_THERMAL=y # CONFIG_ACPI_ASUS is not set # CONFIG_ACPI_IBM is not set # CONFIG_ACPI_TOSHIBA is not set -CONFIG_ACPI_BLACKLIST_YEAR=0 -# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_BLACKLIST_YEAR=2001 +CONFIG_ACPI_DEBUG=y CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y -# CONFIG_X86_PM_TIMER is not set +CONFIG_X86_PM_TIMER=y # CONFIG_ACPI_CONTAINER is not set # @@ -230,7 +243,41 @@ CONFIG_ACPI_SYSTEM=y # # CPU Frequency scaling # -# CONFIG_CPU_FREQ is not set +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +CONFIG_CPU_FREQ_DEBUG=y +CONFIG_CPU_FREQ_STAT=y +# CONFIG_CPU_FREQ_STAT_DETAILS is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set + +# +# CPUFreq processor drivers +# +CONFIG_X86_ACPI_CPUFREQ=y +# CONFIG_X86_POWERNOW_K6 is not set +# CONFIG_X86_POWERNOW_K7 is not set +CONFIG_X86_POWERNOW_K8=y +CONFIG_X86_POWERNOW_K8_ACPI=y +# CONFIG_X86_GX_SUSPMOD is not set +# CONFIG_X86_SPEEDSTEP_CENTRINO is not set +# CONFIG_X86_SPEEDSTEP_ICH is not set +# CONFIG_X86_SPEEDSTEP_SMI is not set +# CONFIG_X86_P4_CLOCKMOD is not set +# CONFIG_X86_CPUFREQ_NFORCE2 is not set +# CONFIG_X86_LONGRUN is not set +# CONFIG_X86_LONGHAUL is not set + +# +# shared options +# +CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y +# CONFIG_X86_SPEEDSTEP_LIB is not set # # Bus options (PCI, PCMCIA, EISA, MCA, ISA) @@ -244,12 +291,13 @@ CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y # CONFIG_PCIEPORTBUS is not set -# CONFIG_PCI_MSI is not set -# CONFIG_PCI_LEGACY_PROC is not set +CONFIG_PCI_MSI=y +# CONFIG_PCI_DEBUG is not set CONFIG_ISA_DMA_API=y # CONFIG_ISA is not set # CONFIG_MCA is not set # CONFIG_SCx200 is not set +CONFIG_K8_NB=y # # PCCARD (PCMCIA/CardBus) support @@ -278,93 +326,54 @@ CONFIG_NET=y # # CONFIG_NETDEBUG is not set CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y +# CONFIG_PACKET_MMAP is not set CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set # CONFIG_NET_KEY is not set CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set +CONFIG_IP_MULTICAST=y # CONFIG_IP_ADVANCED_ROUTER is not set CONFIG_IP_FIB_HASH=y -# CONFIG_IP_PNP is not set +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set # CONFIG_ARPD is not set # CONFIG_SYN_COOKIES is not set # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set # CONFIG_INET_TUNNEL is not set -# CONFIG_INET_DIAG is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_BIC=y - -# -# IP: Virtual Server Configuration -# -# CONFIG_IP_VS is not set -# CONFIG_IPV6 is not set -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set - -# -# Core Netfilter Configuration -# -# CONFIG_NETFILTER_NETLINK is not set -CONFIG_NETFILTER_XTABLES=y -# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set -# CONFIG_NETFILTER_XT_TARGET_MARK is not set -# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set -# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set -# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set -# CONFIG_NETFILTER_XT_MATCH_DCCP is not set -# CONFIG_NETFILTER_XT_MATCH_HELPER is not set -# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set -CONFIG_NETFILTER_XT_MATCH_LIMIT=y -CONFIG_NETFILTER_XT_MATCH_MAC=y -# CONFIG_NETFILTER_XT_MATCH_MARK is not set -# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set -# CONFIG_NETFILTER_XT_MATCH_REALM is not set -# CONFIG_NETFILTER_XT_MATCH_SCTP is not set -CONFIG_NETFILTER_XT_MATCH_STATE=y -# CONFIG_NETFILTER_XT_MATCH_STRING is not set -# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=y -# CONFIG_IP_NF_CT_ACCT is not set -# CONFIG_IP_NF_CONNTRACK_MARK is not set -# CONFIG_IP_NF_CONNTRACK_EVENTS is not set -# CONFIG_IP_NF_CT_PROTO_SCTP is not set -CONFIG_IP_NF_FTP=y -# CONFIG_IP_NF_IRC is not set -# CONFIG_IP_NF_NETBIOS_NS is not set -# CONFIG_IP_NF_TFTP is not set -# CONFIG_IP_NF_AMANDA is not set -# CONFIG_IP_NF_PPTP is not set -# CONFIG_IP_NF_QUEUE is not set -CONFIG_IP_NF_IPTABLES=y -# CONFIG_IP_NF_MATCH_IPRANGE is not set -# CONFIG_IP_NF_MATCH_MULTIPORT is not set -# CONFIG_IP_NF_MATCH_TOS is not set -# CONFIG_IP_NF_MATCH_RECENT is not set -# CONFIG_IP_NF_MATCH_ECN is not set -# CONFIG_IP_NF_MATCH_DSCP is not set -# CONFIG_IP_NF_MATCH_AH_ESP is not set -# CONFIG_IP_NF_MATCH_TTL is not set -# CONFIG_IP_NF_MATCH_OWNER is not set -# CONFIG_IP_NF_MATCH_ADDRTYPE is not set -# CONFIG_IP_NF_MATCH_HASHLIMIT is not set -CONFIG_IP_NF_FILTER=y -# CONFIG_IP_NF_TARGET_REJECT is not set -CONFIG_IP_NF_TARGET_LOG=y -# CONFIG_IP_NF_TARGET_ULOG is not set -# CONFIG_IP_NF_TARGET_TCPMSS is not set -# CONFIG_IP_NF_NAT is not set -# CONFIG_IP_NF_MANGLE is not set -# CONFIG_IP_NF_RAW is not set -# CONFIG_IP_NF_ARPTABLES is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +CONFIG_IPV6=y +# CONFIG_IPV6_PRIVACY is not set +# CONFIG_IPV6_ROUTER_PREF is not set +# CONFIG_INET6_AH is not set +# CONFIG_INET6_ESP is not set +# CONFIG_INET6_IPCOMP is not set +# CONFIG_IPV6_MIP6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +CONFIG_INET6_XFRM_MODE_TRANSPORT=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +# CONFIG_IPV6_TUNNEL is not set +# CONFIG_IPV6_SUBTREES is not set +# CONFIG_IPV6_MULTIPLE_TABLES is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set # # DCCP Configuration (EXPERIMENTAL) @@ -389,7 +398,6 @@ CONFIG_IP_NF_TARGET_LOG=y # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set @@ -402,6 +410,7 @@ CONFIG_IP_NF_TARGET_LOG=y # Network testing # # CONFIG_NET_PKTGEN is not set +# CONFIG_NET_TCPPROBE is not set # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set @@ -416,7 +425,9 @@ CONFIG_IP_NF_TARGET_LOG=y # CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set +CONFIG_FW_LOADER=y +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_SYS_HYPERVISOR is not set # # Connector - unified userspace <-> kernelspace linker @@ -431,13 +442,7 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y # # Parallel port support # -CONFIG_PARPORT=y -CONFIG_PARPORT_PC=y -# CONFIG_PARPORT_SERIAL is not set -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_GSC is not set -CONFIG_PARPORT_1284=y +# CONFIG_PARPORT is not set # # Plug and Play support @@ -447,8 +452,7 @@ CONFIG_PARPORT_1284=y # # Block devices # -# CONFIG_BLK_DEV_FD is not set -# CONFIG_PARIDE is not set +CONFIG_BLK_DEV_FD=y # CONFIG_BLK_CPQ_DA is not set # CONFIG_BLK_CPQ_CISS_DA is not set # CONFIG_BLK_DEV_DAC960 is not set @@ -459,8 +463,11 @@ CONFIG_BLK_DEV_LOOP=y # CONFIG_BLK_DEV_NBD is not set # CONFIG_BLK_DEV_SX8 is not set # CONFIG_BLK_DEV_UB is not set -# CONFIG_BLK_DEV_RAM is not set +CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +CONFIG_BLK_DEV_INITRD=y # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set @@ -476,7 +483,7 @@ CONFIG_BLK_DEV_IDE=y # CONFIG_BLK_DEV_IDE_SATA is not set # CONFIG_BLK_DEV_HD_IDE is not set CONFIG_BLK_DEV_IDEDISK=y -# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_IDEDISK_MULTI_MODE=y CONFIG_BLK_DEV_IDECD=y # CONFIG_BLK_DEV_IDETAPE is not set # CONFIG_BLK_DEV_IDEFLOPPY is not set @@ -486,10 +493,10 @@ CONFIG_BLK_DEV_IDECD=y # # IDE chipset support/bugfixes # -# CONFIG_IDE_GENERIC is not set +CONFIG_IDE_GENERIC=y # CONFIG_BLK_DEV_CMD640 is not set CONFIG_BLK_DEV_IDEPCI=y -CONFIG_IDEPCI_SHARE_IRQ=y +# CONFIG_IDEPCI_SHARE_IRQ is not set # CONFIG_BLK_DEV_OFFBOARD is not set # CONFIG_BLK_DEV_GENERIC is not set # CONFIG_BLK_DEV_OPTI621 is not set @@ -500,7 +507,7 @@ CONFIG_IDEDMA_PCI_AUTO=y # CONFIG_IDEDMA_ONLYDISK is not set # CONFIG_BLK_DEV_AEC62XX is not set # CONFIG_BLK_DEV_ALI15X3 is not set -# CONFIG_BLK_DEV_AMD74XX is not set +CONFIG_BLK_DEV_AMD74XX=y # CONFIG_BLK_DEV_ATIIXP is not set # CONFIG_BLK_DEV_CMD64X is not set # CONFIG_BLK_DEV_TRIFLEX is not set @@ -511,7 +518,7 @@ CONFIG_IDEDMA_PCI_AUTO=y # CONFIG_BLK_DEV_HPT34X is not set # CONFIG_BLK_DEV_HPT366 is not set # CONFIG_BLK_DEV_SC1200 is not set -# CONFIG_BLK_DEV_PIIX is not set +CONFIG_BLK_DEV_PIIX=y # CONFIG_BLK_DEV_IT821X is not set # CONFIG_BLK_DEV_NS87415 is not set # CONFIG_BLK_DEV_PDC202XX_OLD is not set @@ -521,7 +528,7 @@ CONFIG_IDEDMA_PCI_AUTO=y # CONFIG_BLK_DEV_SIS5513 is not set # CONFIG_BLK_DEV_SLC90E66 is not set # CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y +# CONFIG_BLK_DEV_VIA82CXXX is not set # CONFIG_IDE_ARM is not set CONFIG_BLK_DEV_IDEDMA=y # CONFIG_IDEDMA_IVB is not set @@ -533,6 +540,7 @@ CONFIG_IDEDMA_AUTO=y # # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y +CONFIG_SCSI_NETLINK=y # CONFIG_SCSI_PROC_FS is not set # @@ -541,8 +549,9 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y # CONFIG_CHR_DEV_ST is not set # CONFIG_CHR_DEV_OSST is not set -# CONFIG_BLK_DEV_SR is not set -# CONFIG_CHR_DEV_SG is not set +CONFIG_BLK_DEV_SR=y +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_CHR_DEV_SG=y # CONFIG_CHR_DEV_SCH is not set # @@ -553,29 +562,44 @@ CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOGGING is not set # -# SCSI Transport Attributes +# SCSI Transports # -# CONFIG_SCSI_SPI_ATTRS is not set -# CONFIG_SCSI_FC_ATTRS is not set +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_ISCSI_ATTRS is not set # CONFIG_SCSI_SAS_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set # # SCSI low-level drivers # # CONFIG_ISCSI_TCP is not set -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +CONFIG_BLK_DEV_3W_XXXX_RAID=y # CONFIG_SCSI_3W_9XXX is not set # CONFIG_SCSI_ACARD is not set # CONFIG_SCSI_AACRAID is not set -# CONFIG_SCSI_AIC7XXX is not set +CONFIG_SCSI_AIC7XXX=y +CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_RESET_DELAY_MS=5000 +CONFIG_AIC7XXX_DEBUG_ENABLE=y +CONFIG_AIC7XXX_DEBUG_MASK=0 +CONFIG_AIC7XXX_REG_PRETTY_PRINT=y # CONFIG_SCSI_AIC7XXX_OLD is not set -# CONFIG_SCSI_AIC79XX is not set +CONFIG_SCSI_AIC79XX=y +CONFIG_AIC79XX_CMDS_PER_DEVICE=32 +CONFIG_AIC79XX_RESET_DELAY_MS=4000 +# CONFIG_AIC79XX_ENABLE_RD_STRM is not set +# CONFIG_AIC79XX_DEBUG_ENABLE is not set +CONFIG_AIC79XX_DEBUG_MASK=0 +# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set +# CONFIG_SCSI_AIC94XX is not set # CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set -# CONFIG_SCSI_SATA is not set +# CONFIG_SCSI_HPTIOP is not set # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set @@ -584,11 +608,9 @@ CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_IPS is not set # CONFIG_SCSI_INITIO is not set # CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_PPA is not set -# CONFIG_SCSI_IMM is not set +# CONFIG_SCSI_STEX is not set # CONFIG_SCSI_SYM53C8XX_2 is not set # CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_FC is not set # CONFIG_SCSI_QLOGIC_1280 is not set # CONFIG_SCSI_QLA_FC is not set # CONFIG_SCSI_LPFC is not set @@ -597,23 +619,115 @@ CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_NSP32 is not set # CONFIG_SCSI_DEBUG is not set +# +# Serial ATA (prod) and Parallel ATA (experimental) drivers +# +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_SVW=y +CONFIG_ATA_PIIX=y +# CONFIG_SATA_MV is not set +CONFIG_SATA_NV=y +# CONFIG_PDC_ADMA is not set +# CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_PROMISE is not set +# CONFIG_SATA_SX4 is not set +CONFIG_SATA_SIL=y +# CONFIG_SATA_SIL24 is not set +# CONFIG_SATA_SIS is not set +# CONFIG_SATA_ULI is not set +CONFIG_SATA_VIA=y +# CONFIG_SATA_VITESSE is not set +CONFIG_SATA_INTEL_COMBINED=y +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +# CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CS5535 is not set +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_IT821X is not set +# CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_LEGACY is not set +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +# CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_QDI is not set +# CONFIG_PATA_RADISYS is not set +# CONFIG_PATA_RZ1000 is not set +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_SIL680 is not set +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set + # # Multi-device support (RAID and LVM) # -# CONFIG_MD is not set +CONFIG_MD=y +# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_DM=y +# CONFIG_DM_CRYPT is not set +# CONFIG_DM_SNAPSHOT is not set +# CONFIG_DM_MIRROR is not set +# CONFIG_DM_ZERO is not set +# CONFIG_DM_MULTIPATH is not set # # Fusion MPT device support # -# CONFIG_FUSION is not set -# CONFIG_FUSION_SPI is not set +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y # CONFIG_FUSION_FC is not set # CONFIG_FUSION_SAS is not set +CONFIG_FUSION_MAX_SGE=128 +# CONFIG_FUSION_CTL is not set # # IEEE 1394 (FireWire) support # -# CONFIG_IEEE1394 is not set +CONFIG_IEEE1394=y + +# +# Subsystem Options +# +# CONFIG_IEEE1394_VERBOSEDEBUG is not set +# CONFIG_IEEE1394_OUI_DB is not set +# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set +# CONFIG_IEEE1394_EXPORT_FULL_API is not set + +# +# Device Drivers +# + +# +# Texas Instruments PCILynx requires I2C +# +CONFIG_IEEE1394_OHCI1394=y + +# +# Protocol Drivers +# +# CONFIG_IEEE1394_VIDEO1394 is not set +# CONFIG_IEEE1394_SBP2 is not set +# CONFIG_IEEE1394_ETH1394 is not set +# CONFIG_IEEE1394_DV1394 is not set +CONFIG_IEEE1394_RAWIO=y # # I2O device support @@ -652,46 +766,63 @@ CONFIG_MII=y # # Tulip family network device support # -# CONFIG_NET_TULIP is not set +CONFIG_NET_TULIP=y +# CONFIG_DE2104X is not set +CONFIG_TULIP=y +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +# CONFIG_TULIP_NAPI is not set +# CONFIG_DE4X5 is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_DM9102 is not set +# CONFIG_ULI526X is not set # CONFIG_HP100 is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set # CONFIG_AMD8111_ETH is not set # CONFIG_ADAPTEC_STARFIRE is not set -# CONFIG_B44 is not set -# CONFIG_FORCEDETH is not set +CONFIG_B44=y +CONFIG_FORCEDETH=y +# CONFIG_FORCEDETH_NAPI is not set # CONFIG_DGRS is not set # CONFIG_EEPRO100 is not set CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set # CONFIG_NE2K_PCI is not set -# CONFIG_8139CP is not set -# CONFIG_8139TOO is not set +CONFIG_8139CP=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +# CONFIG_8139TOO_8129 is not set +# CONFIG_8139_OLD_RX_RESET is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set -# CONFIG_NET_POCKET is not set # # Ethernet (1000 Mbit) # # CONFIG_ACENIC is not set # CONFIG_DL2K is not set -# CONFIG_E1000 is not set +CONFIG_E1000=y +# CONFIG_E1000_NAPI is not set +# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set -# CONFIG_R8169 is not set +CONFIG_R8169=y +# CONFIG_R8169_NAPI is not set # CONFIG_SIS190 is not set # CONFIG_SKGE is not set -# CONFIG_SKY2 is not set +CONFIG_SKY2=y # CONFIG_SK98LIN is not set # CONFIG_VIA_VELOCITY is not set -# CONFIG_TIGON3 is not set -# CONFIG_BNX2 is not set +CONFIG_TIGON3=y +CONFIG_BNX2=y +# CONFIG_QLA3XXX is not set # # Ethernet (10000 Mbit) @@ -699,6 +830,7 @@ CONFIG_E100=y # CONFIG_CHELSIO_T1 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_MYRI10GE is not set # # Token Ring devices @@ -716,14 +848,15 @@ CONFIG_E100=y # CONFIG_WAN is not set # CONFIG_FDDI is not set # CONFIG_HIPPI is not set -# CONFIG_PLIP is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set # CONFIG_NET_FC is not set # CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set +CONFIG_NETCONSOLE=y +CONFIG_NETPOLL=y +# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set +CONFIG_NET_POLL_CONTROLLER=y # # ISDN subsystem @@ -745,8 +878,8 @@ CONFIG_INPUT=y # CONFIG_INPUT_MOUSEDEV=y CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1280 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set # CONFIG_INPUT_TSDEV is not set CONFIG_INPUT_EVDEV=y @@ -776,7 +909,6 @@ CONFIG_SERIO=y CONFIG_SERIO_I8042=y # CONFIG_SERIO_SERPORT is not set # CONFIG_SERIO_CT82C710 is not set -# CONFIG_SERIO_PARKBD is not set # CONFIG_SERIO_PCIPS2 is not set CONFIG_SERIO_LIBPS2=y # CONFIG_SERIO_RAW is not set @@ -788,14 +920,15 @@ CONFIG_SERIO_LIBPS2=y CONFIG_VT=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set # CONFIG_SERIAL_NONSTANDARD is not set # # Serial drivers # CONFIG_SERIAL_8250=y -# CONFIG_SERIAL_8250_CONSOLE is not set -# CONFIG_SERIAL_8250_ACPI is not set +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_PCI=y CONFIG_SERIAL_8250_NR_UARTS=4 CONFIG_SERIAL_8250_RUNTIME_UARTS=4 # CONFIG_SERIAL_8250_EXTENDED is not set @@ -804,14 +937,11 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=4 # Non-8250 serial port support # CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 -CONFIG_PRINTER=y -# CONFIG_LP_CONSOLE is not set -# CONFIG_PPDEV is not set -# CONFIG_TIPAR is not set # # IPMI @@ -822,8 +952,12 @@ CONFIG_PRINTER=y # Watchdog Cards # # CONFIG_WATCHDOG is not set -# CONFIG_HW_RANDOM is not set -CONFIG_NVRAM=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_INTEL=y +CONFIG_HW_RANDOM_AMD=y +CONFIG_HW_RANDOM_GEODE=y +CONFIG_HW_RANDOM_VIA=y +# CONFIG_NVRAM is not set CONFIG_RTC=y # CONFIG_DTLK is not set # CONFIG_R3964 is not set @@ -833,31 +967,28 @@ CONFIG_RTC=y # # Ftape, the floppy tape device driver # -# CONFIG_FTAPE is not set CONFIG_AGP=y # CONFIG_AGP_ALI is not set # CONFIG_AGP_ATI is not set # CONFIG_AGP_AMD is not set -# CONFIG_AGP_AMD64 is not set -# CONFIG_AGP_INTEL is not set +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y # CONFIG_AGP_NVIDIA is not set # CONFIG_AGP_SIS is not set # CONFIG_AGP_SWORKS is not set -CONFIG_AGP_VIA=y +# CONFIG_AGP_VIA is not set # CONFIG_AGP_EFFICEON is not set -CONFIG_DRM=y -# CONFIG_DRM_TDFX is not set -# CONFIG_DRM_R128 is not set -CONFIG_DRM_RADEON=y -# CONFIG_DRM_MGA is not set -# CONFIG_DRM_SIS is not set -# CONFIG_DRM_VIA is not set -# CONFIG_DRM_SAVAGE is not set +# CONFIG_DRM is not set # CONFIG_MWAVE is not set +# CONFIG_PC8736x_GPIO is not set +# CONFIG_NSC_GPIO is not set # CONFIG_CS5535_GPIO is not set -# CONFIG_RAW_DRIVER is not set -# CONFIG_HPET is not set -# CONFIG_HANGCHECK_TIMER is not set +CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=256 +CONFIG_HPET=y +# CONFIG_HPET_RTC_IRQ is not set +CONFIG_HPET_MMAP=y +CONFIG_HANGCHECK_TIMER=y # # TPM devices @@ -868,59 +999,7 @@ CONFIG_DRM_RADEON=y # # I2C support # -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y - -# -# I2C Algorithms -# -CONFIG_I2C_ALGOBIT=y -# CONFIG_I2C_ALGOPCF is not set -# CONFIG_I2C_ALGOPCA is not set - -# -# I2C Hardware Bus support -# -# CONFIG_I2C_ALI1535 is not set -# CONFIG_I2C_ALI1563 is not set -# CONFIG_I2C_ALI15X3 is not set -# CONFIG_I2C_AMD756 is not set -# CONFIG_I2C_AMD8111 is not set -# CONFIG_I2C_I801 is not set -# CONFIG_I2C_I810 is not set -# CONFIG_I2C_PIIX4 is not set -CONFIG_I2C_ISA=y -# CONFIG_I2C_NFORCE2 is not set -# CONFIG_I2C_PARPORT is not set -# CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PROSAVAGE is not set -# CONFIG_I2C_SAVAGE4 is not set -# CONFIG_SCx200_ACB is not set -# CONFIG_I2C_SIS5595 is not set -# CONFIG_I2C_SIS630 is not set -# CONFIG_I2C_SIS96X is not set -# CONFIG_I2C_STUB is not set -# CONFIG_I2C_VIA is not set -CONFIG_I2C_VIAPRO=y -# CONFIG_I2C_VOODOO3 is not set -# CONFIG_I2C_PCA_ISA is not set - -# -# Miscellaneous I2C Chip support -# -# CONFIG_SENSORS_DS1337 is not set -# CONFIG_SENSORS_DS1374 is not set -# CONFIG_SENSORS_EEPROM is not set -# CONFIG_SENSORS_PCF8574 is not set -# CONFIG_SENSORS_PCA9539 is not set -# CONFIG_SENSORS_PCF8591 is not set -# CONFIG_SENSORS_RTC8564 is not set -# CONFIG_SENSORS_MAX6875 is not set -# CONFIG_RTC_X1205_I2C is not set -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# CONFIG_I2C_DEBUG_CHIP is not set +# CONFIG_I2C is not set # # SPI support @@ -931,169 +1010,44 @@ CONFIG_I2C_VIAPRO=y # # Dallas's 1-wire bus # -# CONFIG_W1 is not set # # Hardware Monitoring support # -CONFIG_HWMON=y -CONFIG_HWMON_VID=y -# CONFIG_SENSORS_ADM1021 is not set -# CONFIG_SENSORS_ADM1025 is not set -# CONFIG_SENSORS_ADM1026 is not set -# CONFIG_SENSORS_ADM1031 is not set -# CONFIG_SENSORS_ADM9240 is not set -# CONFIG_SENSORS_ASB100 is not set -# CONFIG_SENSORS_ATXP1 is not set -# CONFIG_SENSORS_DS1621 is not set -# CONFIG_SENSORS_F71805F is not set -# CONFIG_SENSORS_FSCHER is not set -# CONFIG_SENSORS_FSCPOS is not set -# CONFIG_SENSORS_GL518SM is not set -# CONFIG_SENSORS_GL520SM is not set -CONFIG_SENSORS_IT87=y -# CONFIG_SENSORS_LM63 is not set -# CONFIG_SENSORS_LM75 is not set -# CONFIG_SENSORS_LM77 is not set -# CONFIG_SENSORS_LM78 is not set -# CONFIG_SENSORS_LM80 is not set -# CONFIG_SENSORS_LM83 is not set -# CONFIG_SENSORS_LM85 is not set -# CONFIG_SENSORS_LM87 is not set -# CONFIG_SENSORS_LM90 is not set -# CONFIG_SENSORS_LM92 is not set -# CONFIG_SENSORS_MAX1619 is not set -# CONFIG_SENSORS_PC87360 is not set -# CONFIG_SENSORS_SIS5595 is not set -# CONFIG_SENSORS_SMSC47M1 is not set -# CONFIG_SENSORS_SMSC47B397 is not set -# CONFIG_SENSORS_VIA686A is not set -# CONFIG_SENSORS_VT8231 is not set -# CONFIG_SENSORS_W83781D is not set -# CONFIG_SENSORS_W83792D is not set -# CONFIG_SENSORS_W83L785TS is not set -# CONFIG_SENSORS_W83627HF is not set -# CONFIG_SENSORS_W83627EHF is not set -# CONFIG_SENSORS_HDAPS is not set -# CONFIG_HWMON_DEBUG_CHIP is not set +# CONFIG_HWMON is not set +# CONFIG_HWMON_VID is not set # # Misc devices # # CONFIG_IBM_ASM is not set -# -# Multimedia Capabilities Port drivers -# - # # Multimedia devices # -CONFIG_VIDEO_DEV=y - -# -# Video For Linux -# - -# -# Video Adapters -# -# CONFIG_VIDEO_ADV_DEBUG is not set -# CONFIG_VIDEO_BT848 is not set -# CONFIG_VIDEO_BWQCAM is not set -# CONFIG_VIDEO_CQCAM is not set -# CONFIG_VIDEO_W9966 is not set -# CONFIG_VIDEO_CPIA is not set -# CONFIG_VIDEO_SAA5246A is not set -# CONFIG_VIDEO_SAA5249 is not set -# CONFIG_TUNER_3036 is not set -# CONFIG_VIDEO_STRADIS is not set -# CONFIG_VIDEO_ZORAN is not set -CONFIG_VIDEO_SAA7134=y -# CONFIG_VIDEO_SAA7134_ALSA is not set -# CONFIG_VIDEO_MXB is not set -# CONFIG_VIDEO_DPC is not set -# CONFIG_VIDEO_HEXIUM_ORION is not set -# CONFIG_VIDEO_HEXIUM_GEMINI is not set -# CONFIG_VIDEO_CX88 is not set -# CONFIG_VIDEO_EM28XX is not set -# CONFIG_VIDEO_OVCAMCHIP is not set -# CONFIG_VIDEO_AUDIO_DECODER is not set -# CONFIG_VIDEO_DECODER is not set - -# -# Radio Adapters -# -# CONFIG_RADIO_GEMTEK_PCI is not set -# CONFIG_RADIO_MAXIRADIO is not set -# CONFIG_RADIO_MAESTRO is not set +# CONFIG_VIDEO_DEV is not set +CONFIG_VIDEO_V4L2=y # # Digital Video Broadcasting Devices # # CONFIG_DVB is not set -CONFIG_VIDEO_TUNER=y -CONFIG_VIDEO_BUF=y -CONFIG_VIDEO_IR=y +# CONFIG_USB_DABUSB is not set # # Graphics support # -CONFIG_FB=y -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -# CONFIG_FB_MACMODES is not set -CONFIG_FB_MODE_HELPERS=y -# CONFIG_FB_TILEBLITTING is not set -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_VGA16 is not set -# CONFIG_FB_VESA is not set -CONFIG_VIDEO_SELECT=y -# CONFIG_FB_HGA is not set -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_I810 is not set -# CONFIG_FB_INTEL is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON_OLD is not set -CONFIG_FB_RADEON=y -CONFIG_FB_RADEON_I2C=y -# CONFIG_FB_RADEON_DEBUG is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_CYBLA is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_GEODE is not set -# CONFIG_FB_VIRTUAL is not set +CONFIG_FIRMWARE_EDID=y +# CONFIG_FB is not set # # Console display driver support # CONFIG_VGA_CONSOLE=y +CONFIG_VGACON_SOFT_SCROLLBACK=y +CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128 +CONFIG_VIDEO_SELECT=y CONFIG_DUMMY_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE=y -# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set -# CONFIG_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Logo configuration -# -# CONFIG_LOGO is not set # CONFIG_BACKLIGHT_LCD_SUPPORT is not set # @@ -1104,97 +1058,30 @@ CONFIG_SOUND=y # # Advanced Linux Sound Architecture # -CONFIG_SND=y -CONFIG_SND_TIMER=y -CONFIG_SND_PCM=y -CONFIG_SND_RAWMIDI=y -CONFIG_SND_SEQUENCER=y -# CONFIG_SND_SEQ_DUMMY is not set -# CONFIG_SND_MIXER_OSS is not set -# CONFIG_SND_PCM_OSS is not set -# CONFIG_SND_SEQUENCER_OSS is not set -CONFIG_SND_RTCTIMER=y -CONFIG_SND_SEQ_RTCTIMER_DEFAULT=y -# CONFIG_SND_DYNAMIC_MINORS is not set -# CONFIG_SND_SUPPORT_OLD_API is not set -# CONFIG_SND_VERBOSE_PRINTK is not set -# CONFIG_SND_DEBUG is not set - -# -# Generic devices -# -CONFIG_SND_MPU401_UART=y -CONFIG_SND_AC97_CODEC=y -CONFIG_SND_AC97_BUS=y -# CONFIG_SND_DUMMY is not set -# CONFIG_SND_VIRMIDI is not set -# CONFIG_SND_MTPAV is not set -# CONFIG_SND_SERIAL_U16550 is not set -# CONFIG_SND_MPU401 is not set - -# -# PCI devices -# -# CONFIG_SND_AD1889 is not set -# CONFIG_SND_ALS4000 is not set -# CONFIG_SND_ALI5451 is not set -# CONFIG_SND_ATIIXP is not set -# CONFIG_SND_ATIIXP_MODEM is not set -# CONFIG_SND_AU8810 is not set -# CONFIG_SND_AU8820 is not set -# CONFIG_SND_AU8830 is not set -# CONFIG_SND_AZT3328 is not set -# CONFIG_SND_BT87X is not set -# CONFIG_SND_CA0106 is not set -# CONFIG_SND_CMIPCI is not set -# CONFIG_SND_CS4281 is not set -# CONFIG_SND_CS46XX is not set -# CONFIG_SND_CS5535AUDIO is not set -# CONFIG_SND_EMU10K1 is not set -# CONFIG_SND_EMU10K1X is not set -# CONFIG_SND_ENS1370 is not set -# CONFIG_SND_ENS1371 is not set -# CONFIG_SND_ES1938 is not set -# CONFIG_SND_ES1968 is not set -# CONFIG_SND_FM801 is not set -# CONFIG_SND_HDA_INTEL is not set -# CONFIG_SND_HDSP is not set -# CONFIG_SND_HDSPM is not set -# CONFIG_SND_ICE1712 is not set -# CONFIG_SND_ICE1724 is not set -# CONFIG_SND_INTEL8X0 is not set -# CONFIG_SND_INTEL8X0M is not set -# CONFIG_SND_KORG1212 is not set -# CONFIG_SND_MAESTRO3 is not set -# CONFIG_SND_MIXART is not set -# CONFIG_SND_NM256 is not set -# CONFIG_SND_PCXHR is not set -# CONFIG_SND_RME32 is not set -# CONFIG_SND_RME96 is not set -# CONFIG_SND_RME9652 is not set -# CONFIG_SND_SONICVIBES is not set -# CONFIG_SND_TRIDENT is not set -CONFIG_SND_VIA82XX=y -# CONFIG_SND_VIA82XX_MODEM is not set -# CONFIG_SND_VX222 is not set -# CONFIG_SND_YMFPCI is not set - -# -# USB devices -# -# CONFIG_SND_USB_AUDIO is not set -# CONFIG_SND_USB_USX2Y is not set +# CONFIG_SND is not set # # Open Sound System # -# CONFIG_SOUND_PRIME is not set +CONFIG_SOUND_PRIME=y +CONFIG_OSS_OBSOLETE_DRIVER=y +# CONFIG_SOUND_BT878 is not set +# CONFIG_SOUND_EMU10K1 is not set +# CONFIG_SOUND_FUSION is not set +# CONFIG_SOUND_ES1371 is not set +CONFIG_SOUND_ICH=y +# CONFIG_SOUND_TRIDENT is not set +# CONFIG_SOUND_MSNDCLAS is not set +# CONFIG_SOUND_MSNDPIN is not set +# CONFIG_SOUND_VIA82CXXX is not set +# CONFIG_SOUND_OSS is not set # # USB support # CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=y # CONFIG_USB_DEBUG is not set @@ -1213,17 +1100,19 @@ CONFIG_USB_DEVICEFS=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_SPLIT_ISO is not set # CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set # CONFIG_USB_ISP116X_HCD is not set -# CONFIG_USB_OHCI_HCD is not set +CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=y # CONFIG_USB_SL811_HCD is not set # # USB Device Class drivers # -# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set # CONFIG_USB_ACM is not set -# CONFIG_USB_PRINTER is not set +CONFIG_USB_PRINTER=y # # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' @@ -1248,21 +1137,17 @@ CONFIG_USB_STORAGE=y # # USB Input Devices # -# CONFIG_USB_HID is not set - -# -# USB HID Boot Protocol drivers -# -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set +CONFIG_USB_HID=y +CONFIG_USB_HIDINPUT=y +# CONFIG_USB_HIDINPUT_POWERBOOK is not set +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set # CONFIG_USB_AIPTEK is not set # CONFIG_USB_WACOM is not set # CONFIG_USB_ACECAD is not set # CONFIG_USB_KBTAB is not set # CONFIG_USB_POWERMATE is not set -# CONFIG_USB_MTOUCH is not set -# CONFIG_USB_ITMTOUCH is not set -# CONFIG_USB_EGALAX is not set +# CONFIG_USB_TOUCHSCREEN is not set # CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set @@ -1276,21 +1161,6 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_MDC800 is not set # CONFIG_USB_MICROTEK is not set -# -# USB Multimedia devices -# -# CONFIG_USB_DABUSB is not set -# CONFIG_USB_VICAM is not set -# CONFIG_USB_DSBR is not set -# CONFIG_USB_ET61X251 is not set -# CONFIG_USB_IBMCAM is not set -# CONFIG_USB_KONICAWC is not set -# CONFIG_USB_OV511 is not set -# CONFIG_USB_SE401 is not set -# CONFIG_USB_SN9C102 is not set -# CONFIG_USB_STV680 is not set -# CONFIG_USB_PWC is not set - # # USB Network Adapters # @@ -1299,12 +1169,11 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET is not set -# CONFIG_USB_MON is not set +CONFIG_USB_MON=y # # USB port drivers # -# CONFIG_USB_USS720 is not set # # USB Serial Converter support @@ -1321,10 +1190,12 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_LCD is not set # CONFIG_USB_LED is not set +# CONFIG_USB_CYPRESS_CY7C63 is not set # CONFIG_USB_CYTHERM is not set # CONFIG_USB_PHIDGETKIT is not set # CONFIG_USB_PHIDGETSERVO is not set # CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_APPLEDISPLAY is not set # CONFIG_USB_SISUSBVGA is not set # CONFIG_USB_LD is not set # CONFIG_USB_TEST is not set @@ -1343,57 +1214,97 @@ CONFIG_USB_STORAGE=y # # CONFIG_MMC is not set +# +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# + # # InfiniBand support # # CONFIG_INFINIBAND is not set # -# SN Devices +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) +# +# CONFIG_EDAC is not set + +# +# Real Time Clock # +# CONFIG_RTC_CLASS is not set # -# EDAC - error detection and reporting (RAS) +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices # -# CONFIG_EDAC is not set # # File systems # CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +# CONFIG_EXT2_FS_SECURITY is not set # CONFIG_EXT2_FS_XIP is not set -# CONFIG_EXT3_FS is not set -# CONFIG_REISERFS_FS is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +# CONFIG_REISERFS_FS_SECURITY is not set # CONFIG_JFS_FS is not set -# CONFIG_FS_POSIX_ACL is not set +CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set -# CONFIG_INOTIFY is not set +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y # CONFIG_QUOTA is not set CONFIG_DNOTIFY=y # CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set +CONFIG_AUTOFS4_FS=y # CONFIG_FUSE_FS is not set # # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_ZISOFS_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set # CONFIG_UDF_FS is not set # # DOS/FAT/NT Filesystems # CONFIG_FAT_FS=y -# CONFIG_MSDOS_FS is not set +CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y -CONFIG_FAT_DEFAULT_CODEPAGE=850 +CONFIG_FAT_DEFAULT_CODEPAGE=437 CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # CONFIG_NTFS_FS is not set @@ -1404,10 +1315,9 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_SYSFS=y CONFIG_TMPFS=y -# CONFIG_HUGETLBFS is not set -# CONFIG_HUGETLB_PAGE is not set +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y -# CONFIG_RELAYFS_FS is not set # CONFIG_CONFIGFS_FS is not set # @@ -1430,13 +1340,26 @@ CONFIG_RAMFS=y # # Network File Systems # -# CONFIG_NFS_FS is not set -# CONFIG_NFSD is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +# CONFIG_NFSD_V3_ACL is not set +# CONFIG_NFSD_V4 is not set +CONFIG_NFSD_TCP=y +CONFIG_ROOT_NFS=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set -CONFIG_CIFS=y -# CONFIG_CIFS_STATS is not set -# CONFIG_CIFS_XATTR is not set -# CONFIG_CIFS_EXPERIMENTAL is not set +# CONFIG_CIFS is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -1445,33 +1368,18 @@ CONFIG_CIFS=y # # Partition Types # -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set +# CONFIG_PARTITION_ADVANCED is not set CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_KARMA_PARTITION is not set -# CONFIG_EFI_PARTITION is not set # # Native Language Support # CONFIG_NLS=y -CONFIG_NLS_DEFAULT="iso8859-15" -# CONFIG_NLS_CODEPAGE_437 is not set +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set -CONFIG_NLS_CODEPAGE_850=y +# CONFIG_NLS_CODEPAGE_850 is not set # CONFIG_NLS_CODEPAGE_852 is not set # CONFIG_NLS_CODEPAGE_855 is not set # CONFIG_NLS_CODEPAGE_857 is not set @@ -1491,7 +1399,7 @@ CONFIG_NLS_CODEPAGE_850=y # CONFIG_NLS_ISO8859_8 is not set # CONFIG_NLS_CODEPAGE_1250 is not set # CONFIG_NLS_CODEPAGE_1251 is not set -# CONFIG_NLS_ASCII is not set +CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_ISO8859_2 is not set # CONFIG_NLS_ISO8859_3 is not set @@ -1510,20 +1418,50 @@ CONFIG_NLS_UTF8=y # # Instrumentation Support # -# CONFIG_PROFILING is not set -# CONFIG_KPROBES is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_KPROBES=y # # Kernel hacking # +CONFIG_TRACE_IRQFLAGS_SUPPORT=y # CONFIG_PRINTK_TIME is not set CONFIG_MAGIC_SYSRQ=y -# CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 +CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_KERNEL=y +CONFIG_LOG_BUF_SHIFT=18 +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_RWSEMS is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_FRAME_POINTER is not set +CONFIG_UNWIND_INFO=y +CONFIG_STACK_UNWIND=y +# CONFIG_FORCED_INLINING is not set +# CONFIG_RCU_TORTURE_TEST is not set CONFIG_EARLY_PRINTK=y +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_RODATA is not set +# CONFIG_4KSTACKS is not set CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y +CONFIG_DOUBLEFAULT=y # # Security options @@ -1536,10 +1474,6 @@ CONFIG_X86_MPPARSE=y # # CONFIG_CRYPTO is not set -# -# Hardware crypto devices -# - # # Library routines # @@ -1548,7 +1482,12 @@ CONFIG_X86_MPPARSE=y CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y +CONFIG_PLIST=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_HT=y CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y CONFIG_KTIME_SCALAR=y -- cgit v1.2.3 From 874c4fe389d1358f82c96dc9b5092fc5c7690604 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:26 +0200 Subject: [PATCH] i386: Allow to use GENERICARCH for UP kernels There are some machines around (large xSeries or Unisys ES7000) that need physical IO-APIC destination mode to access all of their IO devices. This currently doesn't work in UP kernels as used in distribution installers. This patch allows to compile even UP kernels as GENERICARCH which allows to use physical or clustered APIC mode. Signed-off-by: Andi Kleen --- arch/i386/Kconfig | 7 +++---- arch/i386/kernel/io_apic.c | 1 + arch/i386/kernel/mpparse.c | 1 + arch/i386/mach-generic/bigsmp.c | 1 + arch/i386/mach-generic/es7000.c | 1 + arch/i386/mach-generic/probe.c | 2 ++ arch/i386/mach-generic/summit.c | 1 + 7 files changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index b2751eadbc5..c134a545b37 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -166,7 +166,6 @@ config X86_VISWS config X86_GENERICARCH bool "Generic architecture (Summit, bigsmp, ES7000, default)" - depends on SMP help This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. It is intended for a generic binary kernel. @@ -263,7 +262,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on !SMP && !(X86_VISWS || X86_VOYAGER) + depends on !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH) help A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -288,12 +287,12 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC bool - depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) + depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH default y config X86_IO_APIC bool - depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) + depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH default y config X86_VISWS_APIC diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 4fb32c551fe..4eacd52eeb7 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -40,6 +40,7 @@ #include #include +#include #include "io_ports.h" diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index a70b5fa0ef0..82756968856 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -30,6 +30,7 @@ #include #include +#include #include #include diff --git a/arch/i386/mach-generic/bigsmp.c b/arch/i386/mach-generic/bigsmp.c index ef7a6e6fcb9..33d9f93557b 100644 --- a/arch/i386/mach-generic/bigsmp.c +++ b/arch/i386/mach-generic/bigsmp.c @@ -5,6 +5,7 @@ #define APIC_DEFINITION 1 #include #include +#include #include #include #include diff --git a/arch/i386/mach-generic/es7000.c b/arch/i386/mach-generic/es7000.c index 845cdd0b359..aa144d82334 100644 --- a/arch/i386/mach-generic/es7000.c +++ b/arch/i386/mach-generic/es7000.c @@ -4,6 +4,7 @@ #define APIC_DEFINITION 1 #include #include +#include #include #include #include diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c index bcd1bcfaa72..793d1b47325 100644 --- a/arch/i386/mach-generic/probe.c +++ b/arch/i386/mach-generic/probe.c @@ -119,7 +119,9 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } +#ifdef CONFIG_SMP int hard_smp_processor_id(void) { return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); } +#endif diff --git a/arch/i386/mach-generic/summit.c b/arch/i386/mach-generic/summit.c index b73501ddd65..f7e5d66648d 100644 --- a/arch/i386/mach-generic/summit.c +++ b/arch/i386/mach-generic/summit.c @@ -4,6 +4,7 @@ #define APIC_DEFINITION 1 #include #include +#include #include #include #include -- cgit v1.2.3 From b07f8915cda3fcd73b8b68075ba1e6cd0673365d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:26 +0200 Subject: [PATCH] x86: Temporarily revert parts of the Core 2 nmi nmi watchdog support This makes merging easier. They are readded a few patches later. Signed-off-by: Andi Kleen --- arch/i386/kernel/nmi.c | 65 +------------------------------------------------- 1 file changed, 1 insertion(+), 64 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index acb351478e4..1282d70ff97 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -24,7 +24,6 @@ #include #include -#include #include "mach_traps.h" @@ -96,9 +95,6 @@ int nmi_active; (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) -#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL -#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK - #ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all @@ -211,8 +207,6 @@ static int __init setup_nmi_watchdog(char *str) __setup("nmi_watchdog=", setup_nmi_watchdog); -static void disable_intel_arch_watchdog(void); - static void disable_lapic_nmi_watchdog(void) { if (nmi_active <= 0) @@ -222,10 +216,6 @@ static void disable_lapic_nmi_watchdog(void) wrmsr(MSR_K7_EVNTSEL0, 0, 0); break; case X86_VENDOR_INTEL: - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - disable_intel_arch_watchdog(); - break; - } switch (boot_cpu_data.x86) { case 6: if (boot_cpu_data.x86_model > 0xd) @@ -454,53 +444,6 @@ static int setup_p4_watchdog(void) return 1; } -static void disable_intel_arch_watchdog(void) -{ - unsigned ebx; - - /* - * Check whether the Architectural PerfMon supports - * Unhalted Core Cycles Event or not. - * NOTE: Corresponding bit = 0 in ebp indicates event present. - */ - ebx = cpuid_ebx(10); - if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) - wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); -} - -static int setup_intel_arch_watchdog(void) -{ - unsigned int evntsel; - unsigned ebx; - - /* - * Check whether the Architectural PerfMon supports - * Unhalted Core Cycles Event or not. - * NOTE: Corresponding bit = 0 in ebp indicates event present. - */ - ebx = cpuid_ebx(10); - if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) - return 0; - - nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; - - clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); - clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); - - evntsel = ARCH_PERFMON_EVENTSEL_INT - | ARCH_PERFMON_EVENTSEL_OS - | ARCH_PERFMON_EVENTSEL_USR - | ARCH_PERFMON_NMI_EVENT_SEL - | ARCH_PERFMON_NMI_EVENT_UMASK; - - wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); - write_watchdog_counter("INTEL_ARCH_PERFCTR0"); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); - return 1; -} - void setup_apic_nmi_watchdog (void) { switch (boot_cpu_data.x86_vendor) { @@ -510,11 +453,6 @@ void setup_apic_nmi_watchdog (void) setup_k7_watchdog(); break; case X86_VENDOR_INTEL: - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - if (!setup_intel_arch_watchdog()) - return; - break; - } switch (boot_cpu_data.x86) { case 6: if (boot_cpu_data.x86_model > 0xd) @@ -619,8 +557,7 @@ void nmi_watchdog_tick (struct pt_regs * regs) wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); apic_write(APIC_LVTPC, APIC_DM_NMI); } - else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 || - nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { + else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { /* Only P6 based Pentium M need to re-unmask * the apic vector but it doesn't hurt * other P6 variant */ -- cgit v1.2.3 From 828f0afda123a96ff4e8078f057a302f4b4232ae Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:26 +0200 Subject: [PATCH] x86: Add performance counter reservation framework for UP kernels Adds basic infrastructure to allow subsystems to reserve performance counters on the x86 chips. Only UP kernels are supported in this patch to make reviewing easier. The SMP portion makes a lot more changes. Think of this as a locking mechanism where each bit represents a different counter. In addition, each subsystem should also reserve an appropriate event selection register that will correspond to the performance counter it will be using (this is mainly neccessary for the Pentium 4 chips as they break the 1:1 relationship to performance counters). This will help prevent subsystems like oprofile from interfering with the nmi watchdog. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/kernel/nmi.c | 188 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 156 insertions(+), 32 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 1282d70ff97..5d58dfeacd5 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -34,6 +34,20 @@ static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ static unsigned int nmi_p4_cccr_val; extern void show_registers(struct pt_regs *regs); +/* perfctr_nmi_owner tracks the ownership of the perfctr registers: + * evtsel_nmi_owner tracks the ownership of the event selection + * - different performance counters/ event selection may be reserved for + * different subsystems this reservation system just tries to coordinate + * things a little + */ +static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); +static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); + +/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's + * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) + */ +#define NMI_MAX_COUNTER_BITS 66 + /* * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: * - it may be reserved by some other driver, or not @@ -95,6 +109,105 @@ int nmi_active; (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) +/* converts an msr to an appropriate reservation bit */ +static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) +{ + /* returns the bit offset of the performance counter register */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return (msr - MSR_K7_PERFCTR0); + case X86_VENDOR_INTEL: + switch (boot_cpu_data.x86) { + case 6: + return (msr - MSR_P6_PERFCTR0); + case 15: + return (msr - MSR_P4_BPU_PERFCTR0); + } + } + return 0; +} + +/* converts an msr to an appropriate reservation bit */ +static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) +{ + /* returns the bit offset of the event selection register */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return (msr - MSR_K7_EVNTSEL0); + case X86_VENDOR_INTEL: + switch (boot_cpu_data.x86) { + case 6: + return (msr - MSR_P6_EVNTSEL0); + case 15: + return (msr - MSR_P4_BSU_ESCR0); + } + } + return 0; +} + +/* checks for a bit availability (hack for oprofile) */ +int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) +{ + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); +} + +/* checks the an msr for availability */ +int avail_to_resrv_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); +} + +int reserve_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner))) + return 1; + return 0; +} + +void release_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner)); +} + +int reserve_evntsel_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_evntsel_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0])) + return 1; + return 0; +} + +void release_evntsel_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_evntsel_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]); +} + #ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all @@ -344,14 +457,6 @@ late_initcall(init_lapic_nmi_sysfs); * Original code written by Keith Owens. */ -static void clear_msr_range(unsigned int base, unsigned int n) -{ - unsigned int i; - - for(i = 0; i < n; ++i) - wrmsr(base+i, 0, 0); -} - static void write_watchdog_counter(const char *descr) { u64 count = (u64)cpu_khz * 1000; @@ -362,14 +467,19 @@ static void write_watchdog_counter(const char *descr) wrmsrl(nmi_perfctr_msr, 0 - count); } -static void setup_k7_watchdog(void) +static int setup_k7_watchdog(void) { unsigned int evntsel; nmi_perfctr_msr = MSR_K7_PERFCTR0; - clear_msr_range(MSR_K7_EVNTSEL0, 4); - clear_msr_range(MSR_K7_PERFCTR0, 4); + if (!reserve_perfctr_nmi(nmi_perfctr_msr)) + goto fail; + + if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) + goto fail1; + + wrmsrl(MSR_K7_PERFCTR0, 0UL); evntsel = K7_EVNTSEL_INT | K7_EVNTSEL_OS @@ -381,16 +491,24 @@ static void setup_k7_watchdog(void) apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= K7_EVNTSEL_ENABLE; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); + return 1; +fail1: + release_perfctr_nmi(nmi_perfctr_msr); +fail: + return 0; } -static void setup_p6_watchdog(void) +static int setup_p6_watchdog(void) { unsigned int evntsel; nmi_perfctr_msr = MSR_P6_PERFCTR0; - clear_msr_range(MSR_P6_EVNTSEL0, 2); - clear_msr_range(MSR_P6_PERFCTR0, 2); + if (!reserve_perfctr_nmi(nmi_perfctr_msr)) + goto fail; + + if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0)) + goto fail1; evntsel = P6_EVNTSEL_INT | P6_EVNTSEL_OS @@ -402,6 +520,11 @@ static void setup_p6_watchdog(void) apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= P6_EVNTSEL0_ENABLE; wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); + return 1; +fail1: + release_perfctr_nmi(nmi_perfctr_msr); +fail: + return 0; } static int setup_p4_watchdog(void) @@ -419,22 +542,11 @@ static int setup_p4_watchdog(void) nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; #endif - if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) - clear_msr_range(0x3F1, 2); - /* MSR 0x3F0 seems to have a default value of 0xFC00, but current - docs doesn't fully define it, so leave it alone for now. */ - if (boot_cpu_data.x86_model >= 0x3) { - /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ - clear_msr_range(0x3A0, 26); - clear_msr_range(0x3BC, 3); - } else { - clear_msr_range(0x3A0, 31); - } - clear_msr_range(0x3C0, 6); - clear_msr_range(0x3C8, 6); - clear_msr_range(0x3E0, 2); - clear_msr_range(MSR_P4_CCCR0, 18); - clear_msr_range(MSR_P4_PERFCTR0, 18); + if (!reserve_perfctr_nmi(nmi_perfctr_msr)) + goto fail; + + if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) + goto fail1; wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); @@ -442,6 +554,10 @@ static int setup_p4_watchdog(void) apic_write(APIC_LVTPC, APIC_DM_NMI); wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); return 1; +fail1: + release_perfctr_nmi(nmi_perfctr_msr); +fail: + return 0; } void setup_apic_nmi_watchdog (void) @@ -450,7 +566,8 @@ void setup_apic_nmi_watchdog (void) case X86_VENDOR_AMD: if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) return; - setup_k7_watchdog(); + if (!setup_k7_watchdog()) + return; break; case X86_VENDOR_INTEL: switch (boot_cpu_data.x86) { @@ -458,7 +575,8 @@ void setup_apic_nmi_watchdog (void) if (boot_cpu_data.x86_model > 0xd) return; - setup_p6_watchdog(); + if(!setup_p6_watchdog()) + return; break; case 15: if (boot_cpu_data.x86_model > 0x4) @@ -612,6 +730,12 @@ int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); +EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); +EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); +EXPORT_SYMBOL(reserve_perfctr_nmi); +EXPORT_SYMBOL(release_perfctr_nmi); +EXPORT_SYMBOL(reserve_evntsel_nmi); +EXPORT_SYMBOL(release_evntsel_nmi); EXPORT_SYMBOL(reserve_lapic_nmi); EXPORT_SYMBOL(release_lapic_nmi); EXPORT_SYMBOL(disable_timer_nmi_watchdog); -- cgit v1.2.3 From cb9c448c661d40ce2efbce8e9c19cc4d420d8ccc Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:26 +0200 Subject: [PATCH] i386: Utilize performance counter reservation framework in oprofile Incorporates the new performance counter reservation system in oprofile. Also cleans up a lot of the initialization code. The code original zero'd out every register associated with performance counters regardless if those registers were used or not. This causes issues with the nmi watchdog. Now oprofile tries to reserve registers and gives up if it can't get them. Cc: levon@movementarian.org Cc: oprofile-list@lists.sf.net Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/oprofile/nmi_int.c | 41 +++++++--- arch/i386/oprofile/op_model_athlon.c | 54 ++++++++++--- arch/i386/oprofile/op_model_p4.c | 152 +++++++++++++++++------------------ arch/i386/oprofile/op_model_ppro.c | 65 ++++++++++++--- arch/i386/oprofile/op_x86_model.h | 1 + 5 files changed, 199 insertions(+), 114 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index 5f8dc8a21bd..8710ca081b1 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c @@ -98,15 +98,19 @@ static void nmi_cpu_save_registers(struct op_msrs * msrs) unsigned int i; for (i = 0; i < nr_ctrs; ++i) { - rdmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); + if (counters[i].addr){ + rdmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } } for (i = 0; i < nr_ctrls; ++i) { - rdmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); + if (controls[i].addr){ + rdmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } } } @@ -205,15 +209,19 @@ static void nmi_restore_registers(struct op_msrs * msrs) unsigned int i; for (i = 0; i < nr_ctrls; ++i) { - wrmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); + if (controls[i].addr){ + wrmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } } for (i = 0; i < nr_ctrs; ++i) { - wrmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); + if (counters[i].addr){ + wrmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } } } @@ -234,6 +242,7 @@ static void nmi_cpu_shutdown(void * dummy) apic_write(APIC_LVTPC, saved_lvtpc[cpu]); apic_write(APIC_LVTERR, v); nmi_restore_registers(msrs); + model->shutdown(msrs); } @@ -284,6 +293,14 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root) struct dentry * dir; char buf[4]; + /* quick little hack to _not_ expose a counter if it is not + * available for use. This should protect userspace app. + * NOTE: assumes 1:1 mapping here (that counters are organized + * sequentially in their struct assignment). + */ + if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) + continue; + snprintf(buf, sizeof(buf), "%d", i); dir = oprofilefs_mkdir(sb, root, buf); oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c index 693bdea4a52..3057a19e464 100644 --- a/arch/i386/oprofile/op_model_athlon.c +++ b/arch/i386/oprofile/op_model_athlon.c @@ -21,10 +21,12 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 +#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0) #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) +#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) #define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0) #define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0) #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) @@ -40,15 +42,21 @@ static unsigned long reset_value[NUM_COUNTERS]; static void athlon_fill_in_addresses(struct op_msrs * const msrs) { - msrs->counters[0].addr = MSR_K7_PERFCTR0; - msrs->counters[1].addr = MSR_K7_PERFCTR1; - msrs->counters[2].addr = MSR_K7_PERFCTR2; - msrs->counters[3].addr = MSR_K7_PERFCTR3; - - msrs->controls[0].addr = MSR_K7_EVNTSEL0; - msrs->controls[1].addr = MSR_K7_EVNTSEL1; - msrs->controls[2].addr = MSR_K7_EVNTSEL2; - msrs->controls[3].addr = MSR_K7_EVNTSEL3; + int i; + + for (i=0; i < NUM_COUNTERS; i++) { + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; + else + msrs->counters[i].addr = 0; + } + + for (i=0; i < NUM_CONTROLS; i++) { + if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; + else + msrs->controls[i].addr = 0; + } } @@ -59,19 +67,23 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) /* clear all counters */ for (i = 0 ; i < NUM_CONTROLS; ++i) { + if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + continue; CTRL_READ(low, high, msrs, i); CTRL_CLEAR(low); CTRL_WRITE(low, high, msrs, i); } - + /* avoid a false detection of ctr overflows in NMI handler */ for (i = 0; i < NUM_COUNTERS; ++i) { + if (unlikely(!CTR_IS_RESERVED(msrs,i))) + continue; CTR_WRITE(1, msrs, i); } /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { - if (counter_config[i].enabled) { + if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { reset_value[i] = counter_config[i].count; CTR_WRITE(counter_config[i].count, msrs, i); @@ -98,6 +110,8 @@ static int athlon_check_ctrs(struct pt_regs * const regs, int i; for (i = 0 ; i < NUM_COUNTERS; ++i) { + if (!reset_value[i]) + continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { oprofile_add_sample(regs, i); @@ -132,12 +146,27 @@ static void athlon_stop(struct op_msrs const * const msrs) /* Subtle: stop on all counters to avoid race with * setting our pm callback */ for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (!reset_value[i]) + continue; CTRL_READ(low, high, msrs, i); CTRL_SET_INACTIVE(low); CTRL_WRITE(low, high, msrs, i); } } +static void athlon_shutdown(struct op_msrs const * const msrs) +{ + int i; + + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (CTR_IS_RESERVED(msrs,i)) + release_perfctr_nmi(MSR_K7_PERFCTR0 + i); + } + for (i = 0 ; i < NUM_CONTROLS ; ++i) { + if (CTRL_IS_RESERVED(msrs,i)) + release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); + } +} struct op_x86_model_spec const op_athlon_spec = { .num_counters = NUM_COUNTERS, @@ -146,5 +175,6 @@ struct op_x86_model_spec const op_athlon_spec = { .setup_ctrs = &athlon_setup_ctrs, .check_ctrs = &athlon_check_ctrs, .start = &athlon_start, - .stop = &athlon_stop + .stop = &athlon_stop, + .shutdown = &athlon_shutdown }; diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c index 7c61d357b82..47925927b12 100644 --- a/arch/i386/oprofile/op_model_p4.c +++ b/arch/i386/oprofile/op_model_p4.c @@ -32,7 +32,7 @@ #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) static unsigned int num_counters = NUM_COUNTERS_NON_HT; - +static unsigned int num_controls = NUM_CONTROLS_NON_HT; /* this has to be checked dynamically since the hyper-threadedness of a chip is discovered at @@ -40,8 +40,10 @@ static unsigned int num_counters = NUM_COUNTERS_NON_HT; static inline void setup_num_counters(void) { #ifdef CONFIG_SMP - if (smp_num_siblings == 2) + if (smp_num_siblings == 2){ num_counters = NUM_COUNTERS_HT2; + num_controls = NUM_CONTROLS_HT2; + } #endif } @@ -97,15 +99,6 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { #define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT -/* All cccr we don't use. */ -static int p4_unused_cccr[NUM_UNUSED_CCCRS] = { - MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3, - MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3, - MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3, - MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1, - MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3 -}; - /* p4 event codes in libop/op_event.h are indices into this table. */ static struct p4_event_binding p4_events[NUM_EVENTS] = { @@ -372,6 +365,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) +#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) +#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) #define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) @@ -401,29 +396,34 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT]; static void p4_fill_in_addresses(struct op_msrs * const msrs) { unsigned int i; - unsigned int addr, stag; + unsigned int addr, cccraddr, stag; setup_num_counters(); stag = get_stagger(); - /* the counter registers we pay attention to */ + /* initialize some registers */ for (i = 0; i < num_counters; ++i) { - msrs->counters[i].addr = - p4_counters[VIRT_CTR(stag, i)].counter_address; + msrs->counters[i].addr = 0; } - - /* FIXME: bad feeling, we don't save the 10 counters we don't use. */ - - /* 18 CCCR registers */ - for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag; - addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + for (i = 0; i < num_controls; ++i) { + msrs->controls[i].addr = 0; } + /* the counter & cccr registers we pay attention to */ + for (i = 0; i < num_counters; ++i) { + addr = p4_counters[VIRT_CTR(stag, i)].counter_address; + cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; + if (reserve_perfctr_nmi(addr)){ + msrs->counters[i].addr = addr; + msrs->controls[i].addr = cccraddr; + } + } + /* 43 ESCR registers in three or four discontiguous group */ for (addr = MSR_P4_BSU_ESCR0 + stag; addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; } /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 @@ -431,47 +431,57 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) if (boot_cpu_data.x86_model >= 0x3) { for (addr = MSR_P4_BSU_ESCR0 + stag; addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; } } else { for (addr = MSR_P4_IQ_ESCR0 + stag; addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; } } for (addr = MSR_P4_RAT_ESCR0 + stag; addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; } for (addr = MSR_P4_MS_ESCR0 + stag; addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; } for (addr = MSR_P4_IX_ESCR0 + stag; addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { - msrs->controls[i].addr = addr; + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; } /* there are 2 remaining non-contiguously located ESCRs */ if (num_counters == NUM_COUNTERS_NON_HT) { /* standard non-HT CPUs handle both remaining ESCRs*/ - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; - msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; } else if (stag == 0) { /* HT CPUs give the first remainder to the even thread, as the 32nd control register */ - msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; } else { /* and two copies of the second to the odd thread, for the 22st and 23nd control registers */ - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) { + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + } } } @@ -544,7 +554,6 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) { unsigned int i; unsigned int low, high; - unsigned int addr; unsigned int stag; stag = get_stagger(); @@ -557,59 +566,24 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) /* clear the cccrs we will use */ for (i = 0 ; i < num_counters ; i++) { + if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + continue; rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_CLEAR(low); CCCR_SET_REQUIRED_BITS(low); wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); } - /* clear cccrs outside our concern */ - for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) { - rdmsr(p4_unused_cccr[i], low, high); - CCCR_CLEAR(low); - CCCR_SET_REQUIRED_BITS(low); - wrmsr(p4_unused_cccr[i], low, high); - } - /* clear all escrs (including those outside our concern) */ - for (addr = MSR_P4_BSU_ESCR0 + stag; - addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) { - wrmsr(addr, 0, 0); - } - - /* On older models clear also MSR_P4_IQ_ESCR0/1 */ - if (boot_cpu_data.x86_model < 0x3) { - wrmsr(MSR_P4_IQ_ESCR0, 0, 0); - wrmsr(MSR_P4_IQ_ESCR1, 0, 0); - } - - for (addr = MSR_P4_RAT_ESCR0 + stag; - addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { - wrmsr(addr, 0, 0); - } - - for (addr = MSR_P4_MS_ESCR0 + stag; - addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ - wrmsr(addr, 0, 0); - } - - for (addr = MSR_P4_IX_ESCR0 + stag; - addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ - wrmsr(addr, 0, 0); + for (i = num_counters; i < num_controls; i++) { + if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + continue; + wrmsr(msrs->controls[i].addr, 0, 0); } - if (num_counters == NUM_COUNTERS_NON_HT) { - wrmsr(MSR_P4_CRU_ESCR4, 0, 0); - wrmsr(MSR_P4_CRU_ESCR5, 0, 0); - } else if (stag == 0) { - wrmsr(MSR_P4_CRU_ESCR4, 0, 0); - } else { - wrmsr(MSR_P4_CRU_ESCR5, 0, 0); - } - /* setup all counters */ for (i = 0 ; i < num_counters ; ++i) { - if (counter_config[i].enabled) { + if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); @@ -696,12 +670,32 @@ static void p4_stop(struct op_msrs const * const msrs) stag = get_stagger(); for (i = 0; i < num_counters; ++i) { + if (!reset_value[i]) + continue; CCCR_READ(low, high, VIRT_CTR(stag, i)); CCCR_SET_DISABLE(low); CCCR_WRITE(low, high, VIRT_CTR(stag, i)); } } +static void p4_shutdown(struct op_msrs const * const msrs) +{ + int i; + + for (i = 0 ; i < num_counters ; ++i) { + if (CTR_IS_RESERVED(msrs,i)) + release_perfctr_nmi(msrs->counters[i].addr); + } + /* some of the control registers are specially reserved in + * conjunction with the counter registers (hence the starting offset). + * This saves a few bits. + */ + for (i = num_counters ; i < num_controls ; ++i) { + if (CTRL_IS_RESERVED(msrs,i)) + release_evntsel_nmi(msrs->controls[i].addr); + } +} + #ifdef CONFIG_SMP struct op_x86_model_spec const op_p4_ht2_spec = { @@ -711,7 +705,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = { .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, .start = &p4_start, - .stop = &p4_stop + .stop = &p4_stop, + .shutdown = &p4_shutdown }; #endif @@ -722,5 +717,6 @@ struct op_x86_model_spec const op_p4_spec = { .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, .start = &p4_start, - .stop = &p4_stop + .stop = &p4_stop, + .shutdown = &p4_shutdown }; diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c index 5c3ab4b027a..f88e05ba8eb 100644 --- a/arch/i386/oprofile/op_model_ppro.c +++ b/arch/i386/oprofile/op_model_ppro.c @@ -22,10 +22,12 @@ #define NUM_COUNTERS 2 #define NUM_CONTROLS 2 +#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) +#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) #define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0) #define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0) #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) @@ -41,11 +43,21 @@ static unsigned long reset_value[NUM_COUNTERS]; static void ppro_fill_in_addresses(struct op_msrs * const msrs) { - msrs->counters[0].addr = MSR_P6_PERFCTR0; - msrs->counters[1].addr = MSR_P6_PERFCTR1; + int i; + + for (i=0; i < NUM_COUNTERS; i++) { + if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) + msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; + else + msrs->counters[i].addr = 0; + } - msrs->controls[0].addr = MSR_P6_EVNTSEL0; - msrs->controls[1].addr = MSR_P6_EVNTSEL1; + for (i=0; i < NUM_CONTROLS; i++) { + if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) + msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; + else + msrs->controls[i].addr = 0; + } } @@ -56,6 +68,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) /* clear all counters */ for (i = 0 ; i < NUM_CONTROLS; ++i) { + if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + continue; CTRL_READ(low, high, msrs, i); CTRL_CLEAR(low); CTRL_WRITE(low, high, msrs, i); @@ -63,12 +77,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) /* avoid a false detection of ctr overflows in NMI handler */ for (i = 0; i < NUM_COUNTERS; ++i) { + if (unlikely(!CTR_IS_RESERVED(msrs,i))) + continue; CTR_WRITE(1, msrs, i); } /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { - if (counter_config[i].enabled) { + if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { reset_value[i] = counter_config[i].count; CTR_WRITE(counter_config[i].count, msrs, i); @@ -81,6 +97,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) CTRL_SET_UM(low, counter_config[i].unit_mask); CTRL_SET_EVENT(low, counter_config[i].event); CTRL_WRITE(low, high, msrs, i); + } else { + reset_value[i] = 0; } } } @@ -93,6 +111,8 @@ static int ppro_check_ctrs(struct pt_regs * const regs, int i; for (i = 0 ; i < NUM_COUNTERS; ++i) { + if (!reset_value[i]) + continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { oprofile_add_sample(regs, i); @@ -118,18 +138,38 @@ static int ppro_check_ctrs(struct pt_regs * const regs, static void ppro_start(struct op_msrs const * const msrs) { unsigned int low,high; - CTRL_READ(low, high, msrs, 0); - CTRL_SET_ACTIVE(low); - CTRL_WRITE(low, high, msrs, 0); + + if (reset_value[0]) { + CTRL_READ(low, high, msrs, 0); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, 0); + } } static void ppro_stop(struct op_msrs const * const msrs) { unsigned int low,high; - CTRL_READ(low, high, msrs, 0); - CTRL_SET_INACTIVE(low); - CTRL_WRITE(low, high, msrs, 0); + + if (reset_value[0]) { + CTRL_READ(low, high, msrs, 0); + CTRL_SET_INACTIVE(low); + CTRL_WRITE(low, high, msrs, 0); + } +} + +static void ppro_shutdown(struct op_msrs const * const msrs) +{ + int i; + + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (CTR_IS_RESERVED(msrs,i)) + release_perfctr_nmi(MSR_P6_PERFCTR0 + i); + } + for (i = 0 ; i < NUM_CONTROLS ; ++i) { + if (CTRL_IS_RESERVED(msrs,i)) + release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); + } } @@ -140,5 +180,6 @@ struct op_x86_model_spec const op_ppro_spec = { .setup_ctrs = &ppro_setup_ctrs, .check_ctrs = &ppro_check_ctrs, .start = &ppro_start, - .stop = &ppro_stop + .stop = &ppro_stop, + .shutdown = &ppro_shutdown }; diff --git a/arch/i386/oprofile/op_x86_model.h b/arch/i386/oprofile/op_x86_model.h index 123b7e90a9e..abb1aa95b97 100644 --- a/arch/i386/oprofile/op_x86_model.h +++ b/arch/i386/oprofile/op_x86_model.h @@ -40,6 +40,7 @@ struct op_x86_model_spec { struct op_msrs const * const msrs); void (*start)(struct op_msrs const * const msrs); void (*stop)(struct op_msrs const * const msrs); + void (*shutdown)(struct op_msrs const * const msrs); }; extern struct op_x86_model_spec const op_ppro_spec; -- cgit v1.2.3 From b7471c6da94d30d3deadc55986cc38d1ff57f9ca Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:26 +0200 Subject: [PATCH] i386: Add SMP support on i386 to reservation framework This patch includes the changes to make the nmi watchdog on i386 SMP aware. A bunch of code was moved around to make it simpler to read. In addition, it is now possible to determine if a particular NMI was the result of the watchdog or not. This feature allows the kernel to filter out unknown NMIs easier. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/kernel/apic.c | 3 +- arch/i386/kernel/nmi.c | 537 ++++++++++++++++++++++++------------- arch/i386/kernel/traps.c | 2 +- arch/i386/oprofile/nmi_timer_int.c | 4 +- 4 files changed, 357 insertions(+), 189 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 8c844d07862..1a34fc57800 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -586,8 +586,7 @@ void __devinit setup_local_APIC(void) printk("No ESR for 82489DX.\n"); } - if (nmi_watchdog == NMI_LOCAL_APIC) - setup_apic_nmi_watchdog(); + setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 5d58dfeacd5..d8800434303 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -24,16 +24,10 @@ #include #include +#include #include "mach_traps.h" -unsigned int nmi_watchdog = NMI_NONE; -extern int unknown_nmi_panic; -static unsigned int nmi_hz = HZ; -static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ -static unsigned int nmi_p4_cccr_val; -extern void show_registers(struct pt_regs *regs); - /* perfctr_nmi_owner tracks the ownership of the perfctr registers: * evtsel_nmi_owner tracks the ownership of the event selection * - different performance counters/ event selection may be reserved for @@ -63,51 +57,31 @@ static unsigned int lapic_nmi_owner; #define LAPIC_NMI_RESERVED (1<<1) /* nmi_active: - * +1: the lapic NMI watchdog is active, but can be disabled - * 0: the lapic NMI watchdog has not been set up, and cannot + * >0: the lapic NMI watchdog is active, but can be disabled + * <0: the lapic NMI watchdog has not been set up, and cannot * be enabled - * -1: the lapic NMI watchdog is disabled, but can be enabled + * 0: the lapic NMI watchdog is disabled, but can be enabled */ -int nmi_active; +atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -#define K7_EVNTSEL_ENABLE (1 << 22) -#define K7_EVNTSEL_INT (1 << 20) -#define K7_EVNTSEL_OS (1 << 17) -#define K7_EVNTSEL_USR (1 << 16) -#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 -#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING +unsigned int nmi_watchdog = NMI_DEFAULT; +static unsigned int nmi_hz = HZ; -#define P6_EVNTSEL0_ENABLE (1 << 22) -#define P6_EVNTSEL_INT (1 << 20) -#define P6_EVNTSEL_OS (1 << 17) -#define P6_EVNTSEL_USR (1 << 16) -#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 -#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED +struct nmi_watchdog_ctlblk { + int enabled; + u64 check_bit; + unsigned int cccr_msr; + unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ + unsigned int evntsel_msr; /* the MSR to select the events to handle */ +}; +static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); -#define MSR_P4_MISC_ENABLE 0x1A0 -#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) -#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) -#define MSR_P4_PERFCTR0 0x300 -#define MSR_P4_CCCR0 0x360 -#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) -#define P4_ESCR_OS (1<<3) -#define P4_ESCR_USR (1<<2) -#define P4_CCCR_OVF_PMI0 (1<<26) -#define P4_CCCR_OVF_PMI1 (1<<27) -#define P4_CCCR_THRESHOLD(N) ((N)<<20) -#define P4_CCCR_COMPLEMENT (1<<19) -#define P4_CCCR_COMPARE (1<<18) -#define P4_CCCR_REQUIRED (3<<16) -#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) -#define P4_CCCR_ENABLE (1<<12) -/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter - CRU_ESCR0 (with any non-null event selector) through a complemented - max threshold. [IA32-Vol3, Section 14.9.9] */ -#define MSR_P4_IQ_COUNTER0 0x30C -#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) -#define P4_NMI_IQ_CCCR0 \ - (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ - P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) +/* local prototypes */ +static void stop_apic_nmi_watchdog(void *unused); +static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); + +extern void show_registers(struct pt_regs *regs); +extern int unknown_nmi_panic; /* converts an msr to an appropriate reservation bit */ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) @@ -208,6 +182,17 @@ void release_evntsel_nmi(unsigned int msr) clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]); } +static __cpuinit inline int nmi_known_cpu(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); + case X86_VENDOR_INTEL: + return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); + } + return 0; +} + #ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all @@ -234,7 +219,10 @@ static int __init check_nmi_watchdog(void) unsigned int *prev_nmi_count; int cpu; - if (nmi_watchdog == NMI_NONE) + if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) + return 0; + + if (!atomic_read(&nmi_active)) return 0; prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); @@ -258,18 +246,22 @@ static int __init check_nmi_watchdog(void) if (!cpu_isset(cpu, cpu_callin_map)) continue; #endif + if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) + continue; if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - endflag = 1; printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", cpu, prev_nmi_count[cpu], nmi_count(cpu)); - nmi_active = 0; - lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; - kfree(prev_nmi_count); - return -1; + per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; + atomic_dec(&nmi_active); } } + if (!atomic_read(&nmi_active)) { + kfree(prev_nmi_count); + atomic_set(&nmi_active, -1); + return -1; + } endflag = 1; printk("OK.\n"); @@ -290,31 +282,16 @@ static int __init setup_nmi_watchdog(char *str) get_option(&str, &nmi); - if (nmi >= NMI_INVALID) + if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) return 0; - if (nmi == NMI_NONE) - nmi_watchdog = nmi; /* * If any other x86 CPU has a local APIC, then * please test the NMI stuff there and send me the * missing bits. Right now Intel P6/P4 and AMD K7 only. */ - if ((nmi == NMI_LOCAL_APIC) && - (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) - nmi_watchdog = nmi; - if ((nmi == NMI_LOCAL_APIC) && - (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && - (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) - nmi_watchdog = nmi; - /* - * We can enable the IO-APIC watchdog - * unconditionally. - */ - if (nmi == NMI_IO_APIC) { - nmi_active = 1; - nmi_watchdog = nmi; - } + if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0)) + return 0; /* no lapic support */ + nmi_watchdog = nmi; return 1; } @@ -322,41 +299,30 @@ __setup("nmi_watchdog=", setup_nmi_watchdog); static void disable_lapic_nmi_watchdog(void) { - if (nmi_active <= 0) + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + if (atomic_read(&nmi_active) <= 0) return; - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - wrmsr(MSR_K7_EVNTSEL0, 0, 0); - break; - case X86_VENDOR_INTEL: - switch (boot_cpu_data.x86) { - case 6: - if (boot_cpu_data.x86_model > 0xd) - break; - wrmsr(MSR_P6_EVNTSEL0, 0, 0); - break; - case 15: - if (boot_cpu_data.x86_model > 0x4) - break; + on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); - wrmsr(MSR_P4_IQ_CCCR0, 0, 0); - wrmsr(MSR_P4_CRU_ESCR0, 0, 0); - break; - } - break; - } - nmi_active = -1; - /* tell do_nmi() and others that we're not active any more */ - nmi_watchdog = 0; + BUG_ON(atomic_read(&nmi_active) != 0); } static void enable_lapic_nmi_watchdog(void) { - if (nmi_active < 0) { - nmi_watchdog = NMI_LOCAL_APIC; - setup_apic_nmi_watchdog(); - } + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + /* are we already enabled */ + if (atomic_read(&nmi_active) != 0) + return; + + /* are we lapic aware */ + if (nmi_known_cpu() <= 0) + return; + + on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); + touch_nmi_watchdog(); } int reserve_lapic_nmi(void) @@ -388,20 +354,25 @@ void release_lapic_nmi(void) void disable_timer_nmi_watchdog(void) { - if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) + BUG_ON(nmi_watchdog != NMI_IO_APIC); + + if (atomic_read(&nmi_active) <= 0) return; - unset_nmi_callback(); - nmi_active = -1; - nmi_watchdog = NMI_NONE; + disable_irq(0); + on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); + + BUG_ON(atomic_read(&nmi_active) != 0); } void enable_timer_nmi_watchdog(void) { - if (nmi_active < 0) { - nmi_watchdog = NMI_IO_APIC; + BUG_ON(nmi_watchdog != NMI_IO_APIC); + + if (atomic_read(&nmi_active) == 0) { touch_nmi_watchdog(); - nmi_active = 1; + on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); + enable_irq(0); } } @@ -411,7 +382,7 @@ static int nmi_pm_active; /* nmi_active before suspend */ static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) { - nmi_pm_active = nmi_active; + nmi_pm_active = atomic_read(&nmi_active); disable_lapic_nmi_watchdog(); return 0; } @@ -439,7 +410,13 @@ static int __init init_lapic_nmi_sysfs(void) { int error; - if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) + /* should really be a BUG_ON but b/c this is an + * init call, it just doesn't work. -dcz + */ + if (nmi_watchdog != NMI_LOCAL_APIC) + return 0; + + if ( atomic_read(&nmi_active) < 0 ) return 0; error = sysdev_class_register(&nmi_sysclass); @@ -457,143 +434,312 @@ late_initcall(init_lapic_nmi_sysfs); * Original code written by Keith Owens. */ -static void write_watchdog_counter(const char *descr) +static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr) { u64 count = (u64)cpu_khz * 1000; do_div(count, nmi_hz); if(descr) Dprintk("setting %s to -0x%08Lx\n", descr, count); - wrmsrl(nmi_perfctr_msr, 0 - count); + wrmsrl(perfctr_msr, 0 - count); } +/* Note that these events don't tick when the CPU idles. This means + the frequency varies with CPU load. */ + +#define K7_EVNTSEL_ENABLE (1 << 22) +#define K7_EVNTSEL_INT (1 << 20) +#define K7_EVNTSEL_OS (1 << 17) +#define K7_EVNTSEL_USR (1 << 16) +#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 +#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING + static int setup_k7_watchdog(void) { + unsigned int perfctr_msr, evntsel_msr; unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - nmi_perfctr_msr = MSR_K7_PERFCTR0; - - if (!reserve_perfctr_nmi(nmi_perfctr_msr)) + perfctr_msr = MSR_K7_PERFCTR0; + evntsel_msr = MSR_K7_EVNTSEL0; + if (!reserve_perfctr_nmi(perfctr_msr)) goto fail; - if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) + if (!reserve_evntsel_nmi(evntsel_msr)) goto fail1; - wrmsrl(MSR_K7_PERFCTR0, 0UL); + wrmsrl(perfctr_msr, 0UL); evntsel = K7_EVNTSEL_INT | K7_EVNTSEL_OS | K7_EVNTSEL_USR | K7_NMI_EVENT; - wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); - write_watchdog_counter("K7_PERFCTR0"); + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + write_watchdog_counter(perfctr_msr, "K7_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= K7_EVNTSEL_ENABLE; - wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + wd->check_bit = 1ULL<<63; return 1; fail1: - release_perfctr_nmi(nmi_perfctr_msr); + release_perfctr_nmi(perfctr_msr); fail: return 0; } +static void stop_k7_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + wrmsr(wd->evntsel_msr, 0, 0); + + release_evntsel_nmi(wd->evntsel_msr); + release_perfctr_nmi(wd->perfctr_msr); +} + +#define P6_EVNTSEL0_ENABLE (1 << 22) +#define P6_EVNTSEL_INT (1 << 20) +#define P6_EVNTSEL_OS (1 << 17) +#define P6_EVNTSEL_USR (1 << 16) +#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 +#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED + static int setup_p6_watchdog(void) { + unsigned int perfctr_msr, evntsel_msr; unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - nmi_perfctr_msr = MSR_P6_PERFCTR0; - - if (!reserve_perfctr_nmi(nmi_perfctr_msr)) + perfctr_msr = MSR_P6_PERFCTR0; + evntsel_msr = MSR_P6_EVNTSEL0; + if (!reserve_perfctr_nmi(perfctr_msr)) goto fail; - if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0)) + if (!reserve_evntsel_nmi(evntsel_msr)) goto fail1; + wrmsrl(perfctr_msr, 0UL); + evntsel = P6_EVNTSEL_INT | P6_EVNTSEL_OS | P6_EVNTSEL_USR | P6_NMI_EVENT; - wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); - write_watchdog_counter("P6_PERFCTR0"); + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + write_watchdog_counter(perfctr_msr, "P6_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= P6_EVNTSEL0_ENABLE; - wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + wd->check_bit = 1ULL<<39; return 1; fail1: - release_perfctr_nmi(nmi_perfctr_msr); + release_perfctr_nmi(perfctr_msr); fail: return 0; } +static void stop_p6_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + wrmsr(wd->evntsel_msr, 0, 0); + + release_evntsel_nmi(wd->evntsel_msr); + release_perfctr_nmi(wd->perfctr_msr); +} + +/* Note that these events don't tick when the CPU idles. This means + the frequency varies with CPU load. */ + +#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) +#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) +#define P4_ESCR_OS (1<<3) +#define P4_ESCR_USR (1<<2) +#define P4_CCCR_OVF_PMI0 (1<<26) +#define P4_CCCR_OVF_PMI1 (1<<27) +#define P4_CCCR_THRESHOLD(N) ((N)<<20) +#define P4_CCCR_COMPLEMENT (1<<19) +#define P4_CCCR_COMPARE (1<<18) +#define P4_CCCR_REQUIRED (3<<16) +#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) +#define P4_CCCR_ENABLE (1<<12) +#define P4_CCCR_OVF (1<<31) +/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter + CRU_ESCR0 (with any non-null event selector) through a complemented + max threshold. [IA32-Vol3, Section 14.9.9] */ + static int setup_p4_watchdog(void) { + unsigned int perfctr_msr, evntsel_msr, cccr_msr; + unsigned int evntsel, cccr_val; unsigned int misc_enable, dummy; + unsigned int ht_num; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); + rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) return 0; - nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; - nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; #ifdef CONFIG_SMP - if (smp_num_siblings == 2) - nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; + /* detect which hyperthread we are on */ + if (smp_num_siblings == 2) { + unsigned int ebx, apicid; + + ebx = cpuid_ebx(1); + apicid = (ebx >> 24) & 0xff; + ht_num = apicid & 1; + } else #endif + ht_num = 0; + + /* performance counters are shared resources + * assign each hyperthread its own set + * (re-use the ESCR0 register, seems safe + * and keeps the cccr_val the same) + */ + if (!ht_num) { + /* logical cpu 0 */ + perfctr_msr = MSR_P4_IQ_PERFCTR0; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR0; + cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); + } else { + /* logical cpu 1 */ + perfctr_msr = MSR_P4_IQ_PERFCTR1; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR1; + cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); + } - if (!reserve_perfctr_nmi(nmi_perfctr_msr)) + if (!reserve_perfctr_nmi(perfctr_msr)) goto fail; - if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) + if (!reserve_evntsel_nmi(evntsel_msr)) goto fail1; - wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); - wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); - write_watchdog_counter("P4_IQ_COUNTER0"); + evntsel = P4_ESCR_EVENT_SELECT(0x3F) + | P4_ESCR_OS + | P4_ESCR_USR; + + cccr_val |= P4_CCCR_THRESHOLD(15) + | P4_CCCR_COMPLEMENT + | P4_CCCR_COMPARE + | P4_CCCR_REQUIRED; + + wrmsr(evntsel_msr, evntsel, 0); + wrmsr(cccr_msr, cccr_val, 0); + write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0"); apic_write(APIC_LVTPC, APIC_DM_NMI); - wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); + cccr_val |= P4_CCCR_ENABLE; + wrmsr(cccr_msr, cccr_val, 0); + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = cccr_msr; + wd->check_bit = 1ULL<<39; return 1; fail1: - release_perfctr_nmi(nmi_perfctr_msr); + release_perfctr_nmi(perfctr_msr); fail: return 0; } -void setup_apic_nmi_watchdog (void) +static void stop_p4_watchdog(void) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) - return; - if (!setup_k7_watchdog()) - return; - break; - case X86_VENDOR_INTEL: - switch (boot_cpu_data.x86) { - case 6: - if (boot_cpu_data.x86_model > 0xd) - return; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + wrmsr(wd->cccr_msr, 0, 0); + wrmsr(wd->evntsel_msr, 0, 0); - if(!setup_p6_watchdog()) + release_evntsel_nmi(wd->evntsel_msr); + release_perfctr_nmi(wd->perfctr_msr); +} + +void setup_apic_nmi_watchdog (void *unused) +{ + /* only support LOCAL and IO APICs for now */ + if ((nmi_watchdog != NMI_LOCAL_APIC) && + (nmi_watchdog != NMI_IO_APIC)) + return; + + if (nmi_watchdog == NMI_LOCAL_APIC) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) return; - break; - case 15: - if (boot_cpu_data.x86_model > 0x4) + if (!setup_k7_watchdog()) return; + break; + case X86_VENDOR_INTEL: + switch (boot_cpu_data.x86) { + case 6: + if (boot_cpu_data.x86_model > 0xd) + return; + + if (!setup_p6_watchdog()) + return; + break; + case 15: + if (boot_cpu_data.x86_model > 0x4) + return; - if (!setup_p4_watchdog()) + if (!setup_p4_watchdog()) + return; + break; + default: return; + } + break; + default: + return; + } + } + __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1; + atomic_inc(&nmi_active); +} + +static void stop_apic_nmi_watchdog(void *unused) +{ + /* only support LOCAL and IO APICs for now */ + if ((nmi_watchdog != NMI_LOCAL_APIC) && + (nmi_watchdog != NMI_IO_APIC)) + return; + + if (nmi_watchdog == NMI_LOCAL_APIC) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + stop_k7_watchdog(); + break; + case X86_VENDOR_INTEL: + switch (boot_cpu_data.x86) { + case 6: + if (boot_cpu_data.x86_model > 0xd) + break; + stop_p6_watchdog(); + break; + case 15: + if (boot_cpu_data.x86_model > 0x4) + break; + stop_p4_watchdog(); + break; + } break; default: return; } - break; - default: - return; } - lapic_nmi_owner = LAPIC_NMI_WATCHDOG; - nmi_active = 1; + __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0; + atomic_dec(&nmi_active); } /* @@ -635,7 +781,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog); extern void die_nmi(struct pt_regs *, const char *msg); -void nmi_watchdog_tick (struct pt_regs * regs) +void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) { /* @@ -644,11 +790,21 @@ void nmi_watchdog_tick (struct pt_regs * regs) * smp_processor_id(). */ unsigned int sum; + int touched = 0; int cpu = smp_processor_id(); + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + u64 dummy; + + /* check for other users first */ + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) + == NOTIFY_STOP) { + touched = 1; + } sum = per_cpu(irq_stat, cpu).apic_timer_irqs; - if (last_irq_sums[cpu] == sum) { + /* if the apic timer isn't firing, this cpu isn't doing much */ + if (!touched && last_irq_sums[cpu] == sum) { /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... @@ -663,26 +819,41 @@ void nmi_watchdog_tick (struct pt_regs * regs) last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; } - if (nmi_perfctr_msr) { - if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { - /* - * P4 quirks: - * - An overflown perfctr will assert its interrupt - * until the OVF flag in its CCCR is cleared. - * - LVTPC is masked on interrupt and must be - * unmasked by the LVTPC handler. - */ - wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); - apic_write(APIC_LVTPC, APIC_DM_NMI); - } - else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { - /* Only P6 based Pentium M need to re-unmask - * the apic vector but it doesn't hurt - * other P6 variant */ - apic_write(APIC_LVTPC, APIC_DM_NMI); + /* see if the nmi watchdog went off */ + if (wd->enabled) { + if (nmi_watchdog == NMI_LOCAL_APIC) { + rdmsrl(wd->perfctr_msr, dummy); + if (dummy & wd->check_bit){ + /* this wasn't a watchdog timer interrupt */ + goto done; + } + + /* only Intel P4 uses the cccr msr */ + if (wd->cccr_msr != 0) { + /* + * P4 quirks: + * - An overflown perfctr will assert its interrupt + * until the OVF flag in its CCCR is cleared. + * - LVTPC is masked on interrupt and must be + * unmasked by the LVTPC handler. + */ + rdmsrl(wd->cccr_msr, dummy); + dummy &= ~P4_CCCR_OVF; + wrmsrl(wd->cccr_msr, dummy); + apic_write(APIC_LVTPC, APIC_DM_NMI); + } + else if (wd->perfctr_msr == MSR_P6_PERFCTR0) { + /* Only P6 based Pentium M need to re-unmask + * the apic vector but it doesn't hurt + * other P6 variant */ + apic_write(APIC_LVTPC, APIC_DM_NMI); + } + /* start the cycle over again */ + write_watchdog_counter(wd->perfctr_msr, NULL); } - write_watchdog_counter(NULL); } +done: + return; } #ifdef CONFIG_SYSCTL diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 7e9edafffd8..3a07b2677e2 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -724,7 +724,7 @@ static void default_do_nmi(struct pt_regs * regs) * so it must be the NMI watchdog. */ if (nmi_watchdog) { - nmi_watchdog_tick(regs); + nmi_watchdog_tick(regs, reason); return; } #endif diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c index 930a1127bb3..a33a73bb502 100644 --- a/arch/i386/oprofile/nmi_timer_int.c +++ b/arch/i386/oprofile/nmi_timer_int.c @@ -42,9 +42,7 @@ static void timer_stop(void) int __init op_nmi_timer_init(struct oprofile_operations * ops) { - extern int nmi_active; - - if (nmi_active <= 0) + if (atomic_read(&nmi_active) <= 0) return -ENODEV; ops->start = timer_start; -- cgit v1.2.3 From 3adbbcce9a49b900d4cc118cdccfdefa78bf1afb Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:26 +0200 Subject: [PATCH] x86: Cleanup NMI interrupt path This patch cleans up the NMI interrupt path. Instead of being gated by if the 'nmi callback' is set, the interrupt handler now calls everyone who is registered on the die_chain and additionally checks the nmi watchdog, reseting it if enabled. This allows more subsystems to hook into the NMI if they need to (without being block by set_nmi_callback). Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/kernel/nmi.c | 16 +++++++++++++--- arch/i386/kernel/traps.c | 24 +++++++++++------------- 2 files changed, 24 insertions(+), 16 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index d8800434303..bd96ea4f294 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -781,7 +781,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog); extern void die_nmi(struct pt_regs *, const char *msg); -void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) +int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) { /* @@ -794,10 +794,12 @@ void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) int cpu = smp_processor_id(); struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 dummy; + int rc=0; /* check for other users first */ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) { + rc = 1; touched = 1; } @@ -850,10 +852,18 @@ void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) } /* start the cycle over again */ write_watchdog_counter(wd->perfctr_msr, NULL); - } + rc = 1; + } else if (nmi_watchdog == NMI_IO_APIC) { + /* don't know how to accurately check for this. + * just assume it was a watchdog timer interrupt + * This matches the old behaviour. + */ + rc = 1; + } else + printk(KERN_WARNING "Unknown enabled NMI hardware?!\n"); } done: - return; + return rc; } #ifdef CONFIG_SYSCTL diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 3a07b2677e2..282f0bd40df 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -706,6 +706,13 @@ void die_nmi (struct pt_regs *regs, const char *msg) do_exit(SIGSEGV); } +static int dummy_nmi_callback(struct pt_regs * regs, int cpu) +{ + return 0; +} + +static nmi_callback_t nmi_callback = dummy_nmi_callback; + static void default_do_nmi(struct pt_regs * regs) { unsigned char reason = 0; @@ -723,12 +730,11 @@ static void default_do_nmi(struct pt_regs * regs) * Ok, so this is none of the documented NMI sources, * so it must be the NMI watchdog. */ - if (nmi_watchdog) { - nmi_watchdog_tick(regs, reason); + if (nmi_watchdog_tick(regs, reason)) return; - } #endif - unknown_nmi_error(reason, regs); + if (!rcu_dereference(nmi_callback)(regs, smp_processor_id())) + unknown_nmi_error(reason, regs); return; } if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -744,13 +750,6 @@ static void default_do_nmi(struct pt_regs * regs) reassert_nmi(); } -static int dummy_nmi_callback(struct pt_regs * regs, int cpu) -{ - return 0; -} - -static nmi_callback_t nmi_callback = dummy_nmi_callback; - fastcall void do_nmi(struct pt_regs * regs, long error_code) { int cpu; @@ -761,8 +760,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) ++nmi_count(cpu); - if (!rcu_dereference(nmi_callback)(regs, cpu)) - default_do_nmi(regs); + default_do_nmi(regs); nmi_exit(); } -- cgit v1.2.3 From 2fbe7b25c8edaf2d10e6c1a4cc9f8afe714c4764 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] i386/x86-64: Remove un/set_nmi_callback and reserve/release_lapic_nmi functions Removes the un/set_nmi_callback and reserve/release_lapic_nmi functions as they are no longer needed. The various subsystems are modified to register with the die_notifier instead. Also includes compile fixes by Andrew Morton. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/kernel/crash.c | 20 ++++++++- arch/i386/kernel/nmi.c | 85 +++++--------------------------------- arch/i386/kernel/traps.c | 23 +---------- arch/i386/oprofile/nmi_int.c | 47 +++++++++++++-------- arch/i386/oprofile/nmi_timer_int.c | 33 +++++++++++---- 5 files changed, 86 insertions(+), 122 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 5b96f038367..736c76d6b31 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include @@ -93,9 +95,18 @@ static void crash_save_self(struct pt_regs *regs) #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static atomic_t waiting_for_crash_ipi; -static int crash_nmi_callback(struct pt_regs *regs, int cpu) +static int crash_nmi_callback(struct notifier_block *self, + unsigned long val, void *data) { + struct pt_regs *regs; struct pt_regs fixed_regs; + int cpu; + + if (val != DIE_NMI) + return NOTIFY_OK; + + regs = ((struct die_args *)data)->regs; + cpu = raw_smp_processor_id(); /* Don't do anything if this handler is invoked on crashing cpu. * Otherwise, system will completely hang. Crashing cpu can get @@ -125,13 +136,18 @@ static void smp_send_nmi_allbutself(void) send_IPI_allbutself(NMI_VECTOR); } +static struct notifier_block crash_nmi_nb = { + .notifier_call = crash_nmi_callback, +}; + static void nmi_shootdown_cpus(void) { unsigned long msecs; atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); /* Would it be better to replace the trap vector here? */ - set_nmi_callback(crash_nmi_callback); + if (register_die_notifier(&crash_nmi_nb)) + return; /* return what? */ /* Ensure the new callback function is set before sending * out the NMI */ diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index bd96ea4f294..acd3fdea2a2 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -42,20 +42,6 @@ static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); */ #define NMI_MAX_COUNTER_BITS 66 -/* - * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: - * - it may be reserved by some other driver, or not - * - when not reserved by some other driver, it may be used for - * the NMI watchdog, or not - * - * This is maintained separately from nmi_active because the NMI - * watchdog may also be driven from the I/O APIC timer. - */ -static DEFINE_SPINLOCK(lapic_nmi_owner_lock); -static unsigned int lapic_nmi_owner; -#define LAPIC_NMI_WATCHDOG (1<<0) -#define LAPIC_NMI_RESERVED (1<<1) - /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled * <0: the lapic NMI watchdog has not been set up, and cannot @@ -325,33 +311,6 @@ static void enable_lapic_nmi_watchdog(void) touch_nmi_watchdog(); } -int reserve_lapic_nmi(void) -{ - unsigned int old_owner; - - spin_lock(&lapic_nmi_owner_lock); - old_owner = lapic_nmi_owner; - lapic_nmi_owner |= LAPIC_NMI_RESERVED; - spin_unlock(&lapic_nmi_owner_lock); - if (old_owner & LAPIC_NMI_RESERVED) - return -EBUSY; - if (old_owner & LAPIC_NMI_WATCHDOG) - disable_lapic_nmi_watchdog(); - return 0; -} - -void release_lapic_nmi(void) -{ - unsigned int new_owner; - - spin_lock(&lapic_nmi_owner_lock); - new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; - lapic_nmi_owner = new_owner; - spin_unlock(&lapic_nmi_owner_lock); - if (new_owner & LAPIC_NMI_WATCHDOG) - enable_lapic_nmi_watchdog(); -} - void disable_timer_nmi_watchdog(void) { BUG_ON(nmi_watchdog != NMI_IO_APIC); @@ -866,6 +825,15 @@ done: return rc; } +int do_nmi_callback(struct pt_regs * regs, int cpu) +{ +#ifdef CONFIG_SYSCTL + if (unknown_nmi_panic) + return unknown_nmi_panic_callback(regs, cpu); +#endif + return 0; +} + #ifdef CONFIG_SYSCTL static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) @@ -873,37 +841,8 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) unsigned char reason = get_nmi_reason(); char buf[64]; - if (!(reason & 0xc0)) { - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(regs, buf); - } - return 0; -} - -/* - * proc handler for /proc/sys/kernel/unknown_nmi_panic - */ -int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - old_state = unknown_nmi_panic; - proc_dointvec(table, write, file, buffer, length, ppos); - if (!!old_state == !!unknown_nmi_panic) - return 0; - - if (unknown_nmi_panic) { - if (reserve_lapic_nmi() < 0) { - unknown_nmi_panic = 0; - return -EBUSY; - } else { - set_nmi_callback(unknown_nmi_panic_callback); - } - } else { - release_lapic_nmi(); - unset_nmi_callback(); - } + sprintf(buf, "NMI received for unknown reason %02x\n", reason); + die_nmi(regs, buf); return 0; } @@ -917,7 +856,5 @@ EXPORT_SYMBOL(reserve_perfctr_nmi); EXPORT_SYMBOL(release_perfctr_nmi); EXPORT_SYMBOL(reserve_evntsel_nmi); EXPORT_SYMBOL(release_evntsel_nmi); -EXPORT_SYMBOL(reserve_lapic_nmi); -EXPORT_SYMBOL(release_lapic_nmi); EXPORT_SYMBOL(disable_timer_nmi_watchdog); EXPORT_SYMBOL(enable_timer_nmi_watchdog); diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 282f0bd40df..7db664d0b25 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -706,13 +706,6 @@ void die_nmi (struct pt_regs *regs, const char *msg) do_exit(SIGSEGV); } -static int dummy_nmi_callback(struct pt_regs * regs, int cpu) -{ - return 0; -} - -static nmi_callback_t nmi_callback = dummy_nmi_callback; - static void default_do_nmi(struct pt_regs * regs) { unsigned char reason = 0; @@ -732,9 +725,10 @@ static void default_do_nmi(struct pt_regs * regs) */ if (nmi_watchdog_tick(regs, reason)) return; + if (!do_nmi_callback(regs, smp_processor_id())) #endif - if (!rcu_dereference(nmi_callback)(regs, smp_processor_id())) unknown_nmi_error(reason, regs); + return; } if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -765,19 +759,6 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) nmi_exit(); } -void set_nmi_callback(nmi_callback_t callback) -{ - vmalloc_sync_all(); - rcu_assign_pointer(nmi_callback, callback); -} -EXPORT_SYMBOL_GPL(set_nmi_callback); - -void unset_nmi_callback(void) -{ - nmi_callback = dummy_nmi_callback; -} -EXPORT_SYMBOL_GPL(unset_nmi_callback); - #ifdef CONFIG_KPROBES fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) { diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index 8710ca081b1..b3610188bcf 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c @@ -17,14 +17,15 @@ #include #include #include +#include #include "op_counter.h" #include "op_x86_model.h" - + static struct op_x86_model_spec const * model; static struct op_msrs cpu_msrs[NR_CPUS]; static unsigned long saved_lvtpc[NR_CPUS]; - + static int nmi_start(void); static void nmi_stop(void); @@ -82,13 +83,24 @@ static void exit_driverfs(void) #define exit_driverfs() do { } while (0) #endif /* CONFIG_PM */ - -static int nmi_callback(struct pt_regs * regs, int cpu) +int profile_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { - return model->check_ctrs(regs, &cpu_msrs[cpu]); + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + int cpu = smp_processor_id(); + + switch(val) { + case DIE_NMI: + if (model->check_ctrs(args->regs, &cpu_msrs[cpu])) + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; } - - + static void nmi_cpu_save_registers(struct op_msrs * msrs) { unsigned int const nr_ctrs = model->num_counters; @@ -174,27 +186,29 @@ static void nmi_cpu_setup(void * dummy) apic_write(APIC_LVTPC, APIC_DM_NMI); } +static struct notifier_block profile_exceptions_nb = { + .notifier_call = profile_exceptions_notify, + .next = NULL, + .priority = 0 +}; static int nmi_setup(void) { + int err=0; + if (!allocate_msrs()) return -ENOMEM; - /* We walk a thin line between law and rape here. - * We need to be careful to install our NMI handler - * without actually triggering any NMIs as this will - * break the core code horrifically. - */ - if (reserve_lapic_nmi() < 0) { + if ((err = register_die_notifier(&profile_exceptions_nb))){ free_msrs(); - return -EBUSY; + return err; } + /* We need to serialize save and setup for HT because the subset * of msrs are distinct for save and setup operations */ on_each_cpu(nmi_save_registers, NULL, 0, 1); on_each_cpu(nmi_cpu_setup, NULL, 0, 1); - set_nmi_callback(nmi_callback); nmi_enabled = 1; return 0; } @@ -250,8 +264,7 @@ static void nmi_shutdown(void) { nmi_enabled = 0; on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); - unset_nmi_callback(); - release_lapic_nmi(); + unregister_die_notifier(&profile_exceptions_nb); free_msrs(); } diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c index a33a73bb502..93ca48f804a 100644 --- a/arch/i386/oprofile/nmi_timer_int.c +++ b/arch/i386/oprofile/nmi_timer_int.c @@ -17,32 +17,49 @@ #include #include #include +#include -static int nmi_timer_callback(struct pt_regs * regs, int cpu) +int profile_timer_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { - oprofile_add_sample(regs, 0); - return 1; + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch(val) { + case DIE_NMI: + oprofile_add_sample(args->regs, 0); + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; } +static struct notifier_block profile_timer_exceptions_nb = { + .notifier_call = profile_timer_exceptions_notify, + .next = NULL, + .priority = 0 +}; + static int timer_start(void) { - disable_timer_nmi_watchdog(); - set_nmi_callback(nmi_timer_callback); + if (register_die_notifier(&profile_timer_exceptions_nb)) + return 1; return 0; } static void timer_stop(void) { - enable_timer_nmi_watchdog(); - unset_nmi_callback(); + unregister_die_notifier(&profile_timer_exceptions_nb); synchronize_sched(); /* Allow already-started NMIs to complete. */ } int __init op_nmi_timer_init(struct oprofile_operations * ops) { - if (atomic_read(&nmi_active) <= 0) + if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) return -ENODEV; ops->start = timer_start; -- cgit v1.2.3 From 407984f1af259b31957c7c05075a454a751bb801 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] x86: Add abilty to enable/disable nmi watchdog with sysctl Adds a new /proc/sys/kernel/nmi call that will enable/disable the nmi watchdog. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/kernel/nmi.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index acd3fdea2a2..28065d0b71a 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -846,6 +846,58 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) return 0; } +/* + * proc handler for /proc/sys/kernel/nmi_watchdog + */ +int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int old_state; + + nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; + old_state = nmi_watchdog_enabled; + proc_dointvec(table, write, file, buffer, length, ppos); + if (!!old_state == !!nmi_watchdog_enabled) + return 0; + + if (atomic_read(&nmi_active) < 0) { + printk(KERN_WARNING "NMI watchdog is permanently disabled\n"); + return -EINVAL; + } + + if (nmi_watchdog == NMI_DEFAULT) { + if (nmi_known_cpu() > 0) + nmi_watchdog = NMI_LOCAL_APIC; + else + nmi_watchdog = NMI_IO_APIC; + } + + if (nmi_watchdog == NMI_LOCAL_APIC) + { + if (nmi_watchdog_enabled) + enable_lapic_nmi_watchdog(); + else + disable_lapic_nmi_watchdog(); + } else if (nmi_watchdog == NMI_IO_APIC) { + /* FIXME + * for some reason these functions don't work + */ + printk("Can not enable/disable NMI on IO APIC\n"); + return -EINVAL; +#if 0 + if (nmi_watchdog_enabled) + enable_timer_nmi_watchdog(); + else + disable_timer_nmi_watchdog(); +#endif + } else { + printk( KERN_WARNING + "NMI watchdog doesn't know what hardware to touch\n"); + return -EIO; + } + return 0; +} + #endif EXPORT_SYMBOL(nmi_active); -- cgit v1.2.3 From e33e89ab1a8d295de0500b697f4f31c3ceee9aa2 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] x86: Add abilty to enable/disable nmi watchdog from procfs (update) Adds a new /proc/sys/kernel/nmi_watchdog call that will enable/disable the nmi watchdog. By entering a non-zero value here, a user can enable the nmi watchdog to monitor the online cpus in the system. By entering a zero value here, a user can disable the nmi watchdog and free up a performance counter which could then be utilized by the oprofile subsystem, otherwise oprofile may be short a counter when in use. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/nmi.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 28065d0b71a..6241e4448ca 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -847,7 +847,7 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) } /* - * proc handler for /proc/sys/kernel/nmi_watchdog + * proc handler for /proc/sys/kernel/nmi */ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) @@ -861,8 +861,8 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, return 0; if (atomic_read(&nmi_active) < 0) { - printk(KERN_WARNING "NMI watchdog is permanently disabled\n"); - return -EINVAL; + printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); + return -EIO; } if (nmi_watchdog == NMI_DEFAULT) { @@ -872,24 +872,11 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, nmi_watchdog = NMI_IO_APIC; } - if (nmi_watchdog == NMI_LOCAL_APIC) - { + if (nmi_watchdog == NMI_LOCAL_APIC) { if (nmi_watchdog_enabled) enable_lapic_nmi_watchdog(); else disable_lapic_nmi_watchdog(); - } else if (nmi_watchdog == NMI_IO_APIC) { - /* FIXME - * for some reason these functions don't work - */ - printk("Can not enable/disable NMI on IO APIC\n"); - return -EINVAL; -#if 0 - if (nmi_watchdog_enabled) - enable_timer_nmi_watchdog(); - else - disable_timer_nmi_watchdog(); -#endif } else { printk( KERN_WARNING "NMI watchdog doesn't know what hardware to touch\n"); -- cgit v1.2.3 From 8da5adda91df3d2fcc5300e68da491694c9af019 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] x86: Allow users to force a panic on NMI To quote Alan Cox: The default Linux behaviour on an NMI of either memory or unknown is to continue operation. For many environments such as scientific computing it is preferable that the box is taken out and the error dealt with than an uncorrected parity/ECC error get propogated. A small number of systems do generate NMI's for bizarre random reasons such as power management so the default is unchanged. In other respects the new proc/sys entry works like the existing panic controls already in that directory. This is separate to the edac support - EDAC allows supported chipsets to handle ECC errors well, this change allows unsupported cases to at least panic rather than cause problems further down the line. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 7db664d0b25..2f6cb827648 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -635,6 +635,8 @@ static void mem_parity_error(unsigned char reason, struct pt_regs * regs) "to continue\n"); printk(KERN_EMERG "You probably have a hardware problem with your RAM " "chips\n"); + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); /* Clear and disable the memory parity error line. */ clear_mem_error(reason); @@ -670,6 +672,10 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) reason, smp_processor_id()); printk("Dazed and confused, but trying to continue\n"); printk("Do you have a strange power saving mode enabled?\n"); + + if (panic_on_unrecovered_nmi) + panic("NMI: Not continuing"); + } static DEFINE_SPINLOCK(nmi_print_lock); -- cgit v1.2.3 From c41c5cd3b20a2d81c30498f13b1527847a8fdf69 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] x86: x86 clean up nmi panic messages Clean up some of the output messages on the nmi error paths to make more sense when they are displayed. This is mainly a cosmetic fix and shouldn't impact any normal code path. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 2f6cb827648..3c85c89f68d 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -631,13 +631,15 @@ gp_in_kernel: static void mem_parity_error(unsigned char reason, struct pt_regs * regs) { - printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying " - "to continue\n"); + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " + "CPU %d.\n", reason, smp_processor_id()); printk(KERN_EMERG "You probably have a hardware problem with your RAM " "chips\n"); if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + /* Clear and disable the memory parity error line. */ clear_mem_error(reason); } @@ -668,14 +670,13 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) return; } #endif - printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); - printk("Dazed and confused, but trying to continue\n"); - printk("Do you have a strange power saving mode enabled?\n"); - + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " + "CPU %d.\n", reason, smp_processor_id()); + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); + printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); } static DEFINE_SPINLOCK(nmi_print_lock); -- cgit v1.2.3 From 4038f901cf102a40715b900984ed7540a9fa637f Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] i386/x86-64: Fix NMI watchdog suspend/resume Making NMI suspend/resume work with SMP. We use CPU hotplug to offline APs in SMP suspend/resume. Only BSP executes sysdev's .suspend/.resume method. APs should follow CPU hotplug code path. And: +From: Don Zickus Makes the start/stop paths of nmi watchdog more robust to handle the suspend/resume cases more gracefully. AK: I merged the two patches together Signed-off-by: Shaohua Li Signed-off-by: Andi Kleen Cc: Don Zickus Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/nmi.c | 33 ++++++++++++++++++++++++++------- arch/i386/kernel/smpboot.c | 3 ++- 2 files changed, 28 insertions(+), 8 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 6241e4448ca..8e4ed930ce6 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -63,7 +63,6 @@ struct nmi_watchdog_ctlblk { static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); /* local prototypes */ -static void stop_apic_nmi_watchdog(void *unused); static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); extern void show_registers(struct pt_regs *regs); @@ -341,15 +340,20 @@ static int nmi_pm_active; /* nmi_active before suspend */ static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) { + /* only CPU0 goes here, other CPUs should be offline */ nmi_pm_active = atomic_read(&nmi_active); - disable_lapic_nmi_watchdog(); + stop_apic_nmi_watchdog(NULL); + BUG_ON(atomic_read(&nmi_active) != 0); return 0; } static int lapic_nmi_resume(struct sys_device *dev) { - if (nmi_pm_active > 0) - enable_lapic_nmi_watchdog(); + /* only CPU0 goes here, other CPUs should be offline */ + if (nmi_pm_active > 0) { + setup_apic_nmi_watchdog(NULL); + touch_nmi_watchdog(); + } return 0; } @@ -626,11 +630,21 @@ static void stop_p4_watchdog(void) void setup_apic_nmi_watchdog (void *unused) { + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + /* only support LOCAL and IO APICs for now */ if ((nmi_watchdog != NMI_LOCAL_APIC) && (nmi_watchdog != NMI_IO_APIC)) return; + if (wd->enabled == 1) + return; + + /* cheap hack to support suspend/resume */ + /* if cpu0 is not active neither should the other cpus */ + if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) + return; + if (nmi_watchdog == NMI_LOCAL_APIC) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: @@ -663,17 +677,22 @@ void setup_apic_nmi_watchdog (void *unused) return; } } - __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1; + wd->enabled = 1; atomic_inc(&nmi_active); } -static void stop_apic_nmi_watchdog(void *unused) +void stop_apic_nmi_watchdog(void *unused) { + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + /* only support LOCAL and IO APICs for now */ if ((nmi_watchdog != NMI_LOCAL_APIC) && (nmi_watchdog != NMI_IO_APIC)) return; + if (wd->enabled == 0) + return; + if (nmi_watchdog == NMI_LOCAL_APIC) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: @@ -697,7 +716,7 @@ static void stop_apic_nmi_watchdog(void *unused) return; } } - __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0; + wd->enabled = 0; atomic_dec(&nmi_active); } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index efe07990e7f..9367af76ce3 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1376,7 +1376,8 @@ int __cpu_disable(void) */ if (cpu == 0) return -EBUSY; - + if (nmi_watchdog == NMI_LOCAL_APIC) + stop_apic_nmi_watchdog(NULL); clear_local_APIC(); /* Allow any queued timer interrupts to get serviced */ local_irq_enable(); -- cgit v1.2.3 From c7c19f8e5e564fb1a354a065befc8a88a9bb08fd Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] i386: make functions static This patch makes the following needlessly global functions static: - nmi_int.c: profile_exceptions_notify() - nmi_timer_int.c: profile_timer_exceptions_notify() Signed-off-by: Adrian Bunk Signed-off-by: Andi Kleen --- arch/i386/oprofile/nmi_int.c | 4 ++-- arch/i386/oprofile/nmi_timer_int.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index b3610188bcf..3700eef7874 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c @@ -83,8 +83,8 @@ static void exit_driverfs(void) #define exit_driverfs() do { } while (0) #endif /* CONFIG_PM */ -int profile_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +static int profile_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c index 93ca48f804a..abf0ba52a63 100644 --- a/arch/i386/oprofile/nmi_timer_int.c +++ b/arch/i386/oprofile/nmi_timer_int.c @@ -19,8 +19,8 @@ #include #include -int profile_timer_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +static int profile_timer_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; -- cgit v1.2.3 From 260d6790b6a2a0a048b7f96d154c2b49f1e6515a Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] i386: Kdump i386 nmi event notification fix After a crash we should wait for NMI IPI event and not for external NMI or NMI watchdog tick. Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen Cc: Don Zickus Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/crash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 736c76d6b31..67d297dc100 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -102,7 +102,7 @@ static int crash_nmi_callback(struct notifier_block *self, struct pt_regs fixed_regs; int cpu; - if (val != DIE_NMI) + if (val != DIE_NMI_IPI) return NOTIFY_OK; regs = ((struct die_args *)data)->regs; @@ -113,7 +113,7 @@ static int crash_nmi_callback(struct notifier_block *self, * an NMI if system was initially booted with nmi_watchdog parameter. */ if (cpu == crashing_cpu) - return 1; + return NOTIFY_STOP; local_irq_disable(); if (!user_mode_vm(regs)) { -- cgit v1.2.3 From 1de84979dfc527c422abf63f27beabe43892989b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] i386: Enable NMI watchdog by default I've had good experiences with having this on by default on x86-64. It turns nasty hangs into easier to debug oopses. Enable the local APIC wdog by default for systems newer than 2004. This comes from a strange compromise: according to arjan the reason it was off by default was some old IBM systems that corrupted registered when NMI happened in SMI. Can't remember more specific, but >= 2004 should avoid these. It's probably overly broad because most older systems should be ok (and the really old systems won't be supported by the local apic watchdog anyways) Signed-off-by: Andi Kleen --- arch/i386/kernel/nmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 8e4ed930ce6..6e5085d5d2f 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -204,6 +205,14 @@ static int __init check_nmi_watchdog(void) unsigned int *prev_nmi_count; int cpu; + /* Enable NMI watchdog for newer systems. + Actually it should be safe for most systems before 2004 too except + for some IBM systems that corrupt registers when NMI happens + during SMM. Unfortunately we don't have more exact information + on these and use this coarse check. */ + if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004) + nmi_watchdog = NMI_LOCAL_APIC; + if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) return 0; -- cgit v1.2.3 From 248dcb2ffffe8f3e4a369556a68988788c208111 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] x86: i386/x86-64 Add nmi watchdog support for new Intel CPUs AK: This redoes the changes I temporarily reverted. Intel now has support for Architectural Performance Monitoring Counters ( Refer to IA-32 Intel Architecture Software Developer's Manual http://www.intel.com/design/pentium4/manuals/253669.htm ). This feature is present starting from Intel Core Duo and Intel Core Solo processors. What this means is, the performance monitoring counters and some performance monitoring events are now defined in an architectural way (using cpuid). And there will be no need to check for family/model etc for these architectural events. Below is the patch to use this performance counters in nmi watchdog driver. Patch handles both i386 and x86-64 kernels. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andi Kleen --- arch/i386/kernel/nmi.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 121 insertions(+), 5 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 6e5085d5d2f..7b9a053effa 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "mach_traps.h" @@ -77,6 +78,9 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) case X86_VENDOR_AMD: return (msr - MSR_K7_PERFCTR0); case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return (msr - MSR_ARCH_PERFMON_PERFCTR0); + switch (boot_cpu_data.x86) { case 6: return (msr - MSR_P6_PERFCTR0); @@ -95,6 +99,9 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) case X86_VENDOR_AMD: return (msr - MSR_K7_EVNTSEL0); case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return (msr - MSR_ARCH_PERFMON_EVENTSEL0); + switch (boot_cpu_data.x86) { case 6: return (msr - MSR_P6_EVNTSEL0); @@ -174,7 +181,10 @@ static __cpuinit inline int nmi_known_cpu(void) case X86_VENDOR_AMD: return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); case X86_VENDOR_INTEL: - return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return 1; + else + return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); } return 0; } @@ -261,8 +271,24 @@ static int __init check_nmi_watchdog(void) /* now that we know it works we can reduce NMI frequency to something more reasonable; makes a difference in some configs */ - if (nmi_watchdog == NMI_LOCAL_APIC) + if (nmi_watchdog == NMI_LOCAL_APIC) { + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + nmi_hz = 1; + /* + * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter + * are writable, with higher bits sign extending from bit 31. + * So, we can only program the counter with 31 bit values and + * 32nd bit should be 1, for 33.. to be 1. + * Find the appropriate nmi_hz + */ + if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 && + ((u64)cpu_khz * 1000) > 0x7fffffffULL) { + u64 count = (u64)cpu_khz * 1000; + do_div(count, 0x7fffffffUL); + nmi_hz = count + 1; + } + } kfree(prev_nmi_count); return 0; @@ -637,6 +663,85 @@ static void stop_p4_watchdog(void) release_perfctr_nmi(wd->perfctr_msr); } +#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL +#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK + +static int setup_intel_arch_watchdog(void) +{ + unsigned int ebx; + union cpuid10_eax eax; + unsigned int unused; + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebx indicates event present. + */ + cpuid(10, &(eax.full), &ebx, &unused, &unused); + if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || + (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + goto fail; + + perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; + evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0; + + if (!reserve_perfctr_nmi(perfctr_msr)) + goto fail; + + if (!reserve_evntsel_nmi(evntsel_msr)) + goto fail1; + + wrmsrl(perfctr_msr, 0UL); + + evntsel = ARCH_PERFMON_EVENTSEL_INT + | ARCH_PERFMON_EVENTSEL_OS + | ARCH_PERFMON_EVENTSEL_USR + | ARCH_PERFMON_NMI_EVENT_SEL + | ARCH_PERFMON_NMI_EVENT_UMASK; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0"); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + wd->check_bit = 1ULL << (eax.split.bit_width - 1); + return 1; +fail1: + release_perfctr_nmi(perfctr_msr); +fail: + return 0; +} + +static void stop_intel_arch_watchdog(void) +{ + unsigned int ebx; + union cpuid10_eax eax; + unsigned int unused; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebx indicates event present. + */ + cpuid(10, &(eax.full), &ebx, &unused, &unused); + if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || + (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + return; + + wrmsr(wd->evntsel_msr, 0, 0); + release_evntsel_nmi(wd->evntsel_msr); + release_perfctr_nmi(wd->perfctr_msr); +} + void setup_apic_nmi_watchdog (void *unused) { struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); @@ -663,6 +768,11 @@ void setup_apic_nmi_watchdog (void *unused) return; break; case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + if (!setup_intel_arch_watchdog()) + return; + break; + } switch (boot_cpu_data.x86) { case 6: if (boot_cpu_data.x86_model > 0xd) @@ -708,6 +818,10 @@ void stop_apic_nmi_watchdog(void *unused) stop_k7_watchdog(); break; case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + stop_intel_arch_watchdog(); + break; + } switch (boot_cpu_data.x86) { case 6: if (boot_cpu_data.x86_model > 0xd) @@ -831,10 +945,12 @@ int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) wrmsrl(wd->cccr_msr, dummy); apic_write(APIC_LVTPC, APIC_DM_NMI); } - else if (wd->perfctr_msr == MSR_P6_PERFCTR0) { - /* Only P6 based Pentium M need to re-unmask + else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || + wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { + /* P6 based Pentium M need to re-unmask * the apic vector but it doesn't hurt - * other P6 variant */ + * other P6 variant. + * ArchPerfom/Core Duo also needs this */ apic_write(APIC_LVTPC, APIC_DM_NMI); } /* start the cycle over again */ -- cgit v1.2.3 From 3cfc348bf90ffaa777c188652aa297f04eb94de8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:28 +0200 Subject: [PATCH] x86: Add portable getcpu call For NUMA optimization and some other algorithms it is useful to have a fast to get the current CPU and node numbers in user space. x86-64 added a fast way to do this in a vsyscall. This adds a generic syscall for other architectures to make it a generic portable facility. I expect some of them will also implement it as a faster vsyscall. The cache is an optimization for the x86-64 vsyscall optimization. Since what the syscall returns is an approximation anyways and user space often wants very fast results it can be cached for some time. The norma methods to get this information in user space are relatively slow The vsyscall is in a better position to manage the cache because it has direct access to a fast time stamp (jiffies). For the generic syscall optimization it doesn't help much, but enforce a valid argument to keep programs portable I only added an i386 syscall entry for now. Other architectures can follow as needed. AK: Also added some cleanups from Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/syscall_table.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index dd63d477539..7e639f78b0b 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -317,3 +317,4 @@ ENTRY(sys_call_table) .long sys_tee /* 315 */ .long sys_vmsplice .long sys_move_pages + .long sys_getcpu -- cgit v1.2.3 From c16b63e09d9d03158e0a92e961234e94c4862620 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:28 +0200 Subject: [PATCH] i386/x86-64: Don't randomize stack top when no randomization personality is set Based on patch from Frank van Maarseveen , but extended. Signed-off-by: Andi Kleen --- arch/i386/kernel/process.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 8657c739656..b741c3e1a5e 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -905,7 +906,7 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) unsigned long arch_align_stack(unsigned long sp) { - if (randomize_va_space) + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) sp -= get_random_int() % 8192; return sp & ~0xf; } -- cgit v1.2.3 From 0cb91a2293648507886563ccb91979cfc94d6a4b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:28 +0200 Subject: [PATCH] i386: Account spinlocks to the caller during profiling for !FP kernels This ports the algorithm from x86-64 (with improvements) to i386. Previously this only worked for frame pointer enabled kernels. But spinlocks have a very simple stack frame that can be manually analyzed. Do this. Signed-off-by: Andi Kleen --- arch/i386/kernel/time.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index edd00f6cee3..5af802ef00b 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -130,18 +130,33 @@ static int set_rtc_mmss(unsigned long nowtime) int timer_ack; -#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); - if (!user_mode_vm(regs) && in_lock_functions(pc)) +#ifdef CONFIG_SMP + if (!user_mode_vm(regs) && in_lock_functions(pc)) { +#ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->ebp + 4); - +#else + unsigned long *sp; + if ((regs->xcs & 3) == 0) + sp = (unsigned long *)®s->esp; + else + sp = (unsigned long *)regs->esp; + /* Return address is either directly at stack pointer + or above a saved eflags. Eflags has bits 22-31 zero, + kernel addresses don't. */ + if (sp[0] >> 22) + return sp[0]; + if (sp[1] >> 22) + return sp[1]; +#endif + } +#endif return pc; } EXPORT_SYMBOL(profile_pc); -#endif /* * This is the same as the above, except we _also_ save the current -- cgit v1.2.3 From ecaf45ee5ce60afe7cc46e91d82c1b0cbda09387 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:29 +0200 Subject: [PATCH] i386: Redo semaphore and rwlock assembly helpers - Move them to a pure assembly file. Previously they were in a C file that only consisted of inline assembly. Doing it in pure assembler is much nicer. - Add a frame.i include with FRAME/ENDFRAME macros to easily add frame pointers to assembly functions - Add dwarf2 annotation to them so that the new dwarf2 unwinder doesn't get stuck on them - Random cleanups Includes feedback from Jan Beulich and a UML build fix from Andrew Morton. Cc: jbeulich@novell.com Cc: jdike@addtoit.com Signed-off-by: Andi Kleen --- arch/i386/kernel/Makefile | 2 +- arch/i386/kernel/semaphore.c | 134 ------------------------------------- arch/i386/lib/Makefile | 2 +- arch/i386/lib/semaphore.S | 154 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 156 insertions(+), 136 deletions(-) delete mode 100644 arch/i386/kernel/semaphore.c create mode 100644 arch/i386/lib/semaphore.S (limited to 'arch/i386') diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 5427a842e84..dab497472de 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -4,7 +4,7 @@ extra-y := head.o init_task.o vmlinux.lds -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ +obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o bootflag.o \ quirks.o i8237.o topology.o alternative.o i8253.o tsc.o diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c deleted file mode 100644 index 98352c374c7..00000000000 --- a/arch/i386/kernel/semaphore.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * i386 semaphore implementation. - * - * (C) Copyright 1999 Linus Torvalds - * - * Portions Copyright 1999 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * rw semaphores implemented November 1999 by Benjamin LaHaise - */ -#include - -/* - * The semaphore operations have a special calling sequence that - * allow us to do a simpler in-line version of them. These routines - * need to convert that sequence back into the C sequence when - * there is contention on the semaphore. - * - * %eax contains the semaphore pointer on entry. Save the C-clobbered - * registers (%eax, %edx and %ecx) except %eax whish is either a return - * value or just clobbered.. - */ -asm( -".section .sched.text\n" -".align 4\n" -".globl __down_failed\n" -"__down_failed:\n\t" -#if defined(CONFIG_FRAME_POINTER) - "pushl %ebp\n\t" - "movl %esp,%ebp\n\t" -#endif - "pushl %edx\n\t" - "pushl %ecx\n\t" - "call __down\n\t" - "popl %ecx\n\t" - "popl %edx\n\t" -#if defined(CONFIG_FRAME_POINTER) - "movl %ebp,%esp\n\t" - "popl %ebp\n\t" -#endif - "ret" -); - -asm( -".section .sched.text\n" -".align 4\n" -".globl __down_failed_interruptible\n" -"__down_failed_interruptible:\n\t" -#if defined(CONFIG_FRAME_POINTER) - "pushl %ebp\n\t" - "movl %esp,%ebp\n\t" -#endif - "pushl %edx\n\t" - "pushl %ecx\n\t" - "call __down_interruptible\n\t" - "popl %ecx\n\t" - "popl %edx\n\t" -#if defined(CONFIG_FRAME_POINTER) - "movl %ebp,%esp\n\t" - "popl %ebp\n\t" -#endif - "ret" -); - -asm( -".section .sched.text\n" -".align 4\n" -".globl __down_failed_trylock\n" -"__down_failed_trylock:\n\t" -#if defined(CONFIG_FRAME_POINTER) - "pushl %ebp\n\t" - "movl %esp,%ebp\n\t" -#endif - "pushl %edx\n\t" - "pushl %ecx\n\t" - "call __down_trylock\n\t" - "popl %ecx\n\t" - "popl %edx\n\t" -#if defined(CONFIG_FRAME_POINTER) - "movl %ebp,%esp\n\t" - "popl %ebp\n\t" -#endif - "ret" -); - -asm( -".section .sched.text\n" -".align 4\n" -".globl __up_wakeup\n" -"__up_wakeup:\n\t" - "pushl %edx\n\t" - "pushl %ecx\n\t" - "call __up\n\t" - "popl %ecx\n\t" - "popl %edx\n\t" - "ret" -); - -/* - * rw spinlock fallbacks - */ -#if defined(CONFIG_SMP) -asm( -".section .sched.text\n" -".align 4\n" -".globl __write_lock_failed\n" -"__write_lock_failed:\n\t" - LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" -"1: rep; nop\n\t" - "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jne 1b\n\t" - LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jnz __write_lock_failed\n\t" - "ret" -); - -asm( -".section .sched.text\n" -".align 4\n" -".globl __read_lock_failed\n" -"__read_lock_failed:\n\t" - LOCK_PREFIX "incl (%eax)\n" -"1: rep; nop\n\t" - "cmpl $1,(%eax)\n\t" - "js 1b\n\t" - LOCK_PREFIX "decl (%eax)\n\t" - "js __read_lock_failed\n\t" - "ret" -); -#endif diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile index 914933e9ec3..d86a548b8d5 100644 --- a/arch/i386/lib/Makefile +++ b/arch/i386/lib/Makefile @@ -4,6 +4,6 @@ lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \ - bitops.o + bitops.o semaphore.o lib-$(CONFIG_X86_USE_3DNOW) += mmx.o diff --git a/arch/i386/lib/semaphore.S b/arch/i386/lib/semaphore.S new file mode 100644 index 00000000000..e16ff169639 --- /dev/null +++ b/arch/i386/lib/semaphore.S @@ -0,0 +1,154 @@ +/* + * i386 semaphore implementation. + * + * (C) Copyright 1999 Linus Torvalds + * + * Portions Copyright 1999 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * rw semaphores implemented November 1999 by Benjamin LaHaise + */ + +#include +#include +#include +#include +#include +#include + +/* + * The semaphore operations have a special calling sequence that + * allow us to do a simpler in-line version of them. These routines + * need to convert that sequence back into the C sequence when + * there is contention on the semaphore. + * + * %eax contains the semaphore pointer on entry. Save the C-clobbered + * registers (%eax, %edx and %ecx) except %eax whish is either a return + * value or just clobbered.. + */ + .section .sched.text +ENTRY(__down_failed) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __down + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__down_failed) + +ENTRY(__down_failed_interruptible) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __down_interruptible + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__down_failed_interruptible) + +ENTRY(__down_failed_trylock) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __down_trylock + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__down_failed_trylock) + +ENTRY(__up_wakeup) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __up + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__up_wakeup) + +/* + * rw spinlock fallbacks + */ +#ifdef CONFIG_SMP +ENTRY(__write_lock_failed) + CFI_STARTPROC simple + FRAME +2: LOCK_PREFIX + addl $ RW_LOCK_BIAS,(%eax) +1: rep; nop + cmpl $ RW_LOCK_BIAS,(%eax) + jne 1b + LOCK_PREFIX + subl $ RW_LOCK_BIAS,(%eax) + jnz 2b + ENDFRAME + ret + CFI_ENDPROC + END(__write_lock_failed) + +ENTRY(__read_lock_failed) + CFI_STARTPROC + FRAME +2: LOCK_PREFIX + incl (%eax) +1: rep; nop + cmpl $1,(%eax) + js 1b + LOCK_PREFIX + decl (%eax) + js 2b + ENDFRAME + ret + CFI_ENDPROC + END(__read_lock_failed) + +#endif -- cgit v1.2.3 From c1a58b42b428e717afbbb298356e041cea54ad17 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:30 +0200 Subject: [PATCH] i386/x86-64: Remove obsolete sanity check in mptable parsing It apparently has never triggered in many years. Signed-off-by: Andi Kleen --- arch/i386/kernel/mpparse.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 82756968856..a2b126fe9a5 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -294,19 +294,6 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); - /* - * Well it seems all SMP boards in existence - * use ExtINT/LVT1 == LINT0 and - * NMI/LVT2 == LINT1 - the following check - * will show us if this assumptions is false. - * Until then we do not have to add baggage. - */ - if ((m->mpc_irqtype == mp_ExtINT) && - (m->mpc_destapiclint != 0)) - BUG(); - if ((m->mpc_irqtype == mp_NMI) && - (m->mpc_destapiclint != 1)) - BUG(); } #ifdef CONFIG_X86_NUMAQ -- cgit v1.2.3 From cf4c6a2f27f5db810b69dcb1da7f194489e8ff88 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:30 +0200 Subject: [PATCH] i386: Factor out common io apic routing entry access The IO APIC code had lots of duplicated code to read/write 64bit routing entries into the IO-APIC. Factor this out int common read/write functions In a few cases the IO APIC lock is taken more often now, but this isn't a problem because it's all initialization/shutdown only slow path code. Similar to earlier x86-64 patch. Includes a fix by Jiri Slaby for a mistake that broke resume Signed-off-by: Andi Kleen --- arch/i386/kernel/io_apic.c | 100 +++++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 57 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 4eacd52eeb7..b713f27d7e8 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -94,6 +94,34 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; #define vector_to_irq(vector) (vector) #endif + +union entry_union { + struct { u32 w1, w2; }; + struct IO_APIC_route_entry entry; +}; + +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) +{ + union entry_union eu; + unsigned long flags; + spin_lock_irqsave(&ioapic_lock, flags); + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); + spin_unlock_irqrestore(&ioapic_lock, flags); + return eu.entry; +} + +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +{ + unsigned long flags; + union entry_union eu; + eu.entry = e; + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0x10 + 2*pin, eu.w1); + io_apic_write(apic, 0x11 + 2*pin, eu.w2); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super @@ -201,13 +229,9 @@ static void unmask_IO_APIC_irq (unsigned int irq) static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; - unsigned long flags; /* Check delivery_mode to be sure we're not clearing an SMI pin */ - spin_lock_irqsave(&ioapic_lock, flags); - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + entry = ioapic_read_entry(apic, pin); if (entry.delivery_mode == dest_SMI) return; @@ -216,10 +240,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) */ memset(&entry, 0, sizeof(entry)); entry.mask = 1; - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(apic, pin, entry); } static void clear_IO_APIC (void) @@ -1284,9 +1305,8 @@ static void __init setup_IO_APIC_irqs(void) if (!apic && (irq < 16)) disable_8259A_irq(irq); } + ioapic_write_entry(apic, pin, entry); spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -1302,7 +1322,6 @@ static void __init setup_IO_APIC_irqs(void) static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) { struct IO_APIC_route_entry entry; - unsigned long flags; memset(&entry,0,sizeof(entry)); @@ -1332,10 +1351,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in /* * Add it to the IO-APIC irq-routing table: */ - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(apic, pin, entry); enable_8259A_irq(0); } @@ -1445,10 +1461,7 @@ void __init print_IO_APIC(void) for (i = 0; i <= reg_01.bits.entries; i++) { struct IO_APIC_route_entry entry; - spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); - spin_unlock_irqrestore(&ioapic_lock, flags); + entry = ioapic_read_entry(apic, i); printk(KERN_DEBUG " %02x %03X %02X ", i, @@ -1667,10 +1680,7 @@ static void __init enable_IO_APIC(void) /* See if any of the pins is in ExtINT mode */ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { struct IO_APIC_route_entry entry; - spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + entry = ioapic_read_entry(apic, pin); /* If the interrupt line is enabled and in ExtInt mode @@ -1727,7 +1737,6 @@ void disable_IO_APIC(void) */ if (ioapic_i8259.pin != -1) { struct IO_APIC_route_entry entry; - unsigned long flags; memset(&entry, 0, sizeof(entry)); entry.mask = 0; /* Enabled */ @@ -1744,12 +1753,7 @@ void disable_IO_APIC(void) /* * Add it to the IO-APIC irq-routing table: */ - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, - *(((int *)&entry)+1)); - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, - *(((int *)&entry)+0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } disconnect_bsp_APIC(ioapic_i8259.pin != -1); } @@ -2214,17 +2218,13 @@ static inline void unlock_ExtINT_logic(void) int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; unsigned char save_control, save_freq_select; - unsigned long flags; pin = find_isa_irq_pin(8, mp_INT); apic = find_isa_irq_apic(8, mp_INT); if (pin == -1) return; - spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); - *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + entry0 = ioapic_read_entry(apic, pin); clear_IO_APIC_pin(apic, pin); memset(&entry1, 0, sizeof(entry1)); @@ -2237,10 +2237,7 @@ static inline void unlock_ExtINT_logic(void) entry1.trigger = 0; entry1.vector = 0; - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(apic, pin, entry1); save_control = CMOS_READ(RTC_CONTROL); save_freq_select = CMOS_READ(RTC_FREQ_SELECT); @@ -2259,10 +2256,7 @@ static inline void unlock_ExtINT_logic(void) CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); clear_IO_APIC_pin(apic, pin); - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + ioapic_write_entry(apic, pin, entry0); } int timer_uses_ioapic_pin_0; @@ -2462,17 +2456,12 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state) { struct IO_APIC_route_entry *entry; struct sysfs_ioapic_data *data; - unsigned long flags; int i; data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - spin_lock_irqsave(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); - } - spin_unlock_irqrestore(&ioapic_lock, flags); + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) + entry[i] = ioapic_read_entry(dev->id, i); return 0; } @@ -2494,11 +2483,9 @@ static int ioapic_resume(struct sys_device *dev) reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; io_apic_write(dev->id, 0, reg_00.raw); } - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); - } spin_unlock_irqrestore(&ioapic_lock, flags); + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) + ioapic_write_entry(dev->id, i, entry[i]); return 0; } @@ -2695,9 +2682,8 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a if (!ioapic && (irq < 16)) disable_8259A_irq(irq); + ioapic_write_entry(ioapic, pin, entry); spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); -- cgit v1.2.3 From 606bd58de6542e847c51b1b6d83a4cd70a632fe7 Mon Sep 17 00:00:00 2001 From: Diego Calleja Date: Tue, 26 Sep 2006 10:52:30 +0200 Subject: [PATCH] x86: AUX_DEVICE_INFO is one byte long, use 'movb' Bugzilla #6552 says: "In arch/i386/boot/setup.S, movw is used instead of movb for PS/2 mouse information, although it is unsigned char. This does not harm, because the jmp instruction overwritten by movw is used before executing movw, and never be used again" I've no idea if this is a real bug or how it gets fixed, so I'm submitting it for review instead of letting it die of boredom in bugzilla. Aditionally to i386, I've changed x86-64, which mirrors the same code. Credits to Yoshinori K. Okuji, who found the problem and suggested a fix. Signed-off-by: Diego Calleja Signed-off-by: Andi Kleen --- arch/i386/boot/setup.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S index d2b684cd620..3aec4538a11 100644 --- a/arch/i386/boot/setup.S +++ b/arch/i386/boot/setup.S @@ -494,12 +494,12 @@ no_voyager: movw %cs, %ax # aka SETUPSEG subw $DELTA_INITSEG, %ax # aka INITSEG movw %ax, %ds - movw $0, (0x1ff) # default is no pointing device + movb $0, (0x1ff) # default is no pointing device int $0x11 # int 0x11: equipment list testb $0x04, %al # check if mouse installed jz no_psmouse - movw $0xAA, (0x1ff) # device present + movb $0xAA, (0x1ff) # device present no_psmouse: #if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) -- cgit v1.2.3 From 19f03ffecdb599c1f64113b6dda0a1f143d2bab9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:30 +0200 Subject: [PATCH] i386: Clean up code style in mpparse.c ACPI code Remove some unlinuxy ways to write function parameter definitions. Remove some stray "return;"s No functional change. Cc: len.brown@intel.com Signed-off-by: Andi Kleen --- arch/i386/kernel/mpparse.c | 52 +++++++++++++++------------------------------- 1 file changed, 17 insertions(+), 35 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index a2b126fe9a5..bdaa75a5bc1 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -810,8 +810,7 @@ int es7000_plat; #ifdef CONFIG_ACPI -void __init mp_register_lapic_address ( - u64 address) +void __init mp_register_lapic_address(u64 address) { mp_lapic_addr = (unsigned long) address; @@ -823,13 +822,10 @@ void __init mp_register_lapic_address ( Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); } - -void __devinit mp_register_lapic ( - u8 id, - u8 enabled) +void __devinit mp_register_lapic (u8 id, u8 enabled) { struct mpc_config_processor processor; - int boot_cpu = 0; + int boot_cpu = 0; if (MAX_APICS - id <= 0) { printk(KERN_WARNING "Processor #%d invalid (max %d)\n", @@ -866,11 +862,9 @@ static struct mp_ioapic_routing { u32 pin_programmed[4]; } mp_ioapic_routing[MAX_IO_APICS]; - -static int mp_find_ioapic ( - int gsi) +static int mp_find_ioapic (int gsi) { - int i = 0; + int i = 0; /* Find the IOAPIC that manages this GSI. */ for (i = 0; i < nr_ioapics; i++) { @@ -883,15 +877,11 @@ static int mp_find_ioapic ( return -1; } - -void __init mp_register_ioapic ( - u8 id, - u32 address, - u32 gsi_base) +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) { - int idx = 0; - int tmpid; + int idx = 0; + int tmpid; if (nr_ioapics >= MAX_IO_APICS) { printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " @@ -937,16 +927,10 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); - - return; } - -void __init mp_override_legacy_irq ( - u8 bus_irq, - u8 polarity, - u8 trigger, - u32 gsi) +void __init +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { struct mpc_config_intsrc intsrc; int ioapic = -1; @@ -984,15 +968,13 @@ void __init mp_override_legacy_irq ( mp_irqs[mp_irq_entries] = intsrc; if (++mp_irq_entries == MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!\n"); - - return; } void __init mp_config_acpi_legacy_irqs (void) { struct mpc_config_intsrc intsrc; - int i = 0; - int ioapic = -1; + int i = 0; + int ioapic = -1; /* * Fabricate the legacy ISA bus (bus #31). @@ -1061,12 +1043,12 @@ void __init mp_config_acpi_legacy_irqs (void) #define MAX_GSI_NUM 4096 -int mp_register_gsi (u32 gsi, int triggering, int polarity) +int mp_register_gsi(u32 gsi, int triggering, int polarity) { - int ioapic = -1; - int ioapic_pin = 0; - int idx, bit = 0; - static int pci_irq = 16; + int ioapic = -1; + int ioapic_pin = 0; + int idx, bit = 0; + static int pci_irq = 16; /* * Mapping between Global System Interrups, which * represent all possible interrupts, and IRQs -- cgit v1.2.3 From e2414910f212c52d9d7c64c99a22863488ac5b48 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:30 +0200 Subject: [PATCH] x86: Detect CFI support in the assembler at runtime ... instead of using a CONFIG option. The config option still controls if the resulting executable actually has unwind information. This is useful to prevent compilation errors when users select CONFIG_STACK_UNWIND on old binutils and also allows to use CFI in the future for non kernel debugging applications. Cc: jbeulich@novell.com Cc: sam@ravnborg.org Signed-off-by: Andi Kleen --- arch/i386/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 3e4adb1e224..508cdbeb3a0 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -46,6 +46,10 @@ cflags-y += -ffreestanding # a lot more stack due to the lack of sharing of stacklots: CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;) +# do binutils support CFI? +cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) +AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) + CFLAGS += $(cflags-y) # Default subarch .c files -- cgit v1.2.3 From ba9c231f7499ff6918c069c72ff5fd836c76b963 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 26 Sep 2006 10:52:31 +0200 Subject: [PATCH] i386: initialize end-of-memory variables as early as possible Move initialization of all memory end variables to as early as possible, so that dependent code doesn't need to check whether these variables have already been set. Change the range check in kunmap_atomic to actually make use of this so that the no-mapping-estabished path (under CONFIG_DEBUG_HIGHMEM) gets used only when the address is inside the lowmem area (and BUG() otherwise). Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/i386/kernel/setup.c | 8 ++++++++ arch/i386/mm/discontig.c | 5 +++++ arch/i386/mm/highmem.c | 2 +- arch/i386/mm/init.c | 20 -------------------- 4 files changed, 14 insertions(+), 21 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index f1682206d30..71a540362b7 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1170,6 +1170,14 @@ static unsigned long __init setup_memory(void) } printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); + num_physpages = highend_pfn; + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + num_physpages = max_low_pfn; + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; +#endif +#ifdef CONFIG_FLATMEM + max_mapnr = num_physpages; #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(max_low_pfn)); diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 7c392dc553b..f0c10b3cd15 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -313,6 +313,11 @@ unsigned long __init setup_memory(void) highstart_pfn = system_max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); + num_physpages = highend_pfn; + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + num_physpages = system_max_low_pfn; + high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1; #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(system_max_low_pfn)); diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c index b6eb4dcb877..ba44000b906 100644 --- a/arch/i386/mm/highmem.c +++ b/arch/i386/mm/highmem.c @@ -54,7 +54,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - if (vaddr < FIXADDR_START) { // FIXME + if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) { dec_preempt_count(); preempt_check_resched(); return; diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 89e8486aac3..d0c2fdf6a4d 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -552,18 +552,6 @@ static void __init test_wp_bit(void) } } -static void __init set_max_mapnr_init(void) -{ -#ifdef CONFIG_HIGHMEM - num_physpages = highend_pfn; -#else - num_physpages = max_low_pfn; -#endif -#ifdef CONFIG_FLATMEM - max_mapnr = num_physpages; -#endif -} - static struct kcore_list kcore_mem, kcore_vmalloc; void __init mem_init(void) @@ -590,14 +578,6 @@ void __init mem_init(void) } #endif - set_max_mapnr_init(); - -#ifdef CONFIG_HIGHMEM - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; -#endif - /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); -- cgit v1.2.3 From add659bf8aa92f8b3f01a8c0220557c959507fb1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:31 +0200 Subject: [PATCH] i386: Remove lock section support in rwsem.h Lock sections don't work the new dwarf2 unwinder This generates slightly smaller code. It adds one more taken jump to the fast path. Also move the trampolines into semaphore.S and add proper CFI annotations. Cc: jbeulich@novell.com Signed-off-by: Andi Kleen --- arch/i386/lib/semaphore.S | 63 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/lib/semaphore.S b/arch/i386/lib/semaphore.S index e16ff169639..01f80b5c45d 100644 --- a/arch/i386/lib/semaphore.S +++ b/arch/i386/lib/semaphore.S @@ -152,3 +152,66 @@ ENTRY(__read_lock_failed) END(__read_lock_failed) #endif + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_down_read_failed) + CFI_STARTPROC + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + push %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + call rwsem_down_read_failed + pop %edx + CFI_ADJUST_CFA_OFFSET -4 + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + END(call_rwsem_down_read_failed) + +ENTRY(call_rwsem_down_write_failed) + CFI_STARTPROC + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + calll rwsem_down_write_failed + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + END(call_rwsem_down_write_failed) + +ENTRY(call_rwsem_wake) + CFI_STARTPROC + decw %dx /* do nothing if still outstanding active readers */ + jnz 1f + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call rwsem_wake + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 +1: ret + CFI_ENDPROC + END(call_rwsem_wake) + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_downgrade_wake) + CFI_STARTPROC + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + push %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + call rwsem_downgrade_wake + pop %edx + CFI_ADJUST_CFA_OFFSET -4 + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + END(call_rwsem_downgrade_wake) + -- cgit v1.2.3 From 1a3f239ddf9208f2e52d36fef1c1c4518cbbbabe Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 26 Sep 2006 10:52:32 +0200 Subject: [PATCH] i386: Replace i386 open-coded cmdline parsing with This patch replaces the open-coded early commandline parsing throughout the i386 boot code with the generic mechanism (already used by ppc, powerpc, ia64 and s390). The code was inconsistent with whether it deletes the option from the cmdline or not, meaning some of these will get passed through the environment into init. This transformation is mainly mechanical, but there are some notable parts: 1) Grammar: s/linux never set's it up/linux never sets it up/ 2) Remove hacked-in earlyprintk= option scanning. When someone actually implements CONFIG_EARLY_PRINTK, then they can use early_param(). [AK: actually it is implemented, but I'm adding the early_param it in the next x86-64 patch] 3) Move declaration of generic_apic_probe() from setup.c into asm/apic.h 4) Various parameters now moved into their appropriate files (thanks Andi). 5) All parse functions which examine arg need to check for NULL, except one where it has subtle humor value. AK: readded acpi_sci handling which was completely dropped AK: moved some more variables into acpi/boot.c Cc: len.brown@intel.com Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen --- arch/i386/kernel/acpi/boot.c | 76 ++++++++- arch/i386/kernel/apic.c | 15 ++ arch/i386/kernel/io_apic.c | 24 ++- arch/i386/kernel/machine_kexec.c | 23 +++ arch/i386/kernel/setup.c | 350 +++++++++++++-------------------------- arch/i386/kernel/smpboot.c | 13 ++ arch/i386/mach-generic/probe.c | 58 ++++--- arch/i386/mm/init.c | 18 +- 8 files changed, 306 insertions(+), 271 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index ee003bc0e8b..87e2ab50b69 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -36,6 +36,8 @@ #include #include +int __initdata acpi_force = 0; + #ifdef CONFIG_X86_64 extern void __init clustered_apic_check(void); @@ -860,8 +862,6 @@ static void __init acpi_process_madt(void) return; } -extern int acpi_force; - #ifdef __i386__ static int __init disable_acpi_irq(struct dmi_system_id *d) @@ -1163,3 +1163,75 @@ int __init acpi_boot_init(void) return 0; } + +static int __init parse_acpi(char *arg) +{ + if (!arg) + return -EINVAL; + + /* "acpi=off" disables both ACPI table parsing and interpreter */ + if (strcmp(arg, "off") == 0) { + disable_acpi(); + } + /* acpi=force to over-ride black-list */ + else if (strcmp(arg, "force") == 0) { + acpi_force = 1; + acpi_ht = 1; + acpi_disabled = 0; + } + /* acpi=strict disables out-of-spec workarounds */ + else if (strcmp(arg, "strict") == 0) { + acpi_strict = 1; + } + /* Limit ACPI just to boot-time to enable HT */ + else if (strcmp(arg, "ht") == 0) { + if (!acpi_force) + disable_acpi(); + acpi_ht = 1; + } + /* "acpi=noirq" disables ACPI interrupt routing */ + else if (strcmp(arg, "noirq") == 0) { + acpi_noirq_set(); + } else { + /* Core will printk when we return error. */ + return -EINVAL; + } + return 0; +} +early_param("acpi", parse_acpi); + +/* FIXME: Using pci= for an ACPI parameter is a travesty. */ +static int __init parse_pci(char *arg) +{ + if (arg && strcmp(arg, "noacpi") == 0) + acpi_disable_pci(); + return 0; +} +early_param("pci", parse_pci); + +#ifdef CONFIG_X86_IO_APIC +static int __init parse_acpi_skip_timer_override(char *arg) +{ + acpi_skip_timer_override = 1; + return 0; +} +early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override); +#endif /* CONFIG_X86_IO_APIC */ + +static int __init setup_acpi_sci(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "edge")) + acpi_sci_flags.trigger = 1; + else if (!strcmp(s, "level")) + acpi_sci_flags.trigger = 3; + else if (!strcmp(s, "high")) + acpi_sci_flags.polarity = 1; + else if (!strcmp(s, "low")) + acpi_sci_flags.polarity = 3; + else + return -EINVAL; + return 0; +} +early_param("acpi_sci", setup_acpi_sci); diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 1a34fc57800..ce75e71b694 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -1372,3 +1372,18 @@ int __init APIC_init_uniprocessor (void) return 0; } + +static int __init parse_lapic(char *arg) +{ + lapic_enable(); + return 0; +} +early_param("lapic", parse_lapic); + +static int __init parse_nolapic(char *arg) +{ + lapic_disable(); + return 0; +} +early_param("nolapic", parse_nolapic); + diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index b713f27d7e8..fd0df75cfbd 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -66,7 +66,7 @@ int sis_apic_bug = -1; */ int nr_ioapic_registers[MAX_IO_APICS]; -int disable_timer_pin_1 __initdata; +static int disable_timer_pin_1 __initdata; /* * Rough estimation of how many shared IRQs there are, can @@ -2691,3 +2691,25 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a } #endif /* CONFIG_ACPI */ + +static int __init parse_disable_timer_pin_1(char *arg) +{ + disable_timer_pin_1 = 1; + return 0; +} +early_param("disable_timer_pin_1", parse_disable_timer_pin_1); + +static int __init parse_enable_timer_pin_1(char *arg) +{ + disable_timer_pin_1 = -1; + return 0; +} +early_param("enable_timer_pin_1", parse_enable_timer_pin_1); + +static int __init parse_noapic(char *arg) +{ + /* disable IO-APIC */ + disable_ioapic_setup(); + return 0; +} +early_param("noapic", parse_noapic); diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index 6b1ae6ba76f..66c3dc99a65 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -209,3 +210,25 @@ NORET_TYPE void machine_kexec(struct kimage *image) rnk = (relocate_new_kernel_t) reboot_code_buffer; (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); } + +/* crashkernel=size@addr specifies the location to reserve for + * a crash kernel. By reserving this memory we guarantee + * that linux never sets it up as a DMA target. + * Useful for holding code to do something appropriate + * after a kernel panic. + */ +static int __init parse_crashkernel(char *arg) +{ + unsigned long size, base; + size = memparse(arg, &arg); + if (*arg == '@') { + base = memparse(arg+1, &arg); + /* FIXME: Do I want a sanity check + * to validate the memory range? + */ + crashk_res.start = base; + crashk_res.end = base + size - 1; + } + return 0; +} +early_param("crashkernel", parse_crashkernel); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 71a540362b7..c6e31ed386f 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -96,11 +96,6 @@ unsigned long mmu_cr4_features; #endif EXPORT_SYMBOL(acpi_disabled); -#ifdef CONFIG_ACPI -int __initdata acpi_force = 0; -extern acpi_interrupt_flags acpi_sci_flags; -#endif - /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; #ifdef CONFIG_MCA @@ -148,7 +143,6 @@ EXPORT_SYMBOL(ist_info); struct e820map e820; extern void early_cpu_init(void); -extern void generic_apic_probe(char *); extern int root_mountflags; unsigned long saved_videomode; @@ -700,238 +694,132 @@ static inline void copy_edd(void) } #endif -static void __init parse_cmdline_early (char ** cmdline_p) -{ - char c = ' ', *to = command_line, *from = saved_command_line; - int len = 0; - int userdef = 0; +static int __initdata user_defined_memmap = 0; - /* Save unparsed command line copy for /proc/cmdline */ - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; +/* + * "mem=nopentium" disables the 4MB page tables. + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM + * to , overriding the bios size. + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from + * to +, overriding the bios size. + * + * HPA tells me bootloaders need to parse mem=, so no new + * option should be mem= [also see Documentation/i386/boot.txt] + */ +static int __init parse_mem(char *arg) +{ + if (!arg) + return -EINVAL; - for (;;) { - if (c != ' ') - goto next_char; - /* - * "mem=nopentium" disables the 4MB page tables. - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM - * to , overriding the bios size. - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from - * to +, overriding the bios size. - * - * HPA tells me bootloaders need to parse mem=, so no new - * option should be mem= [also see Documentation/i386/boot.txt] + if (strcmp(arg, "nopentium") == 0) { + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); + disable_pse = 1; + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. */ - if (!memcmp(from, "mem=", 4)) { - if (to != command_line) - to--; - if (!memcmp(from+4, "nopentium", 9)) { - from += 9+4; - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); - disable_pse = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long mem_size; + unsigned long long mem_size; - mem_size = memparse(from+4, &from); - limit_regions(mem_size); - userdef=1; - } - } - - else if (!memcmp(from, "memmap=", 7)) { - if (to != command_line) - to--; - if (!memcmp(from+7, "exactmap", 8)) { -#ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is - * reset. - */ - find_max_pfn(); - saved_max_pfn = max_pfn; -#endif - from += 8+7; - e820.nr_map = 0; - userdef = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long start_at, mem_size; - - mem_size = memparse(from+7, &from); - if (*from == '@') { - start_at = memparse(from+1, &from); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*from == '#') { - start_at = memparse(from+1, &from); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*from == '$') { - start_at = memparse(from+1, &from); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - limit_regions(mem_size); - userdef=1; - } - } - } - - else if (!memcmp(from, "noexec=", 7)) - noexec_setup(from + 7); + mem_size = memparse(arg, &arg); + limit_regions(mem_size); + user_defined_memmap = 1; + } + return 0; +} +early_param("mem", parse_mem); +static int __init parse_memmap(char *arg) +{ + if (!arg) + return -EINVAL; -#ifdef CONFIG_X86_SMP - /* - * If the BIOS enumerates physical processors before logical, - * maxcpus=N at enumeration-time can be used to disable HT. + if (strcmp(arg, "exactmap") == 0) { +#ifdef CONFIG_CRASH_DUMP + /* If we are doing a crash dump, we + * still need to know the real mem + * size before original memory map is + * reset. */ - else if (!memcmp(from, "maxcpus=", 8)) { - extern unsigned int maxcpus; - - maxcpus = simple_strtoul(from + 8, NULL, 0); - } + find_max_pfn(); + saved_max_pfn = max_pfn; #endif - -#ifdef CONFIG_ACPI - /* "acpi=off" disables both ACPI table parsing and interpreter */ - else if (!memcmp(from, "acpi=off", 8)) { - disable_acpi(); - } - - /* acpi=force to over-ride black-list */ - else if (!memcmp(from, "acpi=force", 10)) { - acpi_force = 1; - acpi_ht = 1; - acpi_disabled = 0; - } - - /* acpi=strict disables out-of-spec workarounds */ - else if (!memcmp(from, "acpi=strict", 11)) { - acpi_strict = 1; - } - - /* Limit ACPI just to boot-time to enable HT */ - else if (!memcmp(from, "acpi=ht", 7)) { - if (!acpi_force) - disable_acpi(); - acpi_ht = 1; - } - - /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */ - else if (!memcmp(from, "pci=noacpi", 10)) { - acpi_disable_pci(); - } - /* "acpi=noirq" disables ACPI interrupt routing */ - else if (!memcmp(from, "acpi=noirq", 10)) { - acpi_noirq_set(); + e820.nr_map = 0; + user_defined_memmap = 1; + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. + */ + unsigned long long start_at, mem_size; + + mem_size = memparse(arg, &arg); + if (*arg == '@') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RAM); + } else if (*arg == '#') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_ACPI); + } else if (*arg == '$') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RESERVED); + } else { + limit_regions(mem_size); + user_defined_memmap = 1; } + } + return 0; +} +early_param("memmap", parse_memmap); - else if (!memcmp(from, "acpi_sci=edge", 13)) - acpi_sci_flags.trigger = 1; - - else if (!memcmp(from, "acpi_sci=level", 14)) - acpi_sci_flags.trigger = 3; - - else if (!memcmp(from, "acpi_sci=high", 13)) - acpi_sci_flags.polarity = 1; - - else if (!memcmp(from, "acpi_sci=low", 12)) - acpi_sci_flags.polarity = 3; - -#ifdef CONFIG_X86_IO_APIC - else if (!memcmp(from, "acpi_skip_timer_override", 24)) - acpi_skip_timer_override = 1; - - if (!memcmp(from, "disable_timer_pin_1", 19)) - disable_timer_pin_1 = 1; - if (!memcmp(from, "enable_timer_pin_1", 18)) - disable_timer_pin_1 = -1; +#ifdef CONFIG_PROC_VMCORE +/* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. + */ +static int __init parse_elfcorehdr(char *arg) +{ + if (!arg) + return -EINVAL; - /* disable IO-APIC */ - else if (!memcmp(from, "noapic", 6)) - disable_ioapic_setup(); -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ + elfcorehdr_addr = memparse(arg, &arg); + return 0; +} +early_param("elfcorehdr", parse_elfcorehdr); +#endif /* CONFIG_PROC_VMCORE */ -#ifdef CONFIG_X86_LOCAL_APIC - /* enable local APIC */ - else if (!memcmp(from, "lapic", 5)) - lapic_enable(); +/* + * highmem=size forces highmem to be exactly 'size' bytes. + * This works even on boxes that have no highmem otherwise. + * This also works to reduce highmem size on bigger boxes. + */ +static int __init parse_highmem(char *arg) +{ + if (!arg) + return -EINVAL; - /* disable local APIC */ - else if (!memcmp(from, "nolapic", 6)) - lapic_disable(); -#endif /* CONFIG_X86_LOCAL_APIC */ + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; + return 0; +} +early_param("highmem", parse_highmem); -#ifdef CONFIG_KEXEC - /* crashkernel=size@addr specifies the location to reserve for - * a crash kernel. By reserving this memory we guarantee - * that linux never set's it up as a DMA target. - * Useful for holding code to do something appropriate - * after a kernel panic. - */ - else if (!memcmp(from, "crashkernel=", 12)) { - unsigned long size, base; - size = memparse(from+12, &from); - if (*from == '@') { - base = memparse(from+1, &from); - /* FIXME: Do I want a sanity check - * to validate the memory range? - */ - crashk_res.start = base; - crashk_res.end = base + size - 1; - } - } -#endif -#ifdef CONFIG_PROC_VMCORE - /* elfcorehdr= specifies the location of elf core header - * stored by the crashed kernel. - */ - else if (!memcmp(from, "elfcorehdr=", 11)) - elfcorehdr_addr = memparse(from+11, &from); -#endif +/* + * vmalloc=size forces the vmalloc area to be exactly 'size' + * bytes. This can be used to increase (or decrease) the + * vmalloc area - the default is 128m. + */ +static int __init parse_vmalloc(char *arg) +{ + if (!arg) + return -EINVAL; - /* - * highmem=size forces highmem to be exactly 'size' bytes. - * This works even on boxes that have no highmem otherwise. - * This also works to reduce highmem size on bigger boxes. - */ - else if (!memcmp(from, "highmem=", 8)) - highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; - - /* - * vmalloc=size forces the vmalloc area to be exactly 'size' - * bytes. This can be used to increase (or decrease) the - * vmalloc area - the default is 128m. - */ - else if (!memcmp(from, "vmalloc=", 8)) - __VMALLOC_RESERVE = memparse(from+8, &from); - - next_char: - c = *(from++); - if (!c) - break; - if (COMMAND_LINE_SIZE <= ++len) - break; - *(to++) = c; - } - *to = '\0'; - *cmdline_p = command_line; - if (userdef) { - printk(KERN_INFO "user-defined physical RAM map:\n"); - print_memory_map("user"); - } + __VMALLOC_RESERVE = memparse(arg, &arg); + return 0; } +early_param("vmalloc", parse_vmalloc); /* * Callback for efi_memory_walk. @@ -1507,17 +1395,15 @@ void __init setup_arch(char **cmdline_p) data_resource.start = virt_to_phys(_etext); data_resource.end = virt_to_phys(_edata)-1; - parse_cmdline_early(cmdline_p); + parse_early_param(); -#ifdef CONFIG_EARLY_PRINTK - { - char *s = strstr(*cmdline_p, "earlyprintk="); - if (s) { - setup_early_printk(strchr(s, '=') + 1); - printk("early console enabled\n"); - } + if (user_defined_memmap) { + printk(KERN_INFO "user-defined physical RAM map:\n"); + print_memory_map("user"); } -#endif + + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; max_low_pfn = setup_memory(); @@ -1546,7 +1432,7 @@ void __init setup_arch(char **cmdline_p) dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH - generic_apic_probe(*cmdline_p); + generic_apic_probe(); #endif if (efi_enabled) efi_map_memmap(); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 9367af76ce3..517eb387455 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1491,3 +1491,16 @@ void __init smp_intr_init(void) /* IPI for generic function call */ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); } + +/* + * If the BIOS enumerates physical processors before logical, + * maxcpus=N at enumeration-time can be used to disable HT. + */ +static int __init parse_maxcpus(char *arg) +{ + extern unsigned int maxcpus; + + maxcpus = simple_strtoul(arg, NULL, 0); + return 0; +} +early_param("maxcpus", parse_maxcpus); diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c index 793d1b47325..94b1fd9cbe3 100644 --- a/arch/i386/mach-generic/probe.c +++ b/arch/i386/mach-generic/probe.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -29,7 +30,24 @@ struct genapic *apic_probe[] __initdata = { NULL, }; -static int cmdline_apic; +static int cmdline_apic __initdata; +static int __init parse_apic(char *arg) +{ + int i; + + if (!arg) + return -EINVAL; + + for (i = 0; apic_probe[i]; i++) { + if (!strcmp(apic_probe[i]->name, arg)) { + genapic = apic_probe[i]; + cmdline_apic = 1; + return 0; + } + } + return -ENOENT; +} +early_param("apic", parse_apic); void __init generic_bigsmp_probe(void) { @@ -48,40 +66,20 @@ void __init generic_bigsmp_probe(void) } } -void __init generic_apic_probe(char *command_line) +void __init generic_apic_probe(void) { - char *s; - int i; - int changed = 0; - - s = strstr(command_line, "apic="); - if (s && (s == command_line || isspace(s[-1]))) { - char *p = strchr(s, ' '), old; - if (!p) - p = strchr(s, '\0'); - old = *p; - *p = 0; - for (i = 0; !changed && apic_probe[i]; i++) { - if (!strcmp(apic_probe[i]->name, s+5)) { - changed = 1; + if (!cmdline_apic) { + int i; + for (i = 0; apic_probe[i]; i++) { + if (apic_probe[i]->probe()) { genapic = apic_probe[i]; + break; } } - if (!changed) - printk(KERN_ERR "Unknown genapic `%s' specified.\n", s); - *p = old; - cmdline_apic = changed; - } - for (i = 0; !changed && apic_probe[i]; i++) { - if (apic_probe[i]->probe()) { - changed = 1; - genapic = apic_probe[i]; - } + /* Not visible without early console */ + if (!apic_probe[i]) + panic("Didn't find an APIC driver"); } - /* Not visible without early console */ - if (!changed) - panic("Didn't find an APIC driver"); - printk(KERN_INFO "Using APIC driver %s\n", genapic->name); } diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index d0c2fdf6a4d..951386606d0 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -435,16 +435,22 @@ u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; * on Enable * off Disable */ -void __init noexec_setup(const char *str) +static int __init noexec_setup(char *str) { - if (!strncmp(str, "on",2) && cpu_has_nx) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; - } else if (!strncmp(str,"off",3)) { + if (!str || !strcmp(str, "on")) { + if (cpu_has_nx) { + __supported_pte_mask |= _PAGE_NX; + disable_nx = 0; + } + } else if (!strcmp(str,"off")) { disable_nx = 1; __supported_pte_mask &= ~_PAGE_NX; - } + } else + return -EINVAL; + + return 0; } +early_param("noexec", noexec_setup); int nx_enabled = 0; #ifdef CONFIG_X86_PAE -- cgit v1.2.3 From df3bb57d2c0160ccd1ee51322f50aa295c3b0858 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:33 +0200 Subject: [PATCH] i386/x86-64: Move acpi_disabled variables into acpi/boot.c Removes code duplication between i386/x86-64. Not needed anymore in setup.c since early_param cleanup Cc: len.brown@intel.com Signed-off-by: Andi Kleen --- arch/i386/kernel/acpi/boot.c | 7 +++++++ arch/i386/kernel/setup.c | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 87e2ab50b69..f3761b98c8d 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -38,6 +38,13 @@ int __initdata acpi_force = 0; +#ifdef CONFIG_ACPI +int acpi_disabled = 0; +#else +int acpi_disabled = 1; +#endif +EXPORT_SYMBOL(acpi_disabled); + #ifdef CONFIG_X86_64 extern void __init clustered_apic_check(void); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index c6e31ed386f..ea17567dbe7 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -89,13 +89,6 @@ EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; -#ifdef CONFIG_ACPI - int acpi_disabled = 0; -#else - int acpi_disabled = 1; -#endif -EXPORT_SYMBOL(acpi_disabled); - /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; #ifdef CONFIG_MCA -- cgit v1.2.3 From 91cd444e56ebe0c2acd9576a045d77490b26f607 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:33 +0200 Subject: [PATCH] x86: Remove unneeded externs in acpi/boot.c And move one into proto.h Cc: len.brown@intel.com Signed-off-by: Andi Kleen --- arch/i386/kernel/acpi/boot.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index f3761b98c8d..c809a3f4c0b 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -47,9 +47,6 @@ EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_64 -extern void __init clustered_apic_check(void); - -extern int gsi_irq_sharing(int gsi); #include static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } -- cgit v1.2.3 From 2ade2920dcefdf5595c6380ebed131c964190855 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Tue, 26 Sep 2006 10:52:33 +0200 Subject: [PATCH] i386/x86-64: rename is_at_popf(), add iret to tests and fix is_at_popf() needs to test for the iret instruction as well as popf. So add that test and rename it to is_setting_trap_flag(). Also change max insn length from 16 to 15 to match reality. LAHF / SAHF can't affect TF, so the comment in x86_64 is removed. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen --- arch/i386/kernel/ptrace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index d3db03f4085..775f50e9395 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -185,17 +185,17 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_ return addr; } -static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs) +static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) { int i, copied; - unsigned char opcode[16]; + unsigned char opcode[15]; unsigned long addr = convert_eip_to_linear(child, regs); copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); for (i = 0; i < copied; i++) { switch (opcode[i]) { - /* popf */ - case 0x9d: + /* popf and iret */ + case 0x9d: case 0xcf: return 1; /* opcode and address size prefixes */ case 0x66: case 0x67: @@ -247,7 +247,7 @@ static void set_singlestep(struct task_struct *child) * don't mark it as being "us" that set it, so that we * won't clear it by hand later. */ - if (is_at_popf(child, regs)) + if (is_setting_trap_flag(child, regs)) return; child->ptrace |= PT_DTRACE; -- cgit v1.2.3 From 5a1b3999d6cb7ab87f1f3b1700bc91839fd6fa29 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:34 +0200 Subject: [PATCH] x86: Some preparationary cleanup for stack trace - Remove unused all_contexts parameter No caller used it - Move skip argument into the structure (needed for followon patches) Cc: mingo@elte.hu Signed-off-by: Andi Kleen --- arch/i386/kernel/stacktrace.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c index e62a037ab39..ae3c32a87ad 100644 --- a/arch/i386/kernel/stacktrace.c +++ b/arch/i386/kernel/stacktrace.c @@ -61,12 +61,8 @@ save_context_stack(struct stack_trace *trace, unsigned int skip, /* * Save stack-backtrace addresses into a stack_trace buffer. - * If all_contexts is set, all contexts (hardirq, softirq and process) - * are saved. If not set then only the current context is saved. */ -void save_stack_trace(struct stack_trace *trace, - struct task_struct *task, int all_contexts, - unsigned int skip) +void save_stack_trace(struct stack_trace *trace, struct task_struct *task) { unsigned long ebp; unsigned long *stack = &ebp; @@ -85,10 +81,9 @@ void save_stack_trace(struct stack_trace *trace, struct thread_info *context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - ebp = save_context_stack(trace, skip, context, stack, ebp); + ebp = save_context_stack(trace, trace->skip, context, stack, ebp); stack = (unsigned long *)context->previous_esp; - if (!all_contexts || !stack || - trace->nr_entries >= trace->max_entries) + if (!stack || trace->nr_entries >= trace->max_entries) break; trace->entries[trace->nr_entries++] = ULONG_MAX; if (trace->nr_entries >= trace->max_entries) -- cgit v1.2.3 From 2b14a78cd07a52001b8c3865ed615d8b9b905b78 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:34 +0200 Subject: [PATCH] i386: Do stacktracer conversion too Following x86-64 patches. Reuses code from them in fact. Convert the standard backtracer to do all output using callbacks. Use the x86-64 stack tracer implementation that uses these callbacks to implement the stacktrace interface. This allows to use the new dwarf2 unwinder for stacktrace and get better backtraces. Cc: mingo@elte.hu Signed-off-by: Andi Kleen --- arch/i386/kernel/Makefile | 1 + arch/i386/kernel/stacktrace.c | 93 ------------------------------------ arch/i386/kernel/traps.c | 108 +++++++++++++++++++++++++++++++----------- 3 files changed, 81 insertions(+), 121 deletions(-) delete mode 100644 arch/i386/kernel/stacktrace.c (limited to 'arch/i386') diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index dab497472de..1a884b6e6e5 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -81,4 +81,5 @@ $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ $(call if_changed,syscall) k8-y += ../../x86_64/kernel/k8.o +stacktrace-y += ../../x86_64/kernel/stacktrace.o diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c deleted file mode 100644 index ae3c32a87ad..00000000000 --- a/arch/i386/kernel/stacktrace.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * arch/i386/kernel/stacktrace.c - * - * Stack trace management functions - * - * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar - */ -#include -#include - -static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) -{ - return p > (void *)tinfo && - p < (void *)tinfo + THREAD_SIZE - 3; -} - -/* - * Save stack-backtrace addresses into a stack_trace buffer: - */ -static inline unsigned long -save_context_stack(struct stack_trace *trace, unsigned int skip, - struct thread_info *tinfo, unsigned long *stack, - unsigned long ebp) -{ - unsigned long addr; - -#ifdef CONFIG_FRAME_POINTER - while (valid_stack_ptr(tinfo, (void *)ebp)) { - addr = *(unsigned long *)(ebp + 4); - if (!skip) - trace->entries[trace->nr_entries++] = addr; - else - skip--; - if (trace->nr_entries >= trace->max_entries) - break; - /* - * break out of recursive entries (such as - * end_of_stack_stop_unwind_function): - */ - if (ebp == *(unsigned long *)ebp) - break; - - ebp = *(unsigned long *)ebp; - } -#else - while (valid_stack_ptr(tinfo, stack)) { - addr = *stack++; - if (__kernel_text_address(addr)) { - if (!skip) - trace->entries[trace->nr_entries++] = addr; - else - skip--; - if (trace->nr_entries >= trace->max_entries) - break; - } - } -#endif - - return ebp; -} - -/* - * Save stack-backtrace addresses into a stack_trace buffer. - */ -void save_stack_trace(struct stack_trace *trace, struct task_struct *task) -{ - unsigned long ebp; - unsigned long *stack = &ebp; - - WARN_ON(trace->nr_entries || !trace->max_entries); - - if (!task || task == current) { - /* Grab ebp right from our regs: */ - asm ("movl %%ebp, %0" : "=r" (ebp)); - } else { - /* ebp is the last reg pushed by switch_to(): */ - ebp = *(unsigned long *) task->thread.esp; - } - - while (1) { - struct thread_info *context = (struct thread_info *) - ((unsigned long)stack & (~(THREAD_SIZE - 1))); - - ebp = save_context_stack(trace, trace->skip, context, stack, ebp); - stack = (unsigned long *)context->previous_esp; - if (!stack || trace->nr_entries >= trace->max_entries) - break; - trace->entries[trace->nr_entries++] = ULONG_MAX; - if (trace->nr_entries >= trace->max_entries) - break; - } -} - diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 3c85c89f68d..4ced4285163 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -51,6 +51,7 @@ #include #include #include +#include #include @@ -118,26 +119,16 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) p < (void *)tinfo + THREAD_SIZE - 3; } -/* - * Print one address/symbol entries per line. - */ -static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl) -{ - printk(" [<%08lx>] ", addr); - - print_symbol("%s\n", addr); -} - static inline unsigned long print_context_stack(struct thread_info *tinfo, unsigned long *stack, unsigned long ebp, - char *log_lvl) + struct stacktrace_ops *ops, void *data) { unsigned long addr; #ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); - print_addr_and_symbol(addr, log_lvl); + ops->address(data, addr); /* * break out of recursive entries (such as * end_of_stack_stop_unwind_function): @@ -150,28 +141,35 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) - print_addr_and_symbol(addr, log_lvl); + ops->address(data, addr); } #endif return ebp; } +struct ops_and_data { + struct stacktrace_ops *ops; + void *data; +}; + static asmlinkage int -show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) +dump_trace_unwind(struct unwind_frame_info *info, void *data) { + struct ops_and_data *oad = (struct ops_and_data *)data; int n = 0; while (unwind(info) == 0 && UNW_PC(info)) { n++; - print_addr_and_symbol(UNW_PC(info), log_lvl); + oad->ops->address(oad->data, UNW_PC(info)); if (arch_unw_user_mode(info)) break; } return n; } -static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, char *log_lvl) +void dump_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, + struct stacktrace_ops *ops, void *data) { unsigned long ebp; @@ -181,31 +179,37 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, if (call_trace >= 0) { int unw_ret = 0; struct unwind_frame_info info; + struct ops_and_data oad = { .ops = ops, .data = data }; if (regs) { if (unwind_init_frame_info(&info, task, regs) == 0) - unw_ret = show_trace_unwind(&info, log_lvl); + unw_ret = dump_trace_unwind(&info, &oad); } else if (task == current) - unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl); + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); else { if (unwind_init_blocked(&info, task) == 0) - unw_ret = show_trace_unwind(&info, log_lvl); + unw_ret = dump_trace_unwind(&info, &oad); } if (unw_ret > 0) { if (call_trace == 1 && !arch_unw_user_mode(&info)) { - print_symbol("DWARF2 unwinder stuck at %s\n", + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", UNW_PC(&info)); if (UNW_SP(&info) >= PAGE_OFFSET) { - printk("Leftover inexact backtrace:\n"); + ops->warning(data, "Leftover inexact backtrace:\n"); stack = (void *)UNW_SP(&info); } else - printk("Full inexact backtrace again:\n"); + ops->warning(data, "Full inexact backtrace again:\n"); } else if (call_trace >= 1) return; else - printk("Full inexact backtrace again:\n"); + ops->warning(data, "Full inexact backtrace again:\n"); } else - printk("Inexact backtrace:\n"); + ops->warning(data, "Inexact backtrace:\n"); + } else if (!stack) { + unsigned long dummy; + stack = &dummy; + if (task && task != current) + stack = (unsigned long *)task->thread.esp; } if (task == current) { @@ -220,15 +224,63 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, struct thread_info *context; context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - ebp = print_context_stack(context, stack, ebp, log_lvl); + ebp = print_context_stack(context, stack, ebp, ops, data); + /* Should be after the line below, but somewhere + in early boot context comes out corrupted and we + can't reference it -AK */ + if (ops->stack(data, "IRQ") < 0) + break; stack = (unsigned long*)context->previous_esp; if (!stack) break; - printk("%s =======================\n", log_lvl); } } +EXPORT_SYMBOL(dump_trace); + +static void +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + printk(data); + print_symbol(msg, symbol); + printk("\n"); +} + +static void print_trace_warning(void *data, char *msg) +{ + printk("%s%s\n", (char *)data, msg); +} + +static int print_trace_stack(void *data, char *name) +{ + return 0; +} + +/* + * Print one address/symbol entries per line. + */ +static void print_trace_address(void *data, unsigned long addr) +{ + printk("%s [<%08lx>] ", (char *)data, addr); + print_symbol("%s\n", addr); +} + +static struct stacktrace_ops print_trace_ops = { + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, +}; + +static void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long * stack, char *log_lvl) +{ + dump_trace(task, regs, stack, &print_trace_ops, log_lvl); + printk("%s =======================\n", log_lvl); +} -void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack) +void show_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long * stack) { show_trace_log_lvl(task, regs, stack, ""); } -- cgit v1.2.3 From 950fee84557416a3427dd404a13addc4be7b3e6c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:34 +0200 Subject: [PATCH] i386: Terminate backtrace fallback early if unwinder stack pointer is zero Cc: jbeulich@novell.com Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 4ced4285163..86fa7e47f30 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -197,6 +197,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (UNW_SP(&info) >= PAGE_OFFSET) { ops->warning(data, "Leftover inexact backtrace:\n"); stack = (void *)UNW_SP(&info); + if (!stack) + return; } else ops->warning(data, "Full inexact backtrace again:\n"); } else if (call_trace >= 1) -- cgit v1.2.3 From a32cf3975bed3b84491f8ffeb24abe8c45d86ab0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:34 +0200 Subject: [PATCH] i386: Get ebp from unwinder state when continuing fallback backtrace Cc: jbeulich@novell.com Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 86fa7e47f30..bdf949c30c7 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -171,7 +171,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, struct stacktrace_ops *ops, void *data) { - unsigned long ebp; + unsigned long ebp = 0; if (!task) task = current; @@ -199,6 +199,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (void *)UNW_SP(&info); if (!stack) return; + ebp = UNW_FP(&info); } else ops->warning(data, "Full inexact backtrace again:\n"); } else if (call_trace >= 1) @@ -207,20 +208,25 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, ops->warning(data, "Full inexact backtrace again:\n"); } else ops->warning(data, "Inexact backtrace:\n"); - } else if (!stack) { + } + if (!stack) { unsigned long dummy; stack = &dummy; if (task && task != current) stack = (unsigned long *)task->thread.esp; } - if (task == current) { - /* Grab ebp right from our regs */ - asm ("movl %%ebp, %0" : "=r" (ebp) : ); - } else { - /* ebp is the last reg pushed by switch_to */ - ebp = *(unsigned long *) task->thread.esp; +#ifdef CONFIG_FRAME_POINTER + if (!ebp) { + if (task == current) { + /* Grab ebp right from our regs */ + asm ("movl %%ebp, %0" : "=r" (ebp) : ); + } else { + /* ebp is the last reg pushed by switch_to */ + ebp = *(unsigned long *) task->thread.esp; + } } +#endif while (1) { struct thread_info *context; -- cgit v1.2.3 From 5758d5dfef1c514200bda3f29ba700f1c3e3bc99 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 26 Sep 2006 10:52:34 +0200 Subject: [PATCH] i386: fix dubious segment register clear in cpu_init() Fix a very dubious piece of code in arch/i386/kernel/cpu/common.c:cpu_init(). This clears out %fs and %gs, but clobbers %eax in the process without telling gcc. It turns out that gcc happens to be not using %eax at that point anyway so it doesn't matter much, but it looks like a bomb waiting to go off. This does end up saving an instruction, because gcc wants %eax==0 for the set_debugreg()s below. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 70c87de582c..730a7ab7500 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -675,7 +675,7 @@ old_gdt: #endif /* Clear %fs and %gs. */ - asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); + asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0)); /* Clear all 6 debug registers: */ set_debugreg(0, 0); -- cgit v1.2.3 From 3ca113ea74836a80645c79adba24caaa7a74120c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 26 Sep 2006 10:52:34 +0200 Subject: [PATCH] i386: don't taint UP K7's running SMP kernels. We have a test that looks for invalid pairings of certain athlon/durons that weren't designed for SMP, and taint accordingly (with 'S') if we find such a configuration. However, this test shouldn't fire if there's only a single CPU present. It's perfectly valid for an SMP kernel to boot on UP hardware for example. AK: changed to num_possible_cpus() Signed-off-by: Dave Jones Signed-off-by: Andi Kleen --- arch/i386/kernel/smpboot.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 517eb387455..020d873b7d2 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -177,6 +177,9 @@ static void __devinit smp_store_cpu_info(int id) */ if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { + if (num_possible_cpus() == 1) + goto valid_k7; + /* Athlon 660/661 is valid. */ if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) goto valid_k7; -- cgit v1.2.3 From d28c4393a7bf558538e9def269c1caeab6ec056f Mon Sep 17 00:00:00 2001 From: "Prasanna S.P" Date: Tue, 26 Sep 2006 10:52:34 +0200 Subject: [PATCH] x86: error_code is not safe for kprobes This patch moves the entry.S:error_entry to .kprobes.text section, since code marked unsafe for kprobes jumps directly to entry.S::error_entry, that must be marked unsafe as well. This patch also moves all the ".previous.text" asm directives to ".previous" for kprobes section. AK: Following a similar i386 patch from Chuck Ebbert AK: Also merged Jeremy's fix in. +From: Jeremy Fitzhardinge KPROBE_ENTRY does a .section .kprobes.text, and expects its users to do a .previous at the end of the function. Unfortunately, if any code within the function switches sections, for example .fixup, then the .previous ends up putting all subsequent code into .fixup. Worse, any subsequent .fixup code gets intermingled with the code its supposed to be fixing (which is also in .fixup). It's surprising this didn't cause more havok. The fix is to use .pushsection/.popsection, so this stuff nests properly. A further cleanup would be to get rid of all .section/.previous pairs, since they're inherently fragile. +From: Chuck Ebbert <76306.1226@compuserve.com> Because code marked unsafe for kprobes jumps directly to entry.S::error_code, that must be marked unsafe as well. The easiest way to do that is to move the page fault entry point to just before error_code and let it inherit the same section. Also moved all the ".previous" asm directives for kprobes sections to column 1 and removed ".text" from them. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen --- arch/i386/kernel/entry.S | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 87f9f60b803..ba22ec8fab5 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -591,11 +591,9 @@ ENTRY(name) \ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" -ENTRY(divide_error) - RING0_INT_FRAME - pushl $0 # no error code - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_divide_error +KPROBE_ENTRY(page_fault) + RING0_EC_FRAME + pushl $do_page_fault CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: @@ -645,6 +643,7 @@ error_code: call *%edi jmp ret_from_exception CFI_ENDPROC +KPROBE_END(page_fault) ENTRY(coprocessor_error) RING0_INT_FRAME @@ -720,7 +719,8 @@ debug_stack_correct: call do_debug jmp ret_from_exception CFI_ENDPROC - .previous .text +KPROBE_END(debug) + /* * NMI is doubly nasty. It can happen _while_ we're handling * a debug fault, and the debug fault hasn't yet been able to @@ -816,7 +816,7 @@ KPROBE_ENTRY(int3) call do_int3 jmp ret_from_exception CFI_ENDPROC - .previous .text +KPROBE_END(int3) ENTRY(overflow) RING0_INT_FRAME @@ -881,7 +881,7 @@ KPROBE_ENTRY(general_protection) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC - .previous .text +KPROBE_END(general_protection) ENTRY(alignment_check) RING0_EC_FRAME @@ -890,13 +890,14 @@ ENTRY(alignment_check) jmp error_code CFI_ENDPROC -KPROBE_ENTRY(page_fault) - RING0_EC_FRAME - pushl $do_page_fault +ENTRY(divide_error) + RING0_INT_FRAME + pushl $0 # no error code + CFI_ADJUST_CFA_OFFSET 4 + pushl $do_divide_error CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC - .previous .text #ifdef CONFIG_X86_MCE ENTRY(machine_check) -- cgit v1.2.3 From 1edf777803bdd2aeeb04cf44508fd9b88737fba8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] i386/x86-64: Improve Kconfig description of CRASH_DUMP Improve Kconfig description of CONFIG_CRASH_DUMP. Previously it was too brief to be useful. Cc: vgoyal@in.ibm.com Cc: ebiederm@xmission.com Signed-off-by: Andi Kleen --- arch/i386/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index c134a545b37..e34a4dbd6b9 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -761,6 +761,13 @@ config CRASH_DUMP depends on HIGHMEM help Generate crash dump after being started by kexec. + This should be normally only set in special crash dump kernels + which are loaded in the main kernel with kexec-tools into + a specially reserved region and then later executed after + a crash by kdump/kexec. The crash dump kernel must be compiled + to a memory address not used by the main kernel or BIOS using + PHYSICAL_START. + For more details see Documentation/kdump/kdump.txt config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) -- cgit v1.2.3 From 5e4edbb711417be40f350a319db39888a4edd450 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] i386: Fix warning in mpparse.c Fix linux/arch/i386/kernel/mpparse.c: In function #MP_bus_info#: linux/arch/i386/kernel/mpparse.c:232: warning: comparison is always false due to limited range of data type Signed-off-by: Andi Kleen --- arch/i386/kernel/mpparse.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index bdaa75a5bc1..772a4233bfe 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -229,12 +229,14 @@ static void __init MP_bus_info (struct mpc_config_bus *m) mpc_oem_bus_info(m, str, translation_table[mpc_record]); +#if MAX_MP_BUSSES < 256 if (m->mpc_busid >= MAX_MP_BUSSES) { printk(KERN_WARNING "MP table busid value (%d) for bustype %s " " is too large, max. supported is %d\n", m->mpc_busid, str, MAX_MP_BUSSES - 1); return; } +#endif if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; -- cgit v1.2.3 From 474c256841074b913e76e392082373e12103a75d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] i386: make fault notifier unconditional and export it It's needed for external debuggers and overhead is very small. Also make the actual notifier chain they use static Cc: jbeulich@novell.com Signed-off-by: Andi Kleen --- arch/i386/mm/fault.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index f7279468323..0ce86168a0b 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -30,18 +30,20 @@ extern void die(const char *,struct pt_regs *,long); -#ifdef CONFIG_KPROBES -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); + int register_page_fault_notifier(struct notifier_block *nb) { vmalloc_sync_all(); return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); } +EXPORT_SYMBOL_GPL(register_page_fault_notifier); int unregister_page_fault_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); } +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); static inline int notify_page_fault(enum die_val val, const char *str, struct pt_regs *regs, long err, int trap, int sig) @@ -55,14 +57,6 @@ static inline int notify_page_fault(enum die_val val, const char *str, }; return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); } -#else -static inline int notify_page_fault(enum die_val val, const char *str, - struct pt_regs *regs, long err, int trap, int sig) -{ - return NOTIFY_DONE; -} -#endif - /* * Unlock any spinlocks which will prevent us from getting the -- cgit v1.2.3 From e8924acb2ef46b96c93f97025815ef3843cb67a2 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] i386: Make acpi_force static acpi_force can become static. Cc: len.brown@intel.com Signed-off-by: Adrian Bunk Signed-off-by: Andi Kleen --- arch/i386/kernel/acpi/boot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index c809a3f4c0b..0fba420d668 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -36,7 +36,7 @@ #include #include -int __initdata acpi_force = 0; +static int __initdata acpi_force = 0; #ifdef CONFIG_ACPI int acpi_disabled = 0; -- cgit v1.2.3 From 3d08a256da8aed5300bd0752200ece426f49b050 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] i386: Make enable_local_apic static enable_local_apic can now become static. Cc: len.brown@intel.com Signed-off-by: Adrian Bunk Signed-off-by: Andi Kleen --- arch/i386/kernel/apic.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index ce75e71b694..90faae5c5d3 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -52,7 +52,18 @@ static cpumask_t timer_bcast_ipi; /* * Knob to control our willingness to enable the local APIC. */ -int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ +static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ + +static inline void lapic_disable(void) +{ + enable_local_apic = -1; + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); +} + +static inline void lapic_enable(void) +{ + enable_local_apic = 1; +} /* * Debug level -- cgit v1.2.3 From 02ba1a32dbd3d406530a17a2643a8f0f8cbf3acc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] i386: move kernel_thread_helper into entry.S And add proper CFI annotation to it which was previously impossible. This prevents "stuck" messages by the dwarf2 unwinder when reaching the top of a kernel stack. Includes feedback from Jan Beulich Cc: jbeulich@novell.com Signed-off-by: Andi Kleen --- arch/i386/kernel/entry.S | 13 +++++++++++++ arch/i386/kernel/process.c | 9 --------- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index ba22ec8fab5..dede506e5bd 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -950,6 +950,19 @@ ENTRY(arch_unwind_init_running) ENDPROC(arch_unwind_init_running) #endif +ENTRY(kernel_thread_helper) + pushl $0 # fake return address for unwinder + CFI_STARTPROC + movl %edx,%eax + push %edx + CFI_ADJUST_CFA_OFFSET 4 + call *%ebx + push %eax + CFI_ADJUST_CFA_OFFSET 4 + call do_exit + CFI_ENDPROC +ENDPROC(kernel_thread_helper) + .section .rodata,"a" #include "syscall_table.S" diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index b741c3e1a5e..220aeca59c3 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -321,15 +321,6 @@ void show_regs(struct pt_regs * regs) * the "args". */ extern void kernel_thread_helper(void); -__asm__(".section .text\n" - ".align 4\n" - "kernel_thread_helper:\n\t" - "movl %edx,%eax\n\t" - "pushl %edx\n\t" - "call *%ebx\n\t" - "pushl %eax\n\t" - "call do_exit\n" - ".previous"); /* * Create a kernel thread -- cgit v1.2.3 From 522e93e3fcdbf00ba85c72fde6df28cfc0486a65 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] i386: Descriptor and trap table cleanups. The implementation comes from Zach's [RFC, PATCH 10/24] i386 Vmi descriptor changes: Descriptor and trap table cleanups. Add cleanly written accessors for IDT and GDT gates so the subarch may override them. Note that this allows the hypervisor to transparently tweak the DPL of the descriptors as well as the RPL of segments in those descriptors, with no unnecessary kernel code modification. It also allows the hypervisor implementation of the VMI to tweak the gates, allowing for custom exception frames or extra layers of indirection above the guest fault / IRQ handlers. Signed-off-by: Zachary Amsden Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index bdf949c30c7..00d643f3de4 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -1165,20 +1165,6 @@ void __init trap_init_f00f_bug(void) } #endif -#define _set_gate(gate_addr,type,dpl,addr,seg) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ - "movw %4,%%dx\n\t" \ - "movl %%eax,%0\n\t" \ - "movl %%edx,%1" \ - :"=m" (*((long *) (gate_addr))), \ - "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ - :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ - "3" ((char *) (addr)),"2" ((seg) << 16)); \ -} while (0) - - /* * This needs to use 'idt_table' rather than 'idt', and * thus use the _nonmapped_ version of the IDT, as the @@ -1187,7 +1173,7 @@ do { \ */ void set_intr_gate(unsigned int n, void *addr) { - _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); + _set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS); } /* @@ -1195,22 +1181,22 @@ void set_intr_gate(unsigned int n, void *addr) */ static inline void set_system_intr_gate(unsigned int n, void *addr) { - _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS); + _set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS); } static void __init set_trap_gate(unsigned int n, void *addr) { - _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); + _set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS); } static void __init set_system_gate(unsigned int n, void *addr) { - _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); + _set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS); } static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) { - _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); + _set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3)); } -- cgit v1.2.3 From aada06c9b7f4cdedbeb2bc905893cf1923a0abad Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] i386: clean up topology.c There is no need to duplicate the topology_init() function. Signed-off-by: Magnus Damm Signed-off-by: Andi Kleen --- arch/i386/kernel/topology.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index e2e281d4bcc..07d6da36a82 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c @@ -28,6 +28,7 @@ #include #include #include +#include #include static struct i386_cpu cpu_devices[NR_CPUS]; @@ -55,34 +56,18 @@ EXPORT_SYMBOL(arch_register_cpu); EXPORT_SYMBOL(arch_unregister_cpu); #endif /*CONFIG_HOTPLUG_CPU*/ - - -#ifdef CONFIG_NUMA -#include - static int __init topology_init(void) { int i; +#ifdef CONFIG_NUMA for_each_online_node(i) register_one_node(i); +#endif /* CONFIG_NUMA */ for_each_present_cpu(i) arch_register_cpu(i); return 0; } -#else /* !CONFIG_NUMA */ - -static int __init topology_init(void) -{ - int i; - - for_each_present_cpu(i) - arch_register_cpu(i); - return 0; -} - -#endif /* CONFIG_NUMA */ - subsys_initcall(topology_init); -- cgit v1.2.3 From c0d83745cc67ed71a08c14739a0b286d0239b1e2 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] i386: mark two more functions as __init cyrix_identify() should be __init because transmeta_identify() is. tsc_init() is only called from setup_arch() which is marked as __init. These two section mismatches have been detected using running modpost on a vmlinux image compiled with CONFIG_RELOCATABLE=y. Signed-off-by: Magnus Damm Signed-off-by: Andi Kleen --- arch/i386/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 7e0d8dab207..b8fa0a8b2e4 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -192,7 +192,7 @@ int recalibrate_cpu_khz(void) EXPORT_SYMBOL(recalibrate_cpu_khz); -void tsc_init(void) +void __init tsc_init(void) { if (!cpu_has_tsc || tsc_disable) return; -- cgit v1.2.3 From 73fea175303926055440c06bc8894f0c5c58afc8 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Tue, 26 Sep 2006 10:52:35 +0200 Subject: [PATCH] i386: Support physical cpu hotplug for x86_64 This patch enables ACPI based physical CPU hotplug support for x86_64. Implements acpi_map_lsapic() and acpi_unmap_lsapic() to support physical cpu hotplug. Signed-off-by: Ashok Raj Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Brown, Len" Signed-off-by: Andrew Morton --- arch/i386/kernel/acpi/boot.c | 69 +++++++++++++++++++++++++++++++++++++++++--- arch/i386/kernel/mpparse.c | 2 +- 2 files changed, 66 insertions(+), 5 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 0fba420d668..8a6c5b41234 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -512,16 +513,76 @@ EXPORT_SYMBOL(acpi_register_gsi); #ifdef CONFIG_ACPI_HOTPLUG_CPU int acpi_map_lsapic(acpi_handle handle, int *pcpu) { - /* TBD */ - return -EINVAL; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object *obj; + struct acpi_table_lapic *lapic; + cpumask_t tmp_map, new_map; + u8 physid; + int cpu; + + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) + return -EINVAL; + + if (!buffer.length || !buffer.pointer) + return -EINVAL; + + obj = buffer.pointer; + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length < sizeof(*lapic)) { + kfree(buffer.pointer); + return -EINVAL; + } + + lapic = (struct acpi_table_lapic *)obj->buffer.pointer; + + if ((lapic->header.type != ACPI_MADT_LAPIC) || + (!lapic->flags.enabled)) { + kfree(buffer.pointer); + return -EINVAL; + } + + physid = lapic->id; + + kfree(buffer.pointer); + buffer.length = ACPI_ALLOCATE_BUFFER; + buffer.pointer = NULL; + + tmp_map = cpu_present_map; + mp_register_lapic(physid, lapic->flags.enabled); + + /* + * If mp_register_lapic successfully generates a new logical cpu + * number, then the following will get us exactly what was mapped + */ + cpus_andnot(new_map, cpu_present_map, tmp_map); + if (cpus_empty(new_map)) { + printk ("Unable to map lapic to logical cpu number\n"); + return -EINVAL; + } + + cpu = first_cpu(new_map); + + *pcpu = cpu; + return 0; } EXPORT_SYMBOL(acpi_map_lsapic); int acpi_unmap_lsapic(int cpu) { - /* TBD */ - return -EINVAL; + int i; + + for_each_possible_cpu(i) { + if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) { + x86_acpiid_to_apicid[i] = -1; + break; + } + } + x86_cpu_to_apicid[cpu] = -1; + cpu_clear(cpu, cpu_present_map); + num_processors--; + + return (0); } EXPORT_SYMBOL(acpi_unmap_lsapic); diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 772a4233bfe..442aaf8c77e 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -69,7 +69,7 @@ unsigned int def_to_bigsmp = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; /* Internal processor count */ -static unsigned int __devinitdata num_processors; +unsigned int __cpuinitdata num_processors; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; -- cgit v1.2.3 From 68bbc172cd1b0ee01814304b8a7bef8922d5fdca Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Sep 2006 10:52:36 +0200 Subject: [PATCH] i386: remove redundant generic_identify() calls when identifying cpus cpu_dev->c_identify is only called from arch/i386/common.c:identify_cpu(), and this after generic_identify() already has been called. There is no need to call this function twice and hook it in c_identify - but I may be wrong, please double check before applying. This patch also removes generic_identify() from cpu.h to avoid unnecessary future nesting. Signed-off-by: Magnus Damm Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/amd.c | 1 - arch/i386/kernel/cpu/common.c | 2 +- arch/i386/kernel/cpu/cpu.h | 2 -- arch/i386/kernel/cpu/cyrix.c | 2 -- arch/i386/kernel/cpu/intel.c | 1 - arch/i386/kernel/cpu/nexgen.c | 1 - arch/i386/kernel/cpu/transmeta.c | 1 - 7 files changed, 1 insertion(+), 9 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index e6a2d6b80cd..d6e7778bac7 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -275,7 +275,6 @@ static struct cpu_dev amd_cpu_dev __initdata = { }, }, .c_init = init_amd, - .c_identify = generic_identify, .c_size_cache = amd_size_cache, }; diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 730a7ab7500..24ac51a0237 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -265,7 +265,7 @@ static void __init early_cpu_detect(void) } } -void __cpuinit generic_identify(struct cpuinfo_x86 * c) +static void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; int ebx; diff --git a/arch/i386/kernel/cpu/cpu.h b/arch/i386/kernel/cpu/cpu.h index 5a1d4f163e8..2f6432cef6f 100644 --- a/arch/i386/kernel/cpu/cpu.h +++ b/arch/i386/kernel/cpu/cpu.h @@ -24,7 +24,5 @@ extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; extern int get_model_name(struct cpuinfo_x86 *c); extern void display_cacheinfo(struct cpuinfo_x86 *c); -extern void generic_identify(struct cpuinfo_x86 * c); - extern void early_intel_workaround(struct cpuinfo_x86 *c); diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index f03b7f94c30..bb02ed1fe56 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -427,7 +427,6 @@ static void cyrix_identify(struct cpuinfo_x86 * c) local_irq_restore(flags); } } - generic_identify(c); } static struct cpu_dev cyrix_cpu_dev __initdata = { @@ -457,7 +456,6 @@ static struct cpu_dev nsc_cpu_dev __initdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, .c_init = init_nsc, - .c_identify = generic_identify, }; int __init nsc_init_cpu(void) diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 5a2e270924b..26243e019d1 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -263,7 +263,6 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { }, }, .c_init = init_intel, - .c_identify = generic_identify, .c_size_cache = intel_size_cache, }; diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c index ad87fa58058..df188bfd759 100644 --- a/arch/i386/kernel/cpu/nexgen.c +++ b/arch/i386/kernel/cpu/nexgen.c @@ -38,7 +38,6 @@ static void __init nexgen_identify(struct cpuinfo_x86 * c) if ( deep_magic_nexgen_probe() ) { strcpy(c->x86_vendor_id, "NexGenDriven"); } - generic_identify(c); } static struct cpu_dev nexgen_cpu_dev __initdata = { diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index 7214c9b577a..4027c369bc9 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c @@ -88,7 +88,6 @@ static void __init init_transmeta(struct cpuinfo_x86 *c) static void __init transmeta_identify(struct cpuinfo_x86 * c) { u32 xlvl; - generic_identify(c); /* Transmeta-defined flags: level 0x80860001 */ xlvl = cpuid_eax(0x80860000); -- cgit v1.2.3 From 95414930548871c6c92a5b0e607b12b81f3d84d8 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Sep 2006 10:52:36 +0200 Subject: [PATCH] i386: mark cpu_dev structures as __cpuinitdata The different cpu_dev structures are all used from __cpuinit callers what I can tell. So mark them as __cpuinitdata instead of __initdata. I am a little bit unsure about arch/i386/common.c:default_cpu, especially when it comes to the purpose of this_cpu. Signed-off-by: Magnus Damm Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/amd.c | 2 +- arch/i386/kernel/cpu/centaur.c | 2 +- arch/i386/kernel/cpu/common.c | 2 +- arch/i386/kernel/cpu/cyrix.c | 4 ++-- arch/i386/kernel/cpu/nexgen.c | 2 +- arch/i386/kernel/cpu/rise.c | 2 +- arch/i386/kernel/cpu/transmeta.c | 2 +- arch/i386/kernel/cpu/umc.c | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index d6e7778bac7..eb134a20a7b 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -259,7 +259,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) return size; } -static struct cpu_dev amd_cpu_dev __initdata = { +static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, .c_models = { diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c index bd75629dd26..b4d70dcea9f 100644 --- a/arch/i386/kernel/cpu/centaur.c +++ b/arch/i386/kernel/cpu/centaur.c @@ -457,7 +457,7 @@ static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size return size; } -static struct cpu_dev centaur_cpu_dev __initdata = { +static struct cpu_dev centaur_cpu_dev __cpuinitdata = { .c_vendor = "Centaur", .c_ident = { "CentaurHauls" }, .c_init = init_centaur, diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 24ac51a0237..99144449b9a 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -49,7 +49,7 @@ static void default_init(struct cpuinfo_x86 * c) } } -static struct cpu_dev default_cpu = { +static struct cpu_dev __cpuinitdata default_cpu = { .c_init = default_init, .c_vendor = "Unknown", }; diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index bb02ed1fe56..c2a0da9e0b5 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -429,7 +429,7 @@ static void cyrix_identify(struct cpuinfo_x86 * c) } } -static struct cpu_dev cyrix_cpu_dev __initdata = { +static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { .c_vendor = "Cyrix", .c_ident = { "CyrixInstead" }, .c_init = init_cyrix, @@ -452,7 +452,7 @@ static int __init cyrix_exit_cpu(void) late_initcall(cyrix_exit_cpu); -static struct cpu_dev nsc_cpu_dev __initdata = { +static struct cpu_dev nsc_cpu_dev __cpuinitdata = { .c_vendor = "NSC", .c_ident = { "Geode by NSC" }, .c_init = init_nsc, diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c index df188bfd759..9d622598c05 100644 --- a/arch/i386/kernel/cpu/nexgen.c +++ b/arch/i386/kernel/cpu/nexgen.c @@ -40,7 +40,7 @@ static void __init nexgen_identify(struct cpuinfo_x86 * c) } } -static struct cpu_dev nexgen_cpu_dev __initdata = { +static struct cpu_dev nexgen_cpu_dev __cpuinitdata = { .c_vendor = "Nexgen", .c_ident = { "NexGenDriven" }, .c_models = { diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c index d08d5a2811c..b597e435f6c 100644 --- a/arch/i386/kernel/cpu/rise.c +++ b/arch/i386/kernel/cpu/rise.c @@ -28,7 +28,7 @@ static void __init init_rise(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_CX8, c->x86_capability); } -static struct cpu_dev rise_cpu_dev __initdata = { +static struct cpu_dev rise_cpu_dev __cpuinitdata = { .c_vendor = "Rise", .c_ident = { "RiseRiseRise" }, .c_models = { diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index 4027c369bc9..0b6e8392ed3 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c @@ -97,7 +97,7 @@ static void __init transmeta_identify(struct cpuinfo_x86 * c) } } -static struct cpu_dev transmeta_cpu_dev __initdata = { +static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { .c_vendor = "Transmeta", .c_ident = { "GenuineTMx86", "TransmetaCPU" }, .c_init = init_transmeta, diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c index 2cd988f6dc5..392b195948d 100644 --- a/arch/i386/kernel/cpu/umc.c +++ b/arch/i386/kernel/cpu/umc.c @@ -10,7 +10,7 @@ static void __init init_umc(struct cpuinfo_x86 * c) } -static struct cpu_dev umc_cpu_dev __initdata = { +static struct cpu_dev umc_cpu_dev __cpuinitdata = { .c_vendor = "UMC", .c_ident = { "UMC UMC UMC" }, .c_models = { -- cgit v1.2.3 From b4af3f7cf11e6b5904af08a652d4a2429af17c74 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Sep 2006 10:52:36 +0200 Subject: [PATCH] i386: mark cpu init functions as __cpuinit, data as __cpuinitdata Mark i386-specific cpu init functions as __cpuinit. They are all only called from arch/i386/common.c:identify_cpu() that already is marked as __cpuinit. This patch also removes the empty function init_umc(). Signed-off-by: Magnus Damm Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/amd.c | 2 +- arch/i386/kernel/cpu/centaur.c | 20 ++++++++++---------- arch/i386/kernel/cpu/common.c | 2 +- arch/i386/kernel/cpu/cyrix.c | 32 ++++++++++++++++---------------- arch/i386/kernel/cpu/nexgen.c | 2 +- arch/i386/kernel/cpu/rise.c | 2 +- arch/i386/kernel/cpu/transmeta.c | 2 +- arch/i386/kernel/cpu/umc.c | 5 ----- 8 files changed, 31 insertions(+), 36 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index eb134a20a7b..856cd08f2f3 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -22,7 +22,7 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); -static void __init init_amd(struct cpuinfo_x86 *c) +static void __cpuinit init_amd(struct cpuinfo_x86 *c) { u32 l, h; int mbytes = num_physpages >> (20-PAGE_SHIFT); diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c index b4d70dcea9f..34a81ede789 100644 --- a/arch/i386/kernel/cpu/centaur.c +++ b/arch/i386/kernel/cpu/centaur.c @@ -9,7 +9,7 @@ #ifdef CONFIG_X86_OOSTORE -static u32 __init power2(u32 x) +static u32 __cpuinit power2(u32 x) { u32 s=1; while(s<=x) @@ -22,7 +22,7 @@ static u32 __init power2(u32 x) * Set up an actual MCR */ -static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key) +static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) { u32 lo, hi; @@ -40,7 +40,7 @@ static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key) * Shortcut: We know you can't put 4Gig of RAM on a winchip */ -static u32 __init ramtop(void) /* 16388 */ +static u32 __cpuinit ramtop(void) /* 16388 */ { int i; u32 top = 0; @@ -91,7 +91,7 @@ static u32 __init ramtop(void) /* 16388 */ * Compute a set of MCR's to give maximum coverage */ -static int __init centaur_mcr_compute(int nr, int key) +static int __cpuinit centaur_mcr_compute(int nr, int key) { u32 mem = ramtop(); u32 root = power2(mem); @@ -166,7 +166,7 @@ static int __init centaur_mcr_compute(int nr, int key) return ct; } -static void __init centaur_create_optimal_mcr(void) +static void __cpuinit centaur_create_optimal_mcr(void) { int i; /* @@ -189,7 +189,7 @@ static void __init centaur_create_optimal_mcr(void) wrmsr(MSR_IDT_MCR0+i, 0, 0); } -static void __init winchip2_create_optimal_mcr(void) +static void __cpuinit winchip2_create_optimal_mcr(void) { u32 lo, hi; int i; @@ -227,7 +227,7 @@ static void __init winchip2_create_optimal_mcr(void) * Handle the MCR key on the Winchip 2. */ -static void __init winchip2_unprotect_mcr(void) +static void __cpuinit winchip2_unprotect_mcr(void) { u32 lo, hi; u32 key; @@ -239,7 +239,7 @@ static void __init winchip2_unprotect_mcr(void) wrmsr(MSR_IDT_MCR_CTRL, lo, hi); } -static void __init winchip2_protect_mcr(void) +static void __cpuinit winchip2_protect_mcr(void) { u32 lo, hi; @@ -257,7 +257,7 @@ static void __init winchip2_protect_mcr(void) #define RNG_ENABLED (1 << 3) #define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ -static void __init init_c3(struct cpuinfo_x86 *c) +static void __cpuinit init_c3(struct cpuinfo_x86 *c) { u32 lo, hi; @@ -303,7 +303,7 @@ static void __init init_c3(struct cpuinfo_x86 *c) display_cacheinfo(c); } -static void __init init_centaur(struct cpuinfo_x86 *c) +static void __cpuinit init_centaur(struct cpuinfo_x86 *c) { enum { ECX8=1<<1, diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 99144449b9a..2799baaadf4 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -36,7 +36,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; extern int disable_pse; -static void default_init(struct cpuinfo_x86 * c) +static void __cpuinit default_init(struct cpuinfo_x86 * c) { /* Not much we can do here... */ /* Check if at least it has cpuid */ diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index c2a0da9e0b5..d34b1bdb15f 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -52,25 +52,25 @@ static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) * Actually since bugs.h doesn't even reference this perhaps someone should * fix the documentation ??? */ -static unsigned char Cx86_dir0_msb __initdata = 0; +static unsigned char Cx86_dir0_msb __cpuinitdata = 0; -static char Cx86_model[][9] __initdata = { +static char Cx86_model[][9] __cpuinitdata = { "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", "M II ", "Unknown" }; -static char Cx486_name[][5] __initdata = { +static char Cx486_name[][5] __cpuinitdata = { "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", "SRx2", "DRx2" }; -static char Cx486S_name[][4] __initdata = { +static char Cx486S_name[][4] __cpuinitdata = { "S", "S2", "Se", "S2e" }; -static char Cx486D_name[][4] __initdata = { +static char Cx486D_name[][4] __cpuinitdata = { "DX", "DX2", "?", "?", "?", "DX4" }; -static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock"; -static char cyrix_model_mult1[] __initdata = "12??43"; -static char cyrix_model_mult2[] __initdata = "12233445"; +static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; +static char cyrix_model_mult1[] __cpuinitdata = "12??43"; +static char cyrix_model_mult2[] __cpuinitdata = "12233445"; /* * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old @@ -82,7 +82,7 @@ static char cyrix_model_mult2[] __initdata = "12233445"; extern void calibrate_delay(void) __init; -static void __init check_cx686_slop(struct cpuinfo_x86 *c) +static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) { unsigned long flags; @@ -107,7 +107,7 @@ static void __init check_cx686_slop(struct cpuinfo_x86 *c) } -static void __init set_cx86_reorder(void) +static void __cpuinit set_cx86_reorder(void) { u8 ccr3; @@ -122,7 +122,7 @@ static void __init set_cx86_reorder(void) setCx86(CX86_CCR3, ccr3); } -static void __init set_cx86_memwb(void) +static void __cpuinit set_cx86_memwb(void) { u32 cr0; @@ -137,7 +137,7 @@ static void __init set_cx86_memwb(void) setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); } -static void __init set_cx86_inc(void) +static void __cpuinit set_cx86_inc(void) { unsigned char ccr3; @@ -158,7 +158,7 @@ static void __init set_cx86_inc(void) * Configure later MediaGX and/or Geode processor. */ -static void __init geode_configure(void) +static void __cpuinit geode_configure(void) { unsigned long flags; u8 ccr3, ccr4; @@ -184,14 +184,14 @@ static void __init geode_configure(void) #ifdef CONFIG_PCI -static struct pci_device_id __initdata cyrix_55x0[] = { +static struct pci_device_id __cpuinitdata cyrix_55x0[] = { { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) }, { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) }, { }, }; #endif -static void __init init_cyrix(struct cpuinfo_x86 *c) +static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) { unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; char *buf = c->x86_model_id; @@ -346,7 +346,7 @@ static void __init init_cyrix(struct cpuinfo_x86 *c) /* * Handle National Semiconductor branded processors */ -static void __init init_nsc(struct cpuinfo_x86 *c) +static void __cpuinit init_nsc(struct cpuinfo_x86 *c) { /* There may be GX1 processors in the wild that are branded * NSC and not Cyrix. diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c index 9d622598c05..d8cc88224d1 100644 --- a/arch/i386/kernel/cpu/nexgen.c +++ b/arch/i386/kernel/cpu/nexgen.c @@ -27,7 +27,7 @@ static int __init deep_magic_nexgen_probe(void) return ret; } -static void __init init_nexgen(struct cpuinfo_x86 * c) +static void __cpuinit init_nexgen(struct cpuinfo_x86 * c) { c->x86_cache_size = 256; /* A few had 1 MB... */ } diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c index b597e435f6c..9317f741498 100644 --- a/arch/i386/kernel/cpu/rise.c +++ b/arch/i386/kernel/cpu/rise.c @@ -5,7 +5,7 @@ #include "cpu.h" -static void __init init_rise(struct cpuinfo_x86 *c) +static void __cpuinit init_rise(struct cpuinfo_x86 *c) { printk("CPU: Rise iDragon"); if (c->x86_model > 2) diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index 0b6e8392ed3..725404cd77f 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c @@ -5,7 +5,7 @@ #include #include "cpu.h" -static void __init init_transmeta(struct cpuinfo_x86 *c) +static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) { unsigned int cap_mask, uk, max, dummy; unsigned int cms_rev1, cms_rev2; diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c index 392b195948d..1bf3f87e9c5 100644 --- a/arch/i386/kernel/cpu/umc.c +++ b/arch/i386/kernel/cpu/umc.c @@ -5,10 +5,6 @@ /* UMC chips appear to be only either 386 or 486, so no special init takes place. */ -static void __init init_umc(struct cpuinfo_x86 * c) -{ - -} static struct cpu_dev umc_cpu_dev __cpuinitdata = { .c_vendor = "UMC", @@ -21,7 +17,6 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = { } }, }, - .c_init = init_umc, }; int __init umc_init_cpu(void) -- cgit v1.2.3 From 5f0f1c166647860bb2c2a206338e7d9af3834753 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Sep 2006 10:52:36 +0200 Subject: [PATCH] i386: mark cpu identify functions as __cpuinit Mark i386-specific cpu identification functions as __cpuinit. They are all only called from arch/i386/common.c:identify_cpu() that already is marked as __cpuinit. Signed-off-by: Magnus Damm Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/cyrix.c | 4 ++-- arch/i386/kernel/cpu/nexgen.c | 4 ++-- arch/i386/kernel/cpu/transmeta.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index d34b1bdb15f..c0c3b59de32 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c @@ -12,7 +12,7 @@ /* * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU */ -static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) { unsigned char ccr2, ccr3; unsigned long flags; @@ -394,7 +394,7 @@ static inline int test_cyrix_52div(void) return (unsigned char) (test >> 8) == 0x02; } -static void cyrix_identify(struct cpuinfo_x86 * c) +static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) { /* Detect Cyrix with disabled CPUID */ if ( c->x86 == 4 && test_cyrix_52div() ) { diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c index d8cc88224d1..8bf23cc80c6 100644 --- a/arch/i386/kernel/cpu/nexgen.c +++ b/arch/i386/kernel/cpu/nexgen.c @@ -10,7 +10,7 @@ * to have CPUID. (Thanks to Herbert Oppmann) */ -static int __init deep_magic_nexgen_probe(void) +static int __cpuinit deep_magic_nexgen_probe(void) { int ret; @@ -32,7 +32,7 @@ static void __cpuinit init_nexgen(struct cpuinfo_x86 * c) c->x86_cache_size = 256; /* A few had 1 MB... */ } -static void __init nexgen_identify(struct cpuinfo_x86 * c) +static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c) { /* Detect NexGen with old hypercode */ if ( deep_magic_nexgen_probe() ) { diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index 725404cd77f..4056fb7d2cd 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c @@ -85,7 +85,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) #endif } -static void __init transmeta_identify(struct cpuinfo_x86 * c) +static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c) { u32 xlvl; -- cgit v1.2.3 From e9dff0ee6694b2edd40b1b448cb786f6a7b02336 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Sep 2006 10:52:36 +0200 Subject: [PATCH] i386: mark cpu cache functions as __cpuinit Mark i386-specific cpu cache functions as __cpuinit. They are all only called from arch/i386/common.c:display_cache_info() that already is marked as __cpuinit. Signed-off-by: Magnus Damm Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/amd.c | 2 +- arch/i386/kernel/cpu/centaur.c | 2 +- arch/i386/kernel/cpu/intel.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 856cd08f2f3..e4758095d87 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -246,7 +246,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) num_cache_leaves = 3; } -static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) +static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) { /* AMD errata T13 (order #21922) */ if ((c->x86 == 6)) { diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c index 34a81ede789..8c25047975c 100644 --- a/arch/i386/kernel/cpu/centaur.c +++ b/arch/i386/kernel/cpu/centaur.c @@ -442,7 +442,7 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c) } } -static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) +static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) { /* VIA C3 CPUs (670-68F) need further shifting. */ if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 26243e019d1..94a95aa5227 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -198,7 +198,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) } -static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) +static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) { /* Intel PIII Tualatin. This comes in two flavours. * One has 256kb of cache, the other 512. We have no way -- cgit v1.2.3 From 6f6b1e0477ccb2f25a9b045e38440347d2ce21c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=E1zquez=20Cao?= Date: Tue, 26 Sep 2006 10:52:36 +0200 Subject: [PATCH] i386: Disallow kprobes on NMI handlers A kprobe executes IRET early and that could cause NMI recursion and stack corruption. Note: This problem was originally spotted by Andi Kleen. This patch adds fixes not included in his original patch. [AK: Jan Beulich originally discovered these classes of bugs] Signed-off-by: Fernando Vazquez Signed-off-by: Andi Kleen --- arch/i386/kernel/mca.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c index cd5456f14af..eb57a851789 100644 --- a/arch/i386/kernel/mca.c +++ b/arch/i386/kernel/mca.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -414,7 +415,8 @@ subsys_initcall(mca_init); /*--------------------------------------------------------------------*/ -static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) +static __kprobes void +mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) { int slot = mca_dev->slot; @@ -444,7 +446,7 @@ static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) /*--------------------------------------------------------------------*/ -static int mca_handle_nmi_callback(struct device *dev, void *data) +static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data) { struct mca_device *mca_dev = to_mca_device(dev); unsigned char pos5; @@ -462,7 +464,7 @@ static int mca_handle_nmi_callback(struct device *dev, void *data) return 0; } -void mca_handle_nmi(void) +void __kprobes mca_handle_nmi(void) { /* First try - scan the various adapters and see if a specific * adapter was responsible for the error. -- cgit v1.2.3 From 06039754d775d3e48e4a292e4f353321205eff53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=E1zquez=20Cao?= Date: Tue, 26 Sep 2006 10:52:36 +0200 Subject: [PATCH] i386: Disallow kprobes on NMI handlers A kprobe executes IRET early and that could cause NMI recursion and stack corruption. Note: This problem was originally spotted and solved by Andi Kleen in the x86_64 architecture. This patch is an adaption of his patch for i386. AK: Merged with current code which was a bit different. AK: Removed printk in nmi handler that shouldn't be there in the first time AK: Added missing include. AK: added KPROBES_END Signed-off-by: Fernando Vazquez Signed-off-by: Andi Kleen --- arch/i386/kernel/entry.S | 3 ++- arch/i386/kernel/nmi.c | 6 +++--- arch/i386/kernel/traps.c | 15 +++++++++------ 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index dede506e5bd..0928f70639a 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -729,7 +729,7 @@ KPROBE_END(debug) * check whether we got an NMI on the debug path where the debug * fault happened on the sysenter path. */ -ENTRY(nmi) +KPROBE_ENTRY(nmi) RING0_INT_FRAME pushl %eax CFI_ADJUST_CFA_OFFSET 4 @@ -805,6 +805,7 @@ nmi_16bit_stack: .align 4 .long 1b,iret_exc .previous +KPROBE_END(nmi) KPROBE_ENTRY(int3) RING0_INT_FRAME diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 7b9a053effa..dbda706fdd1 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -882,7 +883,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog); extern void die_nmi(struct pt_regs *, const char *msg); -int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) +__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { /* @@ -962,8 +963,7 @@ int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) * This matches the old behaviour. */ rc = 1; - } else - printk(KERN_WARNING "Unknown enabled NMI hardware?!\n"); + } } done: return rc; diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 00d643f3de4..5c0f4960c67 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -689,7 +689,8 @@ gp_in_kernel: } } -static void mem_parity_error(unsigned char reason, struct pt_regs * regs) +static __kprobes void +mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " "CPU %d.\n", reason, smp_processor_id()); @@ -704,7 +705,8 @@ static void mem_parity_error(unsigned char reason, struct pt_regs * regs) clear_mem_error(reason); } -static void io_check_error(unsigned char reason, struct pt_regs * regs) +static __kprobes void +io_check_error(unsigned char reason, struct pt_regs * regs) { unsigned long i; @@ -720,7 +722,8 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +static __kprobes void +unknown_nmi_error(unsigned char reason, struct pt_regs * regs) { #ifdef CONFIG_MCA /* Might actually be able to figure out what the guilty party @@ -741,7 +744,7 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) static DEFINE_SPINLOCK(nmi_print_lock); -void die_nmi (struct pt_regs *regs, const char *msg) +void __kprobes die_nmi(struct pt_regs *regs, const char *msg) { if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) @@ -773,7 +776,7 @@ void die_nmi (struct pt_regs *regs, const char *msg) do_exit(SIGSEGV); } -static void default_do_nmi(struct pt_regs * regs) +static __kprobes void default_do_nmi(struct pt_regs * regs) { unsigned char reason = 0; @@ -811,7 +814,7 @@ static void default_do_nmi(struct pt_regs * regs) reassert_nmi(); } -fastcall void do_nmi(struct pt_regs * regs, long error_code) +fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) { int cpu; -- cgit v1.2.3 From a549b86dd0f3cbffcd5f9343f4ae7fcd59f7e756 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Tue, 26 Sep 2006 10:52:37 +0200 Subject: [PATCH] i386: annotate FIX_STACK() and the rest of nmi() In i386's entry.S, FIX_STACK() needs annotation because it replaces the stack pointer. And the rest of nmi() needs annotation in order to compile with these new annotations. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen --- arch/i386/kernel/entry.S | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 0928f70639a..4b084524922 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -701,9 +701,15 @@ device_not_available_emulate: jne ok; \ label: \ movl TSS_sysenter_esp0+offset(%esp),%esp; \ + CFI_DEF_CFA esp, 0; \ + CFI_UNDEFINED eip; \ pushfl; \ + CFI_ADJUST_CFA_OFFSET 4; \ pushl $__KERNEL_CS; \ - pushl $sysenter_past_esp + CFI_ADJUST_CFA_OFFSET 4; \ + pushl $sysenter_past_esp; \ + CFI_ADJUST_CFA_OFFSET 4; \ + CFI_REL_OFFSET eip, 0 KPROBE_ENTRY(debug) RING0_INT_FRAME @@ -754,6 +760,7 @@ KPROBE_ENTRY(nmi) cmpl $sysenter_entry,12(%esp) je nmi_debug_stack_check nmi_stack_correct: + /* We have a RING0_INT_FRAME here */ pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL @@ -764,9 +771,12 @@ nmi_stack_correct: CFI_ENDPROC nmi_stack_fixup: + RING0_INT_FRAME FIX_STACK(12,nmi_stack_correct, 1) jmp nmi_stack_correct + nmi_debug_stack_check: + /* We have a RING0_INT_FRAME here */ cmpw $__KERNEL_CS,16(%esp) jne nmi_stack_correct cmpl $debug,(%esp) @@ -777,8 +787,10 @@ nmi_debug_stack_check: jmp nmi_stack_correct nmi_16bit_stack: - RING0_INT_FRAME - /* create the pointer to lss back */ + /* We have a RING0_INT_FRAME here. + * + * create the pointer to lss back + */ pushl %ss CFI_ADJUST_CFA_OFFSET 4 pushl %esp -- cgit v1.2.3 From 575400d1b483fbe9e03c68758059bfaf4e4768d1 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 26 Sep 2006 10:52:38 +0200 Subject: [PATCH] i386: Fix the EDD code misparsing the command line The EDD code would scan the command line as a fixed array, without taking account of either whitespace, null-termination, the old command-line protocol, late overrides early, or the fact that the command line may not be reachable from INITSEG. This should fix those problems, and enable us to use a longer command line. Signed-off-by: H. Peter Anvin Signed-off-by: Andi Kleen --- arch/i386/boot/edd.S | 97 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 22 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/boot/edd.S b/arch/i386/boot/edd.S index 4b84ea216f2..34321368011 100644 --- a/arch/i386/boot/edd.S +++ b/arch/i386/boot/edd.S @@ -15,42 +15,95 @@ #include #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) + +# It is assumed that %ds == INITSEG here + movb $0, (EDD_MBR_SIG_NR_BUF) movb $0, (EDDNR) -# Check the command line for two options: +# Check the command line for options: # edd=of disables EDD completely (edd=off) # edd=sk skips the MBR test (edd=skipmbr) +# edd=on re-enables EDD (edd=on) + pushl %esi - cmpl $0, %cs:cmd_line_ptr - jz done_cl + movw $edd_mbr_sig_start, %di # Default to edd=on + movl %cs:(cmd_line_ptr), %esi -# ds:esi has the pointer to the command line now - movl $(COMMAND_LINE_SIZE-7), %ecx -# loop through kernel command line one byte at a time -cl_loop: - cmpl $EDD_CL_EQUALS, (%si) + andl %esi, %esi + jz old_cl # Old boot protocol? + +# Convert to a real-mode pointer in fs:si + movl %esi, %eax + shrl $4, %eax + movw %ax, %fs + andw $0xf, %si + jmp have_cl_pointer + +# Old-style boot protocol? +old_cl: + push %ds # aka INITSEG + pop %fs + + cmpw $0xa33f, (0x20) + jne done_cl # No command line at all? + movw (0x22), %si # Pointer relative to INITSEG + +# fs:si has the pointer to the command line now +have_cl_pointer: + +# Loop through kernel command line one byte at a time. Just in +# case the loader is buggy and failed to null-terminate the command line +# terminate if we get close enough to the end of the segment that we +# cannot fit "edd=XX"... +cl_atspace: + cmpw $-5, %si # Watch for segment wraparound + jae done_cl + movl %fs:(%si), %eax + andb %al, %al # End of line? + jz done_cl + cmpl $EDD_CL_EQUALS, %eax jz found_edd_equals - incl %esi - loop cl_loop - jmp done_cl + cmpb $0x20, %al # <= space consider whitespace + ja cl_skipword + incw %si + jmp cl_atspace + +cl_skipword: + cmpw $-5, %si # Watch for segment wraparound + jae done_cl + movb %fs:(%si), %al # End of string? + andb %al, %al + jz done_cl + cmpb $0x20, %al + jbe cl_atspace + incw %si + jmp cl_skipword + found_edd_equals: # only looking at first two characters after equals - addl $4, %esi - cmpw $EDD_CL_OFF, (%si) # edd=of - jz do_edd_off - cmpw $EDD_CL_SKIP, (%si) # edd=sk - jz do_edd_skipmbr - jmp done_cl +# late overrides early on the command line, so keep going after finding something + movw %fs:4(%si), %ax + cmpw $EDD_CL_OFF, %ax # edd=of + je do_edd_off + cmpw $EDD_CL_SKIP, %ax # edd=sk + je do_edd_skipmbr + cmpw $EDD_CL_ON, %ax # edd=on + je do_edd_on + jmp cl_skipword do_edd_skipmbr: - popl %esi - jmp edd_start + movw $edd_start, %di + jmp cl_skipword do_edd_off: - popl %esi - jmp edd_done + movw $edd_done, %di + jmp cl_skipword +do_edd_on: + movw $edd_mbr_sig_start, %di + jmp cl_skipword + done_cl: popl %esi - + jmpw *%di # Read the first sector of each BIOS disk device and store the 4-byte signature edd_mbr_sig_start: -- cgit v1.2.3 From 3566561bfadffcb5dbc85d576be80c0dbf2cccc9 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 26 Sep 2006 10:52:38 +0200 Subject: [PATCH] i386: Avoid overwriting the current pgd (V4, i386) kexec: Avoid overwriting the current pgd (V4, i386) This patch upgrades the i386-specific kexec code to avoid overwriting the current pgd. Overwriting the current pgd is bad when CONFIG_CRASH_DUMP is used to start a secondary kernel that dumps the memory of the previous kernel. The code introduces a new set of page tables. These tables are used to provide an executable identity mapping without overwriting the current pgd. Signed-off-by: Magnus Damm Signed-off-by: Andi Kleen --- arch/i386/kernel/machine_kexec.c | 117 +++++++-------------------- arch/i386/kernel/relocate_kernel.S | 162 +++++++++++++++++++++++++++++++++---- 2 files changed, 174 insertions(+), 105 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index 66c3dc99a65..91966bafb3d 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -21,70 +21,13 @@ #include #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) - -#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -#define L2_ATTR (_PAGE_PRESENT) - -#define LEVEL0_SIZE (1UL << 12UL) - -#ifndef CONFIG_X86_PAE -#define LEVEL1_SIZE (1UL << 22UL) -static u32 pgtable_level1[1024] PAGE_ALIGNED; - -static void identity_map_page(unsigned long address) -{ - unsigned long level1_index, level2_index; - u32 *pgtable_level2; - - /* Find the current page table */ - pgtable_level2 = __va(read_cr3()); - - /* Find the indexes of the physical address to identity map */ - level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; - level2_index = address / LEVEL1_SIZE; - - /* Identity map the page table entry */ - pgtable_level1[level1_index] = address | L0_ATTR; - pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; - - /* Flush the tlb so the new mapping takes effect. - * Global tlb entries are not flushed but that is not an issue. - */ - load_cr3(pgtable_level2); -} - -#else -#define LEVEL1_SIZE (1UL << 21UL) -#define LEVEL2_SIZE (1UL << 30UL) -static u64 pgtable_level1[512] PAGE_ALIGNED; -static u64 pgtable_level2[512] PAGE_ALIGNED; - -static void identity_map_page(unsigned long address) -{ - unsigned long level1_index, level2_index, level3_index; - u64 *pgtable_level3; - - /* Find the current page table */ - pgtable_level3 = __va(read_cr3()); - - /* Find the indexes of the physical address to identity map */ - level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; - level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE; - level3_index = address / LEVEL2_SIZE; - - /* Identity map the page table entry */ - pgtable_level1[level1_index] = address | L0_ATTR; - pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; - set_64bit(&pgtable_level3[level3_index], - __pa(pgtable_level2) | L2_ATTR); - - /* Flush the tlb so the new mapping takes effect. - * Global tlb entries are not flushed but that is not an issue. - */ - load_cr3(pgtable_level3); -} +static u32 kexec_pgd[1024] PAGE_ALIGNED; +#ifdef CONFIG_X86_PAE +static u32 kexec_pmd0[1024] PAGE_ALIGNED; +static u32 kexec_pmd1[1024] PAGE_ALIGNED; #endif +static u32 kexec_pte0[1024] PAGE_ALIGNED; +static u32 kexec_pte1[1024] PAGE_ALIGNED; static void set_idt(void *newidt, __u16 limit) { @@ -128,16 +71,6 @@ static void load_segments(void) #undef __STR } -typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)( - unsigned long indirection_page, - unsigned long reboot_code_buffer, - unsigned long start_address, - unsigned int has_pae) ATTRIB_NORET; - -extern const unsigned char relocate_new_kernel[]; -extern void relocate_new_kernel_end(void); -extern const unsigned int relocate_new_kernel_size; - /* * A architecture hook called to validate the * proposed image and prepare the control pages @@ -170,25 +103,29 @@ void machine_kexec_cleanup(struct kimage *image) */ NORET_TYPE void machine_kexec(struct kimage *image) { - unsigned long page_list; - unsigned long reboot_code_buffer; - - relocate_new_kernel_t rnk; + unsigned long page_list[PAGES_NR]; + void *control_page; /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); - /* Compute some offsets */ - reboot_code_buffer = page_to_pfn(image->control_code_page) - << PAGE_SHIFT; - page_list = image->head; - - /* Set up an identity mapping for the reboot_code_buffer */ - identity_map_page(reboot_code_buffer); - - /* copy it out */ - memcpy((void *)reboot_code_buffer, relocate_new_kernel, - relocate_new_kernel_size); + control_page = page_address(image->control_code_page); + memcpy(control_page, relocate_kernel, PAGE_SIZE); + + page_list[PA_CONTROL_PAGE] = __pa(control_page); + page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; + page_list[PA_PGD] = __pa(kexec_pgd); + page_list[VA_PGD] = (unsigned long)kexec_pgd; +#ifdef CONFIG_X86_PAE + page_list[PA_PMD_0] = __pa(kexec_pmd0); + page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; + page_list[PA_PMD_1] = __pa(kexec_pmd1); + page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; +#endif + page_list[PA_PTE_0] = __pa(kexec_pte0); + page_list[VA_PTE_0] = (unsigned long)kexec_pte0; + page_list[PA_PTE_1] = __pa(kexec_pte1); + page_list[VA_PTE_1] = (unsigned long)kexec_pte1; /* The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is @@ -207,8 +144,8 @@ NORET_TYPE void machine_kexec(struct kimage *image) set_idt(phys_to_virt(0),0); /* now call it */ - rnk = (relocate_new_kernel_t) reboot_code_buffer; - (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); + relocate_kernel((unsigned long)image->head, (unsigned long)page_list, + image->start, cpu_has_pae); } /* crashkernel=size@addr specifies the location to reserve for diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S index d312616effa..f151d6fae46 100644 --- a/arch/i386/kernel/relocate_kernel.S +++ b/arch/i386/kernel/relocate_kernel.S @@ -7,16 +7,138 @@ */ #include +#include +#include + +/* + * Must be relocatable PIC code callable as a C function + */ + +#define PTR(x) (x << 2) +#define PAGE_ALIGNED (1 << PAGE_SHIFT) +#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ +#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */ + + .text + .align PAGE_ALIGNED + .globl relocate_kernel +relocate_kernel: + movl 8(%esp), %ebp /* list of pages */ + +#ifdef CONFIG_X86_PAE + /* map the control page at its virtual address */ + + movl PTR(VA_PGD)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0xc0000000, %eax + shrl $27, %eax + addl %edi, %eax + + movl PTR(PA_PMD_0)(%ebp), %edx + orl $PAE_PGD_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PMD_0)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0x3fe00000, %eax + shrl $18, %eax + addl %edi, %eax + + movl PTR(PA_PTE_0)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PTE_0)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0x001ff000, %eax + shrl $9, %eax + addl %edi, %eax + + movl PTR(PA_CONTROL_PAGE)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + /* identity map the control page at its physical address */ + + movl PTR(VA_PGD)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0xc0000000, %eax + shrl $27, %eax + addl %edi, %eax + + movl PTR(PA_PMD_1)(%ebp), %edx + orl $PAE_PGD_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PMD_1)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0x3fe00000, %eax + shrl $18, %eax + addl %edi, %eax + + movl PTR(PA_PTE_1)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PTE_1)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0x001ff000, %eax + shrl $9, %eax + addl %edi, %eax + + movl PTR(PA_CONTROL_PAGE)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) +#else + /* map the control page at its virtual address */ + + movl PTR(VA_PGD)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0xffc00000, %eax + shrl $20, %eax + addl %edi, %eax + + movl PTR(PA_PTE_0)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PTE_0)(%ebp), %edi + movl PTR(VA_CONTROL_PAGE)(%ebp), %eax + andl $0x003ff000, %eax + shrl $10, %eax + addl %edi, %eax + + movl PTR(PA_CONTROL_PAGE)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + /* identity map the control page at its physical address */ + + movl PTR(VA_PGD)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0xffc00000, %eax + shrl $20, %eax + addl %edi, %eax + + movl PTR(PA_PTE_1)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) + + movl PTR(VA_PTE_1)(%ebp), %edi + movl PTR(PA_CONTROL_PAGE)(%ebp), %eax + andl $0x003ff000, %eax + shrl $10, %eax + addl %edi, %eax + + movl PTR(PA_CONTROL_PAGE)(%ebp), %edx + orl $PAGE_ATTR, %edx + movl %edx, (%eax) +#endif - /* - * Must be relocatable PIC code callable as a C function, that once - * it starts can not use the previous processes stack. - */ - .globl relocate_new_kernel relocate_new_kernel: /* read the arguments and say goodbye to the stack */ movl 4(%esp), %ebx /* page_list */ - movl 8(%esp), %ebp /* reboot_code_buffer */ + movl 8(%esp), %ebp /* list of pages */ movl 12(%esp), %edx /* start address */ movl 16(%esp), %ecx /* cpu_has_pae */ @@ -24,11 +146,26 @@ relocate_new_kernel: pushl $0 popfl - /* set a new stack at the bottom of our page... */ - lea 4096(%ebp), %esp + /* get physical address of control page now */ + /* this is impossible after page table switch */ + movl PTR(PA_CONTROL_PAGE)(%ebp), %edi - /* store the parameters back on the stack */ - pushl %edx /* store the start address */ + /* switch to new set of page tables */ + movl PTR(PA_PGD)(%ebp), %eax + movl %eax, %cr3 + + /* setup a new stack at the end of the physical control page */ + lea 4096(%edi), %esp + + /* jump to identity mapped page */ + movl %edi, %eax + addl $(identity_mapped - relocate_kernel), %eax + pushl %eax + ret + +identity_mapped: + /* store the start address on the stack */ + pushl %edx /* Set cr0 to a known state: * 31 0 == Paging disabled @@ -113,8 +250,3 @@ relocate_new_kernel: xorl %edi, %edi xorl %ebp, %ebp ret -relocate_new_kernel_end: - - .globl relocate_new_kernel_size -relocate_new_kernel_size: - .long relocate_new_kernel_end - relocate_new_kernel -- cgit v1.2.3 From 0da5db313317e3195482d3e660a1074857374a89 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 26 Sep 2006 10:52:39 +0200 Subject: [PATCH] i386: Abstract sensitive instructions Abstract sensitive instructions in assembler code, replacing them with macros (which currently are #defined to the native versions). We use long names: assembler is case-insensitive, so if something goes wrong and macros do not expand, it would assemble anyway. Resulting object files are exactly the same as before. Signed-off-by: Rusty Russell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/entry.S | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 4b084524922..3872fca5c74 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -76,8 +76,15 @@ DF_MASK = 0x00000400 NT_MASK = 0x00004000 VM_MASK = 0x00020000 +/* These are replaces for paravirtualization */ +#define DISABLE_INTERRUPTS cli +#define ENABLE_INTERRUPTS sti +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit +#define INTERRUPT_RETURN iret +#define GET_CR0_INTO_EAX movl %cr0, %eax + #ifdef CONFIG_PREEMPT -#define preempt_stop cli; TRACE_IRQS_OFF +#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF #else #define preempt_stop #define resume_kernel restore_nocheck @@ -236,7 +243,7 @@ check_userspace: testl $(VM_MASK | 3), %eax jz resume_kernel ENTRY(resume_userspace) - cli # make sure we don't miss an interrupt + DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -247,7 +254,7 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - cli + DISABLE_INTERRUPTS cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: @@ -275,7 +282,7 @@ sysenter_past_esp: * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: */ - sti + ENABLE_INTERRUPTS pushl $(__USER_DS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET ss, 0*/ @@ -320,7 +327,7 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) - cli + DISABLE_INTERRUPTS TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx @@ -330,8 +337,7 @@ sysenter_past_esp: movl OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON - sti - sysexit + ENABLE_INTERRUPTS_SYSEXIT CFI_ENDPROC @@ -356,7 +362,7 @@ syscall_call: call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) # store the return value syscall_exit: - cli # make sure we don't miss an interrupt + DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF @@ -381,11 +387,11 @@ restore_nocheck_notrace: RESTORE_REGS addl $4, %esp CFI_ADJUST_CFA_OFFSET -4 -1: iret +1: INTERRUPT_RETURN .section .fixup,"ax" iret_exc: TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS pushl $0 # no error code pushl $do_iret_error jmp error_code @@ -409,7 +415,7 @@ ldt_ss: * dosemu and wine happy. */ subl $8, %esp # reserve space for switch16 pointer CFI_ADJUST_CFA_OFFSET 8 - cli + DISABLE_INTERRUPTS TRACE_IRQS_OFF movl %esp, %eax /* Set up the 16bit stack frame with switch32 pointer on top, @@ -419,7 +425,7 @@ ldt_ss: TRACE_IRQS_IRET RESTORE_REGS lss 20+4(%esp), %esp # switch to 16bit stack -1: iret +1: INTERRUPT_RETURN .section __ex_table,"a" .align 4 .long 1b,iret_exc @@ -434,7 +440,7 @@ work_pending: jz work_notifysig work_resched: call schedule - cli # make sure we don't miss an interrupt + DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF @@ -490,7 +496,7 @@ syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending TRACE_IRQS_ON - sti # could let do_syscall_trace() call + ENABLE_INTERRUPTS # could let do_syscall_trace() call # schedule() instead movl %esp, %eax movl $1, %edx @@ -668,7 +674,7 @@ ENTRY(device_not_available) pushl $-1 # mark this as an int CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL - movl %cr0, %eax + GET_CR0_INTO_EAX testl $0x4, %eax # EM (math emulation bit) jne device_not_available_emulate preempt_stop @@ -811,7 +817,7 @@ nmi_16bit_stack: call do_nmi RESTORE_REGS lss 12+4(%esp), %esp # back to 16bit stack -1: iret +1: INTERRUPT_RETURN CFI_ENDPROC .section __ex_table,"a" .align 4 -- cgit v1.2.3 From 78be3706b21a232310590fe00258b224177ac05f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 26 Sep 2006 10:52:39 +0200 Subject: [PATCH] i386: Allow a kernel not to be in ring 0 We allow for the fact that the guest kernel may not run in ring 0. This requires some abstraction in a few places when setting %cs or checking privilege level (user vs kernel). This is Chris' [RFC PATCH 15/33] move segment checks to subarch, except rather than using #define USER_MODE_MASK which depends on a config option, we use Zach's more flexible approach of assuming ring 3 == userspace. I also used "get_kernel_rpl()" over "get_kernel_cs()" because I think it reads better in the code... 1) Remove the hardcoded 3 and introduce #define SEGMENT_RPL_MASK 3 2) Add a get_kernel_rpl() macro, and don't assume it's zero. And: Clean up of patch for letting kernel run other than ring 0: a. Add some comments about the SEGMENT_IS_*_CODE() macros. b. Add a USER_RPL macro. (Code was comparing a value to a mask in some places and to the magic number 3 in other places.) c. Add macros for table indicator field and use them. d. Change the entry.S tests for LDT stack segment to use the macros Signed-off-by: Rusty Russell Signed-off-by: Zachary Amsden Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/entry.S | 9 +++++---- arch/i386/kernel/process.c | 2 +- arch/i386/mm/extable.c | 2 +- arch/i386/mm/fault.c | 11 ++++------- 4 files changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 3872fca5c74..284f2e908ad 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -240,8 +240,9 @@ ret_from_intr: check_userspace: movl EFLAGS(%esp), %eax # mix EFLAGS and CS movb CS(%esp), %al - testl $(VM_MASK | 3), %eax - jz resume_kernel + andl $(VM_MASK | SEGMENT_RPL_MASK), %eax + cmpl $USER_RPL, %eax + jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending @@ -377,8 +378,8 @@ restore_all: # See comments in process.c:copy_thread() for details. movb OLDSS(%esp), %ah movb CS(%esp), %al - andl $(VM_MASK | (4 << 8) | 3), %eax - cmpl $((4 << 8) | 3), %eax + andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS restore_nocheck: diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 220aeca59c3..8c190ca7ae4 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -338,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) regs.xes = __USER_DS; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; - regs.xcs = __KERNEL_CS; + regs.xcs = __KERNEL_CS | get_kernel_rpl(); regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; /* Ok, create the new process.. */ diff --git a/arch/i386/mm/extable.c b/arch/i386/mm/extable.c index de03c5430ab..0ce4f22a263 100644 --- a/arch/i386/mm/extable.c +++ b/arch/i386/mm/extable.c @@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs) const struct exception_table_entry *fixup; #ifdef CONFIG_PNPBIOS - if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3))) + if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs))) { extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; extern u32 pnp_bios_is_utter_crap; diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 0ce86168a0b..5e17a3f43b4 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -27,6 +27,7 @@ #include #include #include +#include extern void die(const char *,struct pt_regs *,long); @@ -113,10 +114,10 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, } /* The standard kernel/user address space limit. */ - *eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg; + *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; /* By far the most common cases. */ - if (likely(seg == __USER_CS || seg == __KERNEL_CS)) + if (likely(SEGMENT_IS_FLAT_CODE(seg))) return eip; /* Check the segment exists, is within the current LDT/GDT size, @@ -430,11 +431,7 @@ good_area: write = 0; switch (error_code & 3) { default: /* 3: write, present */ -#ifdef TEST_VERIFY_AREA - if (regs->cs == KERNEL_CS) - printk("WP fault at %08lx\n", regs->eip); -#endif - /* fall through */ + /* fall through */ case 2: /* write, not present */ if (!(vma->vm_flags & VM_WRITE)) goto bad_area; -- cgit v1.2.3 From ec5c09269ba9cd3b9cf879390db6d5c7dcdcaf1e Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Tue, 26 Sep 2006 10:52:39 +0200 Subject: [PATCH] i386: Do better early exception handlers Add early i386 fault handlers with debug information for common faults. Handles: divide error invalid opcode protection fault page fault Also adds code to detect early recursive/multiple faults and halt the system when they happen (taken from x86_64.) Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Ingo Molnar Signed-off-by: Andrew Morton --- arch/i386/kernel/head.S | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index a6b8bd89aa2..be9d883c62c 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -371,8 +371,65 @@ rp_sidt: addl $8,%edi dec %ecx jne rp_sidt + +.macro set_early_handler handler,trapno + lea \handler,%edx + movl $(__KERNEL_CS << 16),%eax + movw %dx,%ax + movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ + lea idt_table,%edi + movl %eax,8*\trapno(%edi) + movl %edx,8*\trapno+4(%edi) +.endm + + set_early_handler handler=early_divide_err,trapno=0 + set_early_handler handler=early_illegal_opcode,trapno=6 + set_early_handler handler=early_protection_fault,trapno=13 + set_early_handler handler=early_page_fault,trapno=14 + ret +early_divide_err: + xor %edx,%edx + pushl $0 /* fake errcode */ + jmp early_fault + +early_illegal_opcode: + movl $6,%edx + pushl $0 /* fake errcode */ + jmp early_fault + +early_protection_fault: + movl $13,%edx + jmp early_fault + +early_page_fault: + movl $14,%edx + jmp early_fault + +early_fault: + cld +#ifdef CONFIG_PRINTK + movl $(__KERNEL_DS),%eax + movl %eax,%ds + movl %eax,%es + cmpl $2,early_recursion_flag + je hlt_loop + incl early_recursion_flag + movl %cr2,%eax + pushl %eax + pushl %edx /* trapno */ + pushl $fault_msg +#ifdef CONFIG_EARLY_PRINTK + call early_printk +#else + call printk +#endif +#endif +hlt_loop: + hlt + jmp hlt_loop + /* This is the default interrupt "handler" :-) */ ALIGN ignore_int: @@ -386,6 +443,9 @@ ignore_int: movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es + cmpl $2,early_recursion_flag + je hlt_loop + incl early_recursion_flag pushl 16(%esp) pushl 24(%esp) pushl 32(%esp) @@ -431,9 +491,16 @@ ENTRY(stack_start) ready: .byte 0 +early_recursion_flag: + .long 0 + int_msg: .asciz "Unknown interrupt or fault at EIP %p %p %p\n" +fault_msg: + .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" + .asciz "Stack: %p %p %p %p %p %p %p %p\n" + /* * The IDT and GDT 'descriptors' are a strange 48-bit object * only used by the lidt and lgdt instructions. They are not -- cgit v1.2.3 From f0f4c3432e5e1087b3a8c0e6bd4113d3c37497ff Mon Sep 17 00:00:00 2001 From: "adurbin@google.com" Date: Tue, 26 Sep 2006 10:52:39 +0200 Subject: [PATCH] i386: add HPET(s) into resource map Add HPET(s) into resource map. This will allow for the HPET(s) to be visibile within /proc/iomem. Signed-off-by: Aaron Durbin Signed-off-by: Andi Kleen --- arch/i386/kernel/acpi/boot.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 8a6c5b41234..1aaea6ab8c4 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include #include @@ -646,6 +648,8 @@ static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size) static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) { struct acpi_table_hpet *hpet_tbl; + struct resource *hpet_res; + resource_size_t res_start; if (!phys || !size) return -EINVAL; @@ -661,12 +665,26 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) "memory.\n"); return -1; } + +#define HPET_RESOURCE_NAME_SIZE 9 + hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); + if (hpet_res) { + memset(hpet_res, 0, sizeof(*hpet_res)); + hpet_res->name = (void *)&hpet_res[1]; + hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, + "HPET %u", hpet_tbl->number); + hpet_res->end = (1 * 1024) - 1; + } + #ifdef CONFIG_X86_64 vxtime.hpet_address = hpet_tbl->addr.addrl | ((long)hpet_tbl->addr.addrh << 32); printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", hpet_tbl->id, vxtime.hpet_address); + + res_start = vxtime.hpet_address; #else /* X86 */ { extern unsigned long hpet_address; @@ -674,9 +692,17 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) hpet_address = hpet_tbl->addr.addrl; printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", hpet_tbl->id, hpet_address); + + res_start = hpet_address; } #endif /* X86 */ + if (hpet_res) { + hpet_res->start = res_start; + hpet_res->end += res_start; + insert_resource(&iomem_resource, hpet_res); + } + return 0; } #else -- cgit v1.2.3 From 371c2f2783eb880df333d0e9ac35c78143e79d0a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 26 Sep 2006 10:52:40 +0200 Subject: [PATCH] i386: Remove experimental mark of kexec kexec has been marked experimental for a year now and all of the serious problems have been worked through. So it is time (if not past time) to remove the experimental mark. Signed-off-by: Eric W. Biederman Signed-off-by: Andi Kleen --- arch/i386/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index e34a4dbd6b9..9b6cddc8c51 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -739,8 +739,7 @@ config SECCOMP source kernel/Kconfig.hz config KEXEC - bool "kexec system call (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "kexec system call" help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot -- cgit v1.2.3 From 5e544d618f0fb21011f36f28d5e3952b9dc109d2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:40 +0200 Subject: [PATCH] i386/x86-64: PCI: split probing and initialization of type 1 config space access First probe if type1/2 accesses work, but then only initialize them at the end. This is useful for a later patch that needs this information inbetween. Signed-off-by: Andi Kleen --- arch/i386/pci/direct.c | 25 ++++++++++++++++--------- arch/i386/pci/init.c | 9 +++++++-- arch/i386/pci/mmconfig.c | 2 +- arch/i386/pci/pci.h | 6 ++++-- 4 files changed, 28 insertions(+), 14 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c index 5d81fb51037..5acf0b4743c 100644 --- a/arch/i386/pci/direct.c +++ b/arch/i386/pci/direct.c @@ -254,7 +254,16 @@ static int __init pci_check_type2(void) return works; } -void __init pci_direct_init(void) +void __init pci_direct_init(int type) +{ + printk(KERN_INFO "PCI: Using configuration type %d\n", type); + if (type == 1) + raw_pci_ops = &pci_direct_conf1; + else + raw_pci_ops = &pci_direct_conf2; +} + +int __init pci_direct_probe(void) { struct resource *region, *region2; @@ -264,19 +273,16 @@ void __init pci_direct_init(void) if (!region) goto type2; - if (pci_check_type1()) { - printk(KERN_INFO "PCI: Using configuration type 1\n"); - raw_pci_ops = &pci_direct_conf1; - return; - } + if (pci_check_type1()) + return 1; release_resource(region); type2: if ((pci_probe & PCI_PROBE_CONF2) == 0) - return; + return 0; region = request_region(0xCF8, 4, "PCI conf2"); if (!region) - return; + return 0; region2 = request_region(0xC000, 0x1000, "PCI conf2"); if (!region2) goto fail2; @@ -284,10 +290,11 @@ void __init pci_direct_init(void) if (pci_check_type2()) { printk(KERN_INFO "PCI: Using configuration type 2\n"); raw_pci_ops = &pci_direct_conf2; - return; + return 2; } release_resource(region2); fail2: release_resource(region); + return 0; } diff --git a/arch/i386/pci/init.c b/arch/i386/pci/init.c index 51087a9d917..d028e1b05c3 100644 --- a/arch/i386/pci/init.c +++ b/arch/i386/pci/init.c @@ -6,8 +6,13 @@ in the right sequence from here. */ static __init int pci_access_init(void) { + int type = 0; + +#ifdef CONFIG_PCI_DIRECT + type = pci_direct_probe(); +#endif #ifdef CONFIG_PCI_MMCONFIG - pci_mmcfg_init(); + pci_mmcfg_init(type); #endif if (raw_pci_ops) return 0; @@ -21,7 +26,7 @@ static __init int pci_access_init(void) * fails. */ #ifdef CONFIG_PCI_DIRECT - pci_direct_init(); + pci_direct_init(type); #endif return 0; } diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c index 972180f738d..44155c5e85d 100644 --- a/arch/i386/pci/mmconfig.c +++ b/arch/i386/pci/mmconfig.c @@ -187,7 +187,7 @@ static __init void unreachable_devices(void) } } -void __init pci_mmcfg_init(void) +void __init pci_mmcfg_init(int type) { if ((pci_probe & PCI_PROBE_MMCONF) == 0) return; diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h index bf4e7933538..8a7cf1f2368 100644 --- a/arch/i386/pci/pci.h +++ b/arch/i386/pci/pci.h @@ -81,7 +81,9 @@ extern int pci_conf1_write(unsigned int seg, unsigned int bus, extern int pci_conf1_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value); -extern void pci_direct_init(void); +extern int pci_direct_probe(void); +extern void pci_direct_init(int type); extern void pci_pcbios_init(void); -extern void pci_mmcfg_init(void); +extern void pci_mmcfg_init(int type); extern void pcibios_sort(void); + -- cgit v1.2.3 From 9abd79280bbb9f56049f0168f412c3538cadb6eb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:40 +0200 Subject: [PATCH] i386/x86-64: Only do MCFG e820 check when type 1 works Needs earlier patch to split type 1 probing from use. This patch should fix the x86 macs where type 1 PCI config space access doesn't work, but MCFG does. They also don't have a usable e820 table so the e820 sanity check failed. Instead assume now that if type 1 doesn't work then MCFG must work and don't do the e820 check. Signed-off-by: Andi Kleen --- arch/i386/pci/mmconfig.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c index 44155c5e85d..2c4f585510c 100644 --- a/arch/i386/pci/mmconfig.c +++ b/arch/i386/pci/mmconfig.c @@ -198,7 +198,9 @@ void __init pci_mmcfg_init(int type) (pci_mmcfg_config[0].base_address == 0)) return; - if (!e820_all_mapped(pci_mmcfg_config[0].base_address, + /* Only do this check when type 1 works. If it doesn't work + assume we run on a Mac and always use MCFG */ + if (type == 1 && !e820_all_mapped(pci_mmcfg_config[0].base_address, pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN, E820_RESERVED)) { printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n", -- cgit v1.2.3 From de09bddb9d6f96785be470c832b881e6d72d589f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:40 +0200 Subject: [PATCH] i386: Add MMCFG resources to i386 too Following earlier x86-64 patch Cc: gregkh@suse.de Signed-off-by: Andi Kleen --- arch/i386/pci/mmconfig.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c index 2c4f585510c..05be8db58a8 100644 --- a/arch/i386/pci/mmconfig.c +++ b/arch/i386/pci/mmconfig.c @@ -151,6 +151,38 @@ static struct pci_raw_ops pci_mmcfg = { .write = pci_mmcfg_write, }; + +static __init void pci_mmcfg_insert_resources(void) +{ +#define PCI_MMCFG_RESOURCE_NAME_LEN 19 + int i; + struct resource *res; + char *names; + unsigned num_buses; + + res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res), + pci_mmcfg_config_num, GFP_KERNEL); + + if (!res) { + printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n"); + return; + } + + names = (void *)&res[pci_mmcfg_config_num]; + for (i = 0; i < pci_mmcfg_config_num; i++, res++) { + num_buses = pci_mmcfg_config[i].end_bus_number - + pci_mmcfg_config[i].start_bus_number + 1; + res->name = names; + snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u", + pci_mmcfg_config[i].pci_segment_group_number); + res->start = pci_mmcfg_config[i].base_address; + res->end = res->start + (num_buses << 20) - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + insert_resource(&iomem_resource, res); + names += PCI_MMCFG_RESOURCE_NAME_LEN; + } +} + /* K8 systems have some devices (typically in the builtin northbridge) that are only accessible using type1 Normally this can be expressed in the MCFG by not listing them @@ -187,6 +219,8 @@ static __init void unreachable_devices(void) } } + + void __init pci_mmcfg_init(int type) { if ((pci_probe & PCI_PROBE_MMCONF) == 0) @@ -214,4 +248,5 @@ void __init pci_mmcfg_init(int type) pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; unreachable_devices(); + pci_mmcfg_insert_resources(); } -- cgit v1.2.3 From f354b3a92af9b132b188b9c8ebbfb74de699926d Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 26 Sep 2006 10:52:41 +0200 Subject: [PATCH] i386: Split multi-line printk in oops output. Sometimes, bug reports come in where we've had an oops, and the only record we have is what the reporter saw on screen shortly before the system locked up completely. Unfortunatly, syslog only prints lines beginning with KERN_EMERG to the console, so some lines get lost. An example of this can be seen at https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=203723 Some of this information isn't vital to diagnosis, but some parts are useful, such as the tainted flag. Signed-off-by: Dave Jones Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 5c0f4960c67..c7adb076e81 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -351,8 +351,9 @@ void show_registers(struct pt_regs *regs) ss = regs->xss & 0xffff; } print_modules(); - printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n" - "EFLAGS: %08lx (%s %.*s) \n", + printk(KERN_EMERG "CPU: %d\n" + KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n" + KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n", smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags, system_utsname.release, (int)strcspn(system_utsname.version, " "), -- cgit v1.2.3 From 35d534a3ff4905c0a7bddbad0fc9df55967742c7 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 26 Sep 2006 10:52:41 +0200 Subject: [PATCH] x86: - restore i8259A eoi status on resume Got it. i8259A_resume calls init_8259A(0) unconditionally, even if auto_eoi has been set. Keep track of the current status and restore that on resume. This fixes it for AMD64 and i386. Signed-off-by: Matthew Garrett Signed-off-by: Andi Kleen --- arch/i386/kernel/i8259.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index d4756d154f4..ea5f4e7958d 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -45,6 +45,8 @@ static void end_8259A_irq (unsigned int irq) #define shutdown_8259A_irq disable_8259A_irq +static int i8259A_auto_eoi; + static void mask_and_ack_8259A(unsigned int); unsigned int startup_8259A_irq(unsigned int irq) @@ -253,7 +255,7 @@ static void save_ELCR(char *trigger) static int i8259A_resume(struct sys_device *dev) { - init_8259A(0); + init_8259A(i8259A_auto_eoi); restore_ELCR(irq_trigger); return 0; } @@ -301,6 +303,8 @@ void init_8259A(int auto_eoi) { unsigned long flags; + i8259A_auto_eoi = auto_eoi; + spin_lock_irqsave(&i8259A_lock, flags); outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ -- cgit v1.2.3 From adf1423698f00d00b267f7dca8231340ce7d65ef Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 26 Sep 2006 10:52:41 +0200 Subject: [PATCH] i386/x86-64: Work around gcc bug with noreturn functions in unwinder Current gcc generates calls not jumps to noreturn functions. When that happens the return address can point to the next function, which confuses the unwinder. This patch works around it by marking asynchronous exception frames in contrast normal call frames in the unwind information. Then teach the unwinder to decode this. For normal call frames the unwinder now subtracts one from the address which avoids this problem. The standard libgcc unwinder uses the same trick. It doesn't include adjustment of the printed address (i.e. for the original example, it'd still be kernel_math_error+0 that gets displayed, but the unwinder wouldn't get confused anymore. This only works with binutils 2.6.17+ and some versions of H.J.Lu's 2.6.16 unfortunately because earlier binutils don't support .cfi_signal_frame [AK: added automatic detection of the new binutils and wrote description] Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/i386/Makefile | 4 ++++ arch/i386/kernel/entry.S | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 508cdbeb3a0..7cc0b189b82 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -50,6 +50,10 @@ CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-op cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) +# is .cfi_signal_frame supported too? +cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) +AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,) + CFLAGS += $(cflags-y) # Default subarch .c files diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 284f2e908ad..5a63d6fdb70 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -183,18 +183,21 @@ VM_MASK = 0x00020000 #define RING0_INT_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, 3*4;\ /*CFI_OFFSET cs, -2*4;*/\ CFI_OFFSET eip, -3*4 #define RING0_EC_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, 4*4;\ /*CFI_OFFSET cs, -2*4;*/\ CFI_OFFSET eip, -3*4 #define RING0_PTREGS_FRAME \ CFI_STARTPROC simple;\ + CFI_SIGNAL_FRAME;\ CFI_DEF_CFA esp, OLDESP-EBX;\ /*CFI_OFFSET cs, CS-OLDESP;*/\ CFI_OFFSET eip, EIP-OLDESP;\ @@ -275,6 +278,7 @@ need_resched: # sysenter call handler stub ENTRY(sysenter_entry) CFI_STARTPROC simple + CFI_SIGNAL_FRAME CFI_DEF_CFA esp, 0 CFI_REGISTER esp, ebp movl TSS_sysenter_esp0(%esp),%esp -- cgit v1.2.3 From f157cbb1eb9ce3f33a401ec6d20eb3eb852351a3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:41 +0200 Subject: [PATCH] i386/x86-64: Make all early PCI scans dependent on CONFIG_PCI This is useful on systems with broken PCI bus. Affects various scans in x86-64 and i386's early ACPI quirk scan. Cc: gregkh@suse.de Cc: len.brown@intel.com Cc: Trammell Hudson Signed-off-by: Andi Kleen --- arch/i386/kernel/acpi/Makefile | 2 ++ arch/i386/kernel/setup.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile index 7e9ac99354f..7f7be01f44e 100644 --- a/arch/i386/kernel/acpi/Makefile +++ b/arch/i386/kernel/acpi/Makefile @@ -1,5 +1,7 @@ obj-$(CONFIG_ACPI) += boot.o +ifneq ($(CONFIG_PCI),) obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o +endif obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o ifneq ($(CONFIG_ACPI_PROCESSOR),) diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index ea17567dbe7..7a99b1369fa 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1437,9 +1437,11 @@ void __init setup_arch(char **cmdline_p) acpi_boot_table_init(); #endif +#ifdef CONFIG_PCI #ifdef CONFIG_X86_IO_APIC check_acpi_pci(); /* Checks more than just ACPI actually */ #endif +#endif #ifdef CONFIG_ACPI acpi_boot_init(); -- cgit v1.2.3 From 8f60774a116ced9b73ae3913d511687889efe725 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:41 +0200 Subject: [PATCH] x86: Move direct PCI scanning functions out of line Saves about 200 bytes of code space. Signed-off-by: Andi Kleen --- arch/i386/pci/Makefile | 2 +- arch/i386/pci/early.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 arch/i386/pci/early.c (limited to 'arch/i386') diff --git a/arch/i386/pci/Makefile b/arch/i386/pci/Makefile index 62ad75c57e6..1594d2f55c8 100644 --- a/arch/i386/pci/Makefile +++ b/arch/i386/pci/Makefile @@ -11,4 +11,4 @@ pci-y += legacy.o irq.o pci-$(CONFIG_X86_VISWS) := visws.o fixup.o pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o -obj-y += $(pci-y) common.o +obj-y += $(pci-y) common.o early.o diff --git a/arch/i386/pci/early.c b/arch/i386/pci/early.c new file mode 100644 index 00000000000..b1f7f40d809 --- /dev/null +++ b/arch/i386/pci/early.c @@ -0,0 +1,44 @@ +#include +#include +#include + +/* Direct PCI access. This is used for PCI accesses in early boot before + the PCI subsystem works. */ + +#define PDprintk(x...) + +u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) +{ + u32 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inl(0xcfc); + if (v != 0xffffffff) + PDprintk("%x reading 4 from %x: %x\n", slot, offset, v); + return v; +} + +u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) +{ + u8 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inb(0xcfc + (offset&3)); + PDprintk("%x reading 1 from %x: %x\n", slot, offset, v); + return v; +} + +u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) +{ + u16 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inw(0xcfc + (offset&2)); + PDprintk("%x reading 2 from %x: %x\n", slot, offset, v); + return v; +} + +void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, + u32 val) +{ + PDprintk("%x writing to %x: %x\n", slot, offset, val); + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + outl(val, 0xcfc); +} -- cgit v1.2.3 From 0637a70a5db98182d9ad3d6ae1ee30acf20afde9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 26 Sep 2006 10:52:41 +0200 Subject: [PATCH] x86: Allow disabling early pci scans with pci=noearly or disallowing conf1 Some buggy systems can machine check when config space accesses happen for some non existent devices. i386/x86-64 do some early device scans that might trigger this. Allow pci=noearly to disable this. Also when type 1 is disabling also don't do any early accesses which are always type1. This moves the pci= configuration parsing to be a early parameter. I don't think this can break anything because it only changes a single global that is only used by PCI. Cc: gregkh@suse.de Cc: Trammell Hudson Signed-off-by: Andi Kleen --- arch/i386/kernel/acpi/earlyquirk.c | 6 +++++- arch/i386/pci/common.c | 4 ++++ arch/i386/pci/early.c | 8 ++++++++ arch/i386/pci/pci.h | 1 + 4 files changed, 18 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index 1649a175a20..fe799b11ac0 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -48,7 +48,11 @@ void __init check_acpi_pci(void) int num, slot, func; /* Assume the machine supports type 1. If not it will - always read ffffffff and should not have any side effect. */ + always read ffffffff and should not have any side effect. + Actually a few buggy systems can machine check. Allow the user + to disable it by command line option at least -AK */ + if (!early_pci_allowed()) + return; /* Poor man's PCI discovery */ for (num = 0; num < 32; num++) { diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c index 0a362e3aeac..68bce194e68 100644 --- a/arch/i386/pci/common.c +++ b/arch/i386/pci/common.c @@ -242,6 +242,10 @@ char * __devinit pcibios_setup(char *str) acpi_noirq_set(); return NULL; } + else if (!strcmp(str, "noearly")) { + pci_probe |= PCI_PROBE_NOEARLY; + return NULL; + } #ifndef CONFIG_X86_VISWS else if (!strcmp(str, "usepirqmask")) { pci_probe |= PCI_USE_PIRQ_MASK; diff --git a/arch/i386/pci/early.c b/arch/i386/pci/early.c index b1f7f40d809..713d6c866ca 100644 --- a/arch/i386/pci/early.c +++ b/arch/i386/pci/early.c @@ -1,6 +1,8 @@ #include +#include #include #include +#include "pci.h" /* Direct PCI access. This is used for PCI accesses in early boot before the PCI subsystem works. */ @@ -42,3 +44,9 @@ void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outl(val, 0xcfc); } + +int early_pci_allowed(void) +{ + return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == + PCI_PROBE_CONF1; +} diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h index 8a7cf1f2368..1814f74569c 100644 --- a/arch/i386/pci/pci.h +++ b/arch/i386/pci/pci.h @@ -17,6 +17,7 @@ #define PCI_PROBE_CONF2 0x0004 #define PCI_PROBE_MMCONF 0x0008 #define PCI_PROBE_MASK 0x000f +#define PCI_PROBE_NOEARLY 0x0010 #define PCI_NO_SORT 0x0100 #define PCI_BIOS_SORT 0x0200 -- cgit v1.2.3 From 15d5f8398311f565682959daaca30e3ca7aea600 Mon Sep 17 00:00:00 2001 From: Dmitriy Zavin Date: Tue, 26 Sep 2006 10:52:42 +0200 Subject: [PATCH] x86: Refactor thermal throttle processing Refactor the event processing (syslog messaging and rate limiting) into separate file therm_throt.c. This allows consistent reporting of CPU thermal throttle events. After ACK'ing the interrupt, if the event is current, the user (p4.c/mce_intel.c) calls therm_throt_process to log (and rate limit) the event. If that function returns 1, the user has the option to log things further (such as to mce_log in x86_64). AK: minor cleanup Signed-off-by: Dmitriy Zavin Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/mcheck/Makefile | 2 +- arch/i386/kernel/cpu/mcheck/p4.c | 23 ++++-------- arch/i386/kernel/cpu/mcheck/therm_throt.c | 58 +++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 18 deletions(-) create mode 100644 arch/i386/kernel/cpu/mcheck/therm_throt.c (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/mcheck/Makefile b/arch/i386/kernel/cpu/mcheck/Makefile index 30808f3d671..f1ebe1c1c17 100644 --- a/arch/i386/kernel/cpu/mcheck/Makefile +++ b/arch/i386/kernel/cpu/mcheck/Makefile @@ -1,2 +1,2 @@ -obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o +obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index b95f1b3d53a..d83a669d376 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c @@ -13,6 +13,8 @@ #include #include +#include + #include "mce.h" /* as supported by the P4/Xeon family */ @@ -44,25 +46,12 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs) /* P4/Xeon Thermal transition interrupt handler */ static void intel_thermal_interrupt(struct pt_regs *regs) { - u32 l, h; - unsigned int cpu = smp_processor_id(); - static unsigned long next[NR_CPUS]; + __u64 msr_val; ack_APIC_irq(); - if (time_after(next[cpu], jiffies)) - return; - - next[cpu] = jiffies + HZ*5; - rdmsr(MSR_IA32_THERM_STATUS, l, h); - if (l & 0x1) { - printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); - printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", - cpu); - add_taint(TAINT_MACHINE_CHECK); - } else { - printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); - } + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); + therm_throt_process(msr_val & 0x1); } /* Thermal interrupt handler for this CPU setup */ @@ -122,7 +111,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) rdmsr (MSR_IA32_MISC_ENABLE, l, h); wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); - + l = apic_read (APIC_LVTTHMR); apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c new file mode 100644 index 00000000000..85eba00d680 --- /dev/null +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c @@ -0,0 +1,58 @@ +/* + * linux/arch/i386/kerne/cpu/mcheck/therm_throt.c + * + * Thermal throttle event support code. + * + * Author: Dmitriy Zavin (dmitriyz@google.com) + * + * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. + * + */ + +#include +#include +#include +#include +#include + +/* How long to wait between reporting thermal events */ +#define CHECK_INTERVAL (300 * HZ) + +static DEFINE_PER_CPU(unsigned long, next_check); + +/*** + * therm_throt_process - Process thermal throttling event + * @curr: Whether the condition is current or not (boolean), since the + * thermal interrupt normally gets called both when the thermal + * event begins and once the event has ended. + * + * This function is normally called by the thermal interrupt after the + * IRQ has been acknowledged. + * + * It will take care of rate limiting and printing messages to the syslog. + * + * Returns: 0 : Event should NOT be further logged, i.e. still in + * "timeout" from previous log message. + * 1 : Event should be logged further, and a message has been + * printed to the syslog. + */ +int therm_throt_process(int curr) +{ + unsigned int cpu = smp_processor_id(); + + if (time_before(jiffies, __get_cpu_var(next_check))) + return 0; + + __get_cpu_var(next_check) = jiffies + CHECK_INTERVAL; + + /* if we just entered the thermal event */ + if (curr) { + printk(KERN_CRIT "CPU%d: Temperature above threshold, " + "cpu clock throttled\n", cpu); + add_taint(TAINT_MACHINE_CHECK); + } else { + printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); + } + + return 1; +} -- cgit v1.2.3 From 66aea9913cf435fe92ebb7bf869b4f15901ab993 Mon Sep 17 00:00:00 2001 From: Dmitriy Zavin Date: Tue, 26 Sep 2006 10:52:42 +0200 Subject: [PATCH] i386: Make the jiffies compares use the 64bit safe macros. Signed-off-by: Dmitriy Zavin Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/mcheck/therm_throt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c index 85eba00d680..101f7ace00c 100644 --- a/arch/i386/kernel/cpu/mcheck/therm_throt.c +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c @@ -18,7 +18,7 @@ /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) -static DEFINE_PER_CPU(unsigned long, next_check); +static DEFINE_PER_CPU(__u64, next_check); /*** * therm_throt_process - Process thermal throttling event @@ -39,11 +39,12 @@ static DEFINE_PER_CPU(unsigned long, next_check); int therm_throt_process(int curr) { unsigned int cpu = smp_processor_id(); + __u64 tmp_jiffs = get_jiffies_64(); - if (time_before(jiffies, __get_cpu_var(next_check))) + if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) return 0; - __get_cpu_var(next_check) = jiffies + CHECK_INTERVAL; + __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; /* if we just entered the thermal event */ if (curr) { -- cgit v1.2.3 From 3222b36f46c22f46697a0a53fa8804153a32669f Mon Sep 17 00:00:00 2001 From: Dmitriy Zavin Date: Tue, 26 Sep 2006 10:52:42 +0200 Subject: [PATCH] x86: Add a cumulative thermal throttle event counter. The counter is exported to /sys that keeps track of the number of thermal events, such that the user knows how bad the thermal problem might be (since the logging to syslog and mcelog is rate limited). AK: Fixed cpu hotplug locking Signed-off-by: Dmitriy Zavin Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/mcheck/p4.c | 3 + arch/i386/kernel/cpu/mcheck/therm_throt.c | 133 ++++++++++++++++++++++++++++-- 2 files changed, 130 insertions(+), 6 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index d83a669d376..504434a4601 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c @@ -115,6 +115,9 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) l = apic_read (APIC_LVTTHMR); apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); + + /* enable thermal throttle processing */ + atomic_set(&therm_throt_en, 1); return; } #endif /* CONFIG_X86_MCE_P4THERMAL */ diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c index 101f7ace00c..4f43047de40 100644 --- a/arch/i386/kernel/cpu/mcheck/therm_throt.c +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c @@ -1,15 +1,22 @@ /* * linux/arch/i386/kerne/cpu/mcheck/therm_throt.c * - * Thermal throttle event support code. + * Thermal throttle event support code (such as syslog messaging and rate + * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). + * This allows consistent reporting of CPU thermal throttle events. + * + * Maintains a counter in /sys that keeps track of the number of thermal + * events, such that the user knows how bad the thermal problem might be + * (since the logging to syslog and mcelog is rate limited). * * Author: Dmitriy Zavin (dmitriyz@google.com) * * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. - * + * Inspired by Ross Biro's and Al Borchers' counter code. */ #include +#include #include #include #include @@ -18,15 +25,53 @@ /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) -static DEFINE_PER_CPU(__u64, next_check); +static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; +static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); +atomic_t therm_throt_en = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +#define define_therm_throt_sysdev_one_ro(_name) \ + static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) + +#define define_therm_throt_sysdev_show_func(name) \ +static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ + char *buf) \ +{ \ + unsigned int cpu = dev->id; \ + ssize_t ret; \ + \ + preempt_disable(); /* CPU hotplug */ \ + if (cpu_online(cpu)) \ + ret = sprintf(buf, "%lu\n", \ + per_cpu(thermal_throttle_##name, cpu)); \ + else \ + ret = 0; \ + preempt_enable(); \ + \ + return ret; \ +} + +define_therm_throt_sysdev_show_func(count); +define_therm_throt_sysdev_one_ro(count); + +static struct attribute *thermal_throttle_attrs[] = { + &attr_count.attr, + NULL +}; + +static struct attribute_group thermal_throttle_attr_group = { + .attrs = thermal_throttle_attrs, + .name = "thermal_throttle" +}; +#endif /* CONFIG_SYSFS */ /*** - * therm_throt_process - Process thermal throttling event + * therm_throt_process - Process thermal throttling event from interrupt * @curr: Whether the condition is current or not (boolean), since the * thermal interrupt normally gets called both when the thermal * event begins and once the event has ended. * - * This function is normally called by the thermal interrupt after the + * This function is called by the thermal interrupt after the * IRQ has been acknowledged. * * It will take care of rate limiting and printing messages to the syslog. @@ -41,6 +86,9 @@ int therm_throt_process(int curr) unsigned int cpu = smp_processor_id(); __u64 tmp_jiffs = get_jiffies_64(); + if (curr) + __get_cpu_var(thermal_throttle_count)++; + if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) return 0; @@ -49,7 +97,9 @@ int therm_throt_process(int curr) /* if we just entered the thermal event */ if (curr) { printk(KERN_CRIT "CPU%d: Temperature above threshold, " - "cpu clock throttled\n", cpu); + "cpu clock throttled (total events = %lu)\n", cpu, + __get_cpu_var(thermal_throttle_count)); + add_taint(TAINT_MACHINE_CHECK); } else { printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); @@ -57,3 +107,74 @@ int therm_throt_process(int curr) return 1; } + +#ifdef CONFIG_SYSFS +/* Add/Remove thermal_throttle interface for CPU device */ +static __cpuinit int thermal_throttle_add_dev(struct sys_device * sys_dev) +{ + sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +static __cpuinit int thermal_throttle_remove_dev(struct sys_device * sys_dev) +{ + sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); + return 0; +} + +/* Mutex protecting device creation against CPU hotplug */ +static DEFINE_MUTEX(therm_cpu_lock); + +/* Get notified when a cpu comes on/off. Be hotplug friendly. */ +static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + mutex_lock(&therm_cpu_lock); + switch (action) { + case CPU_ONLINE: + thermal_throttle_add_dev(sys_dev); + break; + case CPU_DEAD: + thermal_throttle_remove_dev(sys_dev); + break; + } + mutex_unlock(&therm_cpu_lock); + return NOTIFY_OK; +} + +static struct notifier_block thermal_throttle_cpu_notifier = +{ + .notifier_call = thermal_throttle_cpu_callback, +}; +#endif /* CONFIG_HOTPLUG_CPU */ + +static __init int thermal_throttle_init_device(void) +{ + unsigned int cpu = 0; + + if (!atomic_read(&therm_throt_en)) + return 0; + + register_hotcpu_notifier(&thermal_throttle_cpu_notifier); + +#ifdef CONFIG_HOTPLUG_CPU + mutex_lock(&therm_cpu_lock); +#endif + /* connect live CPUs to sysfs */ + for_each_online_cpu(cpu) + thermal_throttle_add_dev(get_cpu_sysdev(cpu)); +#ifdef CONFIG_HOTPLUG_CPU + mutex_unlock(&therm_cpu_lock); +#endif + + return 0; +} + +device_initcall(thermal_throttle_init_device); +#endif /* CONFIG_SYSFS */ -- cgit v1.2.3 From dcf10307c3fff2bca4b104ad8fe4e3aa8dcb2ad1 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 26 Sep 2006 10:52:42 +0200 Subject: [PATCH] i386/x86-64: New Intel feature flags Add supplemental SSE3 instructions flag, and Direct Cache Access flag. As described in "Intel Processor idenfication and the CPUID instruction AP485 Sept 2006" AK: also added for x86-64 Signed-off-by: Dave Jones Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index f54a15268ed..76aac088a32 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -46,8 +46,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", - "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, + NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ -- cgit v1.2.3