diff options
144 files changed, 2025 insertions, 947 deletions
diff --git a/.gitignore b/.gitignore index 22fb8fa9bc3..8d14531846b 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ *.s *.ko *.so +*.so.dbg *.mod.c *.i *.lst diff --git a/Documentation/device-mapper/dm-uevent.txt b/Documentation/device-mapper/dm-uevent.txt new file mode 100644 index 00000000000..07edbd85c71 --- /dev/null +++ b/Documentation/device-mapper/dm-uevent.txt @@ -0,0 +1,97 @@ +The device-mapper uevent code adds the capability to device-mapper to create +and send kobject uevents (uevents). Previously device-mapper events were only +available through the ioctl interface. The advantage of the uevents interface +is the event contains environment attributes providing increased context for +the event avoiding the need to query the state of the device-mapper device after +the event is received. + +There are two functions currently for device-mapper events. The first function +listed creates the event and the second function sends the event(s). + +void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti, + const char *path, unsigned nr_valid_paths) + +void dm_send_uevents(struct list_head *events, struct kobject *kobj) + + +The variables added to the uevent environment are: + +Variable Name: DM_TARGET +Uevent Action(s): KOBJ_CHANGE +Type: string +Description: +Value: Name of device-mapper target that generated the event. + +Variable Name: DM_ACTION +Uevent Action(s): KOBJ_CHANGE +Type: string +Description: +Value: Device-mapper specific action that caused the uevent action. + PATH_FAILED - A path has failed. + PATH_REINSTATED - A path has been reinstated. + +Variable Name: DM_SEQNUM +Uevent Action(s): KOBJ_CHANGE +Type: unsigned integer +Description: A sequence number for this specific device-mapper device. +Value: Valid unsigned integer range. + +Variable Name: DM_PATH +Uevent Action(s): KOBJ_CHANGE +Type: string +Description: Major and minor number of the path device pertaining to this +event. +Value: Path name in the form of "Major:Minor" + +Variable Name: DM_NR_VALID_PATHS +Uevent Action(s): KOBJ_CHANGE +Type: unsigned integer +Description: +Value: Valid unsigned integer range. + +Variable Name: DM_NAME +Uevent Action(s): KOBJ_CHANGE +Type: string +Description: Name of the device-mapper device. +Value: Name + +Variable Name: DM_UUID +Uevent Action(s): KOBJ_CHANGE +Type: string +Description: UUID of the device-mapper device. +Value: UUID. (Empty string if there isn't one.) + +An example of the uevents generated as captured by udevmonitor is shown +below. + +1.) Path failure. +UEVENT[1192521009.711215] change@/block/dm-3 +ACTION=change +DEVPATH=/block/dm-3 +SUBSYSTEM=block +DM_TARGET=multipath +DM_ACTION=PATH_FAILED +DM_SEQNUM=1 +DM_PATH=8:32 +DM_NR_VALID_PATHS=0 +DM_NAME=mpath2 +DM_UUID=mpath-35333333000002328 +MINOR=3 +MAJOR=253 +SEQNUM=1130 + +2.) Path reinstate. +UEVENT[1192521132.989927] change@/block/dm-3 +ACTION=change +DEVPATH=/block/dm-3 +SUBSYSTEM=block +DM_TARGET=multipath +DM_ACTION=PATH_REINSTATED +DM_SEQNUM=2 +DM_PATH=8:32 +DM_NR_VALID_PATHS=1 +DM_NAME=mpath2 +DM_UUID=mpath-35333333000002328 +MINOR=3 +MAJOR=253 +SEQNUM=1131 diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt index fe8b0c4892c..616043a6da9 100644 --- a/Documentation/kbuild/kconfig-language.txt +++ b/Documentation/kbuild/kconfig-language.txt @@ -77,7 +77,12 @@ applicable everywhere (see syntax). Optionally, dependencies only for this default value can be added with "if". -- dependencies: "depends on"/"requires" <expr> +- type definition + default value: + "def_bool"/"def_tristate" <expr> ["if" <expr>] + This is a shorthand notation for a type definition plus a value. + Optionally dependencies for this default value can be added with "if". + +- dependencies: "depends on" <expr> This defines a dependency for this menu entry. If multiple dependencies are defined, they are connected with '&&'. Dependencies are applied to all other options within this menu entry (which also @@ -289,3 +294,10 @@ source: "source" <prompt> This reads the specified configuration file. This file is always parsed. + +mainmenu: + + "mainmenu" <prompt> + +This sets the config program's title bar if the config program chooses +to use it. diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index f099b814d38..6166e2d7da7 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -518,6 +518,28 @@ more details, with real examples. In this example for a specific GCC version the build will error out explaining to the user why it stops. + cc-cross-prefix + cc-cross-prefix is used to check if there exist a $(CC) in path with + one of the listed prefixes. The first prefix where there exist a + prefix$(CC) in the PATH is returned - and if no prefix$(CC) is found + then nothing is returned. + Additional prefixes are separated by a single space in the + call of cc-cross-prefix. + This functionality is usefull for architecture Makefile that try + to set CROSS_COMPILE to well know values but may have several + values to select between. + It is recommended only to try to set CROSS_COMPILE is it is a cross + build (host arch is different from target arch). And is CROSS_COMPILE + is already set then leave it with the old value. + + Example: + #arch/m68k/Makefile + ifneq ($(SUBARCH),$(ARCH)) + ifeq ($(CROSS_COMPILE),) + CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu-) + endif + endif + === 4 Host Program support Kbuild supports building executables on the host for use during the diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0a3fed44524..7bf6bd2f530 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -431,8 +431,10 @@ and is between 256 and 4096 characters. It is defined in the file over the 8254 in addition to over the IO-APIC. The kernel tries to set a sensible default. - hpet= [X86-32,HPET] option to disable HPET and use PIT. - Format: disable + hpet= [X86-32,HPET] option to control HPET usage + Format: { enable (default) | disable | force } + disable: disable HPET and use PIT instead + force: allow force enabled of undocumented chips (ICH4, VIA) com20020= [HW,NET] ARCnet - COM20020 chipset Format: diff --git a/MAINTAINERS b/MAINTAINERS index 2534dc4aa95..4ed41394e49 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2178,7 +2178,7 @@ S: Maintained KCONFIG P: Roman Zippel M: zippel@linux-m68k.org -L: kbuild-devel@lists.sourceforge.net +L: linux-kbuild@vger.kernel.org S: Maintained KDUMP @@ -2207,6 +2207,7 @@ KERNEL BUILD (kbuild: Makefile, scripts/Makefile.*) P: Sam Ravnborg M: sam@ravnborg.org T: git kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git +L: linux-kbuild@vger.kernel.org S: Maintained KERNEL JANITORS @@ -887,10 +887,7 @@ prepare2: prepare3 outputmakefile prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \ include/asm include/config/auto.conf -ifneq ($(KBUILD_MODULES),) - $(Q)mkdir -p $(MODVERDIR) - $(Q)rm -f $(MODVERDIR)/* -endif + $(cmd_crmodverdir) archprepare: prepare1 scripts_basic @@ -906,14 +903,24 @@ prepare: prepare0 export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH) -# FIXME: The asm symlink changes when $(ARCH) changes. That's -# hard to detect, but I suppose "make mrproper" is a good idea -# before switching between archs anyway. - -include/asm: - @echo ' SYMLINK $@ -> include/asm-$(SRCARCH)' - $(Q)if [ ! -d include ]; then mkdir -p include; fi; - @ln -fsn asm-$(SRCARCH) $@ +# The asm symlink changes when $(ARCH) changes. +# Detect this and ask user to run make mrproper + +include/asm: FORCE + $(Q)set -e; asmlink=`readlink include/asm | cut -d '-' -f 2`; \ + if [ -L include/asm ]; then \ + if [ "$$asmlink" != "$(SRCARCH)" ]; then \ + echo "ERROR: the symlink $@ points to asm-$$asmlink but asm-$(SRCARCH) was expected"; \ + echo " set ARCH or save .config and run 'make mrproper' to fix it"; \ + exit 1; \ + fi; \ + else \ + echo ' SYMLINK $@ -> include/asm-$(SRCARCH)'; \ + if [ ! -d include ]; then \ + mkdir -p include; \ + fi; \ + ln -fsn asm-$(SRCARCH) $@; \ + fi # Generate some files # --------------------------------------------------------------------------- @@ -1023,19 +1030,12 @@ _modinst_: fi $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst -# If System.map exists, run depmod. This deliberately does not have a -# dependency on System.map since that would run the dependency tree on -# vmlinux. This depmod is only for convenience to give the initial +# This depmod is only for convenience to give the initial # boot a modules.dep even before / is mounted read-write. However the # boot script depmod is the master version. -ifeq "$(strip $(INSTALL_MOD_PATH))" "" -depmod_opts := -else -depmod_opts := -b $(INSTALL_MOD_PATH) -r -endif PHONY += _modinst_post _modinst_post: _modinst_ - if [ -r System.map -a -x $(DEPMOD) ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi + $(call cmd,depmod) else # CONFIG_MODULES @@ -1223,8 +1223,7 @@ else # KBUILD_EXTMOD KBUILD_MODULES := 1 PHONY += crmodverdir crmodverdir: - $(Q)mkdir -p $(MODVERDIR) - $(Q)rm -f $(MODVERDIR)/* + $(cmd_crmodverdir) PHONY += $(objtree)/Module.symvers $(objtree)/Module.symvers: @@ -1252,15 +1251,6 @@ _emodinst_: $(Q)mkdir -p $(MODLIB)/$(install-dir) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst -# Run depmod only is we have System.map and depmod is executable -quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) - cmd_depmod = if [ -r System.map -a -x $(DEPMOD) ]; then \ - $(DEPMOD) -ae -F System.map \ - $(if $(strip $(INSTALL_MOD_PATH)), \ - -b $(INSTALL_MOD_PATH) -r) \ - $(KERNELRELEASE); \ - fi - PHONY += _emodinst_post _emodinst_post: _emodinst_ $(call cmd,depmod) @@ -1344,7 +1334,7 @@ define find-sources find $(__srctree)include/asm-generic $(RCS_FIND_IGNORE) \ -name $1 -print; \ find $(__srctree) $(RCS_FIND_IGNORE) \ - \( -name include -o -name arch \) -prune -o \ + \( -name include -o -name arch -o -name '.tmp_*' \) -prune -o \ -name $1 -print; \ ) endef @@ -1493,9 +1483,11 @@ endif # Modules / %/: prepare scripts FORCE + $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) %.ko: prepare scripts FORCE + $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) $(@:.ko=.o) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost @@ -1509,6 +1501,19 @@ quiet_cmd_rmdirs = $(if $(wildcard $(rm-dirs)),CLEAN $(wildcard $(rm-dirs))) quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files))) cmd_rmfiles = rm -f $(rm-files) +# Run depmod only is we have System.map and depmod is executable +# and we build for the host arch +quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) + cmd_depmod = \ + if [ -r System.map -a -x $(DEPMOD) -a "$(SUBARCH)" == "$(ARCH)" ]; then \ + $(DEPMOD) -ae -F System.map \ + $(if $(strip $(INSTALL_MOD_PATH)), -b $(INSTALL_MOD_PATH) -r) \ + $(KERNELRELEASE); \ + fi + +# Create temporary dir for module support files +cmd_crmodverdir = $(Q)mkdir -p $(MODVERDIR); rm -f $(MODVERDIR)/* + a_flags = -Wp,-MD,$(depfile) $(KBUILD_AFLAGS) $(AFLAGS_KERNEL) \ $(NOSTDINC_FLAGS) $(KBUILD_CPPFLAGS) \ diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index d0a4ea1ba14..d1bedbf9deb 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1080,7 +1080,7 @@ config APM_REAL_MODE_POWER_OFF endif # APM -source "arch/x86/kernel/cpu/cpufreq/Kconfig" +source "arch/x86/kernel/cpu/cpufreq/Kconfig_32" source "drivers/cpuidle/Kconfig" diff --git a/arch/i386/Makefile b/arch/i386/Makefile index f036d2dee3d..b88e47ca303 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -102,7 +102,7 @@ core-$(CONFIG_XEN) += arch/x86/xen/ # default subarch .h files mflags-y += -Iinclude/asm-x86/mach-default -head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task_32.o +head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task.o libs-y += arch/x86/lib/ core-y += arch/x86/kernel/ \ @@ -131,9 +131,9 @@ all: bzImage zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage zImage bzImage: vmlinux - $(Q)mkdir -p $(objtree)/arch/i386/boot - $(Q)ln -fsn $(objtree)/arch/x86/boot/bzImage $(objtree)/arch/i386/boot/bzImage $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) + $(Q)mkdir -p $(objtree)/arch/i386/boot + $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/i386/boot/bzImage compressed: zImage diff --git a/arch/x86/ia32/ia32_binfmt.c b/arch/x86/ia32/ia32_binfmt.c index 5027650eb27..55822d2cf05 100644 --- a/arch/x86/ia32/ia32_binfmt.c +++ b/arch/x86/ia32/ia32_binfmt.c @@ -5,10 +5,6 @@ * This tricks binfmt_elf.c into loading 32bit binaries using lots * of ugly preprocessor tricks. Talk about very very poor man's inheritance. */ -#define __ASM_X86_64_ELF_H 1 - -#undef ELF_CLASS -#define ELF_CLASS ELFCLASS32 #include <linux/types.h> #include <linux/stddef.h> @@ -19,6 +15,7 @@ #include <linux/binfmts.h> #include <linux/mm.h> #include <linux/security.h> +#include <linux/elfcore-compat.h> #include <asm/segment.h> #include <asm/ptrace.h> @@ -31,6 +28,20 @@ #include <asm/ia32.h> #include <asm/vsyscall32.h> +#undef ELF_ARCH +#undef ELF_CLASS +#define ELF_CLASS ELFCLASS32 +#define ELF_ARCH EM_386 + +#undef elfhdr +#undef elf_phdr +#undef elf_note +#undef elf_addr_t +#define elfhdr elf32_hdr +#define elf_phdr elf32_phdr +#define elf_note elf32_note +#define elf_addr_t Elf32_Off + #define ELF_NAME "elf/i386" #define AT_SYSINFO 32 @@ -48,74 +59,20 @@ int sysctl_vsyscall32 = 1; } while(0) struct file; -struct elf_phdr; #define IA32_EMULATOR 1 -#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) - -#undef ELF_ARCH -#define ELF_ARCH EM_386 - -#define ELF_DATA ELFDATA2LSB +#undef ELF_ET_DYN_BASE -#define USE_ELF_CORE_DUMP 1 - -/* Override elfcore.h */ -#define _LINUX_ELFCORE_H 1 -typedef unsigned int elf_greg_t; - -#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t)) -typedef elf_greg_t elf_gregset_t[ELF_NGREG]; - -struct elf_siginfo -{ - int si_signo; /* signal number */ - int si_code; /* extra code */ - int si_errno; /* errno */ -}; +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) #define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0) -struct elf_prstatus -{ - struct elf_siginfo pr_info; /* Info associated with signal */ - short pr_cursig; /* Current signal */ - unsigned int pr_sigpend; /* Set of pending signals */ - unsigned int pr_sighold; /* Set of held signals */ - pid_t pr_pid; - pid_t pr_ppid; - pid_t pr_pgrp; - pid_t pr_sid; - struct compat_timeval pr_utime; /* User time */ - struct compat_timeval pr_stime; /* System time */ - struct compat_timeval pr_cutime; /* Cumulative user time */ - struct compat_timeval pr_cstime; /* Cumulative system time */ - elf_gregset_t pr_reg; /* GP registers */ - int pr_fpvalid; /* True if math co-processor being used. */ -}; - -#define ELF_PRARGSZ (80) /* Number of chars for args */ - -struct elf_prpsinfo -{ - char pr_state; /* numeric process state */ - char pr_sname; /* char for pr_state */ - char pr_zomb; /* zombie */ - char pr_nice; /* nice val */ - unsigned int pr_flag; /* flags */ - __u16 pr_uid; - __u16 pr_gid; - pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; - /* Lots missing */ - char pr_fname[16]; /* filename of executable */ - char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ -}; - #define _GET_SEG(x) \ ({ __u32 seg; asm("movl %%" __stringify(x) ",%0" : "=r"(seg)); seg; }) /* Assumes current==process to be dumped */ +#undef ELF_CORE_COPY_REGS #define ELF_CORE_COPY_REGS(pr_reg, regs) \ pr_reg[0] = regs->rbx; \ pr_reg[1] = regs->rcx; \ @@ -135,36 +92,41 @@ struct elf_prpsinfo pr_reg[15] = regs->rsp; \ pr_reg[16] = regs->ss; -#define user user32 + +#define elf_prstatus compat_elf_prstatus +#define elf_prpsinfo compat_elf_prpsinfo +#define elf_fpregset_t struct user_i387_ia32_struct +#define elf_fpxregset_t struct user32_fxsr_struct +#define user user32 #undef elf_read_implies_exec #define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X) -//#include <asm/ia32.h> -#include <linux/elf.h> - -typedef struct user_i387_ia32_struct elf_fpregset_t; -typedef struct user32_fxsr_struct elf_fpxregset_t; - -static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs) +#define elf_core_copy_regs elf32_core_copy_regs +static inline void elf32_core_copy_regs(compat_elf_gregset_t *elfregs, + struct pt_regs *regs) { - ELF_CORE_COPY_REGS((*elfregs), regs) + ELF_CORE_COPY_REGS((&elfregs->ebx), regs) } -static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) +#define elf_core_copy_task_regs elf32_core_copy_task_regs +static inline int elf32_core_copy_task_regs(struct task_struct *t, + compat_elf_gregset_t* elfregs) { struct pt_regs *pp = task_pt_regs(t); - ELF_CORE_COPY_REGS((*elfregs), pp); + ELF_CORE_COPY_REGS((&elfregs->ebx), pp); /* fix wrong segments */ - (*elfregs)[7] = t->thread.ds; - (*elfregs)[9] = t->thread.fsindex; - (*elfregs)[10] = t->thread.gsindex; - (*elfregs)[8] = t->thread.es; + elfregs->ds = t->thread.ds; + elfregs->fs = t->thread.fsindex; + elfregs->gs = t->thread.gsindex; + elfregs->es = t->thread.es; return 1; } +#define elf_core_copy_task_fpregs elf32_core_copy_task_fpregs static inline int -elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu) +elf32_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, + elf_fpregset_t *fpu) { struct _fpstate_ia32 *fpstate = (void*)fpu; mm_segment_t oldfs = get_fs(); @@ -186,8 +148,9 @@ elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpr #define ELF_CORE_COPY_XFPREGS 1 #define ELF_CORE_XFPREG_TYPE NT_PRXFPREG +#define elf_core_copy_task_xfpregs elf32_core_copy_task_xfpregs static inline int -elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu) +elf32_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu) { struct pt_regs *regs = task_pt_regs(t); if (!tsk_used_math(t)) @@ -206,6 +169,10 @@ elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu) extern int force_personality32; +#undef ELF_EXEC_PAGESIZE +#undef ELF_HWCAP +#undef ELF_PLATFORM +#undef SET_PERSONALITY #define ELF_EXEC_PAGESIZE PAGE_SIZE #define ELF_HWCAP (boot_cpu_data.x86_capability[0]) #define ELF_PLATFORM ("i686") @@ -231,6 +198,7 @@ do { \ #define load_elf_binary load_elf32_binary +#undef ELF_PLAT_INIT #define ELF_PLAT_INIT(r, load_addr) elf32_init(r) #undef start_thread diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index a3fa11f8f46..ccea590bbb9 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -extra-y := head_32.o init_task_32.o vmlinux.lds +extra-y := head_32.o init_task.o vmlinux.lds obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ @@ -17,6 +17,7 @@ obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_PCI) += early-quirks.o obj-$(CONFIG_APM) += apm_32.o obj-$(CONFIG_X86_SMP) += smp_32.o smpboot_32.o tsc_sync.o obj-$(CONFIG_SMP) += smpcommon_32.o diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 43da66213a4..dec06e76928 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -extra-y := head_64.o head64.o init_task_64.o vmlinux.lds +extra-y := head_64.o head64.o init_task.o vmlinux.lds EXTRA_AFLAGS := -traditional obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \ @@ -39,7 +39,7 @@ obj-$(CONFIG_K8_NB) += k8.o obj-$(CONFIG_AUDIT) += audit_64.o obj-$(CONFIG_MODULES) += module_64.o -obj-$(CONFIG_PCI) += early-quirks_64.o +obj-$(CONFIG_PCI) += early-quirks.o obj-y += topology.o obj-y += intel_cacheinfo.o diff --git a/arch/x86/kernel/acpi/Makefile_32 b/arch/x86/kernel/acpi/Makefile_32 index a4852a2e919..045dd54b33e 100644 --- a/arch/x86/kernel/acpi/Makefile_32 +++ b/arch/x86/kernel/acpi/Makefile_32 @@ -1,7 +1,4 @@ obj-$(CONFIG_ACPI) += boot.o -ifneq ($(CONFIG_PCI),) -obj-$(CONFIG_X86_IO_APIC) += earlyquirk_32.o -endif obj-$(CONFIG_ACPI_SLEEP) += sleep_32.o wakeup_32.o ifneq ($(CONFIG_ACPI_PROCESSOR),) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index afd2afe9102..f28b2e251b1 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -555,7 +555,7 @@ EXPORT_SYMBOL(acpi_map_lsapic); int acpi_unmap_lsapic(int cpu) { - x86_cpu_to_apicid[cpu] = -1; + per_cpu(x86_cpu_to_apicid, cpu) = -1; cpu_clear(cpu, cpu_present_map); num_processors--; diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 2d39f55d29a..10b67170b13 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -29,7 +29,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, unsigned int cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); flags->bm_check = 0; if (num_online_cpus() == 1) @@ -72,7 +72,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct acpi_processor_cx *cx, struct acpi_power_register *reg) { struct cstate_entry *percpu_entry; - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); cpumask_t saved_mask; int retval; diff --git a/arch/x86/kernel/acpi/earlyquirk_32.c b/arch/x86/kernel/acpi/earlyquirk_32.c deleted file mode 100644 index 23f78efc577..00000000000 --- a/arch/x86/kernel/acpi/earlyquirk_32.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Do early PCI probing for bug detection when the main PCI subsystem is - * not up yet. - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/acpi.h> - -#include <asm/pci-direct.h> -#include <asm/acpi.h> -#include <asm/apic.h> - -#ifdef CONFIG_ACPI - -static int __init nvidia_hpet_check(struct acpi_table_header *header) -{ - return 0; -} -#endif - -static int __init check_bridge(int vendor, int device) -{ -#ifdef CONFIG_ACPI - static int warned; - /* According to Nvidia all timer overrides are bogus unless HPET - is enabled. */ - if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) { - if (!warned && acpi_table_parse(ACPI_SIG_HPET, - nvidia_hpet_check)) { - warned = 1; - acpi_skip_timer_override = 1; - printk(KERN_INFO "Nvidia board " - "detected. Ignoring ACPI " - "timer override.\n"); - printk(KERN_INFO "If you got timer trouble " - "try acpi_use_timer_override\n"); - - } - } -#endif - if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) { - timer_over_8254 = 0; - printk(KERN_INFO "ATI board detected. Disabling timer routing " - "over 8254.\n"); - } - return 0; -} - -void __init check_acpi_pci(void) -{ - int num, slot, func; - - /* Assume the machine supports type 1. If not it will - always read ffffffff and should not have any side effect. - Actually a few buggy systems can machine check. Allow the user - to disable it by command line option at least -AK */ - if (!early_pci_allowed()) - return; - - /* Poor man's PCI discovery */ - for (num = 0; num < 32; num++) { - for (slot = 0; slot < 32; slot++) { - for (func = 0; func < 8; func++) { - u32 class; - u32 vendor; - class = read_pci_config(num, slot, func, - PCI_CLASS_REVISION); - if (class == 0xffffffff) - break; - - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) - continue; - - vendor = read_pci_config(num, slot, func, - PCI_VENDOR_ID); - - if (check_bridge(vendor & 0xffff, vendor >> 16)) - return; - } - - } - } -} diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index b54fded4983..2ed0a4ce62f 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -63,7 +63,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) void arch_acpi_processor_init_pdc(struct acpi_processor *pr) { unsigned int cpu = pr->id; - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); pr->pdc = NULL; if (c->x86_vendor == X86_VENDOR_INTEL) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 3bd2688bd44..d6405e0842b 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -357,14 +357,14 @@ void alternatives_smp_switch(int smp) if (smp) { printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + clear_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); } else { printk(KERN_INFO "SMP alternatives: switching to UP code\n"); set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_unlock(mod->locks, mod->locks_end, mod->text, mod->text_end); @@ -432,7 +432,7 @@ void __init alternative_instructions(void) if (1 == num_possible_cpus()) { printk(KERN_INFO "SMP alternatives: switching to UP code\n"); set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); alternatives_smp_unlock(__smp_locks, __smp_locks_end, _text, _etext); } diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig_32 index d8c6f132dc7..d8c6f132dc7 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig_32 diff --git a/arch/x86/kernel/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig_64 index a3fd51926cb..9c9699fdcf5 100644 --- a/arch/x86/kernel/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig_64 @@ -19,7 +19,7 @@ config X86_POWERNOW_K8 To compile this driver as a module, choose M here: the module will be called powernow-k8. - For details, take a look at <file:Documentation/cpu-freq/>. + For details, take a look at <file:Documentation/cpu-freq/>. If in doubt, say N. diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 2ca43ba32bc..fea0af0476b 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -77,7 +77,7 @@ static unsigned int acpi_pstate_strict; static int check_est_cpu(unsigned int cpuid) { - struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; + struct cpuinfo_x86 *cpu = &cpu_data(cpuid); if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) @@ -560,7 +560,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) unsigned int cpu = policy->cpu; struct acpi_cpufreq_data *data; unsigned int result = 0; - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *c = &cpu_data(policy->cpu); struct acpi_processor_performance *perf; dprintk("acpi_cpufreq_cpu_init\n"); diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c index c11baaf9f2b..326a4c81f68 100644 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -305,7 +305,7 @@ static struct cpufreq_driver eps_driver = { static int __init eps_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); /* This driver will work only on Centaur C7 processors with * Enhanced SpeedStep/PowerSaver registers */ diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 1e7ae7dafcf..94619c22f56 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -199,7 +199,7 @@ static int elanfreq_target (struct cpufreq_policy *policy, static int elanfreq_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); unsigned int i; int result; @@ -280,7 +280,7 @@ static struct cpufreq_driver elanfreq_driver = { static int __init elanfreq_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); /* Test if we have the right hardware */ if ((c->x86_vendor != X86_VENDOR_AMD) || diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 5045f5d583c..749d00cb2eb 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -780,7 +780,7 @@ static int longhaul_setup_southbridge(void) static int __init longhaul_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); char *cpuname=NULL; int ret; u32 lo, hi; @@ -959,7 +959,7 @@ static struct cpufreq_driver longhaul_driver = { static int __init longhaul_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) return -ENODEV; diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index b2689514295..af4a867a097 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -172,7 +172,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, u32 save_lo, save_hi; u32 eax, ebx, ecx, edx; u32 try_hi; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if (!low_freq || !high_freq) return -EINVAL; @@ -298,7 +298,7 @@ static struct cpufreq_driver longrun_driver = { */ static int __init longrun_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if (c->x86_vendor != X86_VENDOR_TRANSMETA || !cpu_has(c, X86_FEATURE_LONGRUN)) diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 793eae854f4..14791ec55cf 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -195,7 +195,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *c = &cpu_data(policy->cpu); int cpuid = 0; unsigned int i; @@ -279,7 +279,7 @@ static struct cpufreq_driver p4clockmod_driver = { static int __init cpufreq_p4_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); int ret; /* diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index 6d028533931..42405b4e34e 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -215,7 +215,7 @@ static struct cpufreq_driver powernow_k6_driver = { */ static int __init powernow_k6_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || ((c->x86_model != 12) && (c->x86_model != 13))) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index f3686a5f230..b5a9863d6cd 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -114,7 +114,7 @@ static int check_fsb(unsigned int fsbspeed) static int check_powernow(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); unsigned int maxei, eax, ebx, ecx, edx; if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c index d9f3e90a7ae..42da9bd677d 100644 --- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c +++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c @@ -102,7 +102,7 @@ static int sc520_freq_target (struct cpufreq_policy *policy, static int sc520_freq_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); int result; /* capability check */ @@ -151,7 +151,7 @@ static struct cpufreq_driver sc520_freq_driver = { static int __init sc520_freq_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); int err; /* Test if we have the right hardware */ diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 811d4743854..3031f119619 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -230,7 +230,7 @@ static struct cpu_model models[] = static int centrino_cpu_init_table(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); struct cpu_model *model; for(model = models; model->cpu_id != NULL; model++) @@ -340,7 +340,7 @@ static unsigned int get_cur_freq(unsigned int cpu) static int centrino_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); unsigned freq; unsigned l, h; int ret; @@ -612,7 +612,7 @@ static struct cpufreq_driver centrino_driver = { */ static int __init centrino_init(void) { - struct cpuinfo_x86 *cpu = cpu_data; + struct cpuinfo_x86 *cpu = &cpu_data(0); if (!cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index b1acc8ce316..76c3ab0da46 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -228,7 +228,7 @@ EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); unsigned int speedstep_detect_processor (void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); u32 ebx, msr_lo, msr_hi; dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 297a2411694..9921b01fe19 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -295,7 +295,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; #ifdef CONFIG_X86_HT - unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); + unsigned int cpu = c->cpu_index; #endif if (c->cpuid_level > 3) { @@ -417,14 +417,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) if (new_l2) { l2 = new_l2; #ifdef CONFIG_X86_HT - cpu_llc_id[cpu] = l2_id; + per_cpu(cpu_llc_id, cpu) = l2_id; #endif } if (new_l3) { l3 = new_l3; #ifdef CONFIG_X86_HT - cpu_llc_id[cpu] = l3_id; + per_cpu(cpu_llc_id, cpu) = l3_id; #endif } @@ -459,7 +459,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) struct _cpuid4_info *this_leaf, *sibling_leaf; unsigned long num_threads_sharing; int index_msb, i; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; @@ -470,8 +470,8 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) index_msb = get_count_order(num_threads_sharing); for_each_online_cpu(i) { - if (c[i].apicid >> index_msb == - c[cpu].apicid >> index_msb) { + if (cpu_data(i).apicid >> index_msb == + c->apicid >> index_msb) { cpu_set(i, this_leaf->shared_cpu_map); if (i != cpu && cpuid4_info[i]) { sibling_leaf = CPUID4_INFO_IDX(i, index); diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 54cdbf1a40f..c02541e6e65 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -120,7 +120,9 @@ int reserve_perfctr_nmi(unsigned int msr) unsigned int counter; counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return 1; if (!test_and_set_bit(counter, perfctr_nmi_owner)) return 1; @@ -132,7 +134,9 @@ void release_perfctr_nmi(unsigned int msr) unsigned int counter; counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return; clear_bit(counter, perfctr_nmi_owner); } @@ -142,7 +146,9 @@ int reserve_evntsel_nmi(unsigned int msr) unsigned int counter; counter = nmi_evntsel_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return 1; if (!test_and_set_bit(counter, evntsel_nmi_owner)) return 1; @@ -154,7 +160,9 @@ void release_evntsel_nmi(unsigned int msr) unsigned int counter; counter = nmi_evntsel_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return; clear_bit(counter, evntsel_nmi_owner); } diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 879a0f789b1..2d42b414b77 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -85,12 +85,13 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* nothing */ }; struct cpuinfo_x86 *c = v; - int i, n = c - cpu_data; + int i, n = 0; int fpu_exception; #ifdef CONFIG_SMP if (!cpu_online(n)) return 0; + n = c->cpu_index; #endif seq_printf(m, "processor\t: %d\n" "vendor_id\t: %s\n" @@ -175,11 +176,15 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = first_cpu(cpu_possible_map); + if ((*pos) < NR_CPUS && cpu_possible(*pos)) + return &cpu_data(*pos); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + *pos = next_cpu(*pos, cpu_possible_map); return c_start(m, pos); } static void c_stop(struct seq_file *m, void *v) diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 70dcf912d9f..05c9936a16c 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -114,7 +114,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, static int cpuid_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; + struct cpuinfo_x86 *c = &cpu_data(cpu); if (cpu >= NR_CPUS || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ @@ -134,15 +134,18 @@ static const struct file_operations cpuid_fops = { .open = cpuid_open, }; -static int __cpuinit cpuid_device_create(int i) +static __cpuinit int cpuid_device_create(int cpu) { - int err = 0; struct device *dev; - dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, i), "cpu%d",i); - if (IS_ERR(dev)) - err = PTR_ERR(dev); - return err; + dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), + "cpu%d", cpu); + return IS_ERR(dev) ? PTR_ERR(dev) : 0; +} + +static void cpuid_device_destroy(int cpu) +{ + device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); } static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, @@ -150,18 +153,21 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; + int err = 0; switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - cpuid_device_create(cpu); + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + err = cpuid_device_create(cpu); break; + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); + cpuid_device_destroy(cpu); break; } - return NOTIFY_OK; + return err ? NOTIFY_BAD : NOTIFY_OK; } static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier = @@ -198,7 +204,7 @@ static int __init cpuid_init(void) out_class: i = 0; for_each_online_cpu(i) { - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, i)); + cpuid_device_destroy(i); } class_destroy(cpuid_class); out_chrdev: @@ -212,7 +218,7 @@ static void __exit cpuid_exit(void) int cpu = 0; for_each_online_cpu(cpu) - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); + cpuid_device_destroy(cpu); class_destroy(cpuid_class); unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); diff --git a/arch/x86/kernel/early-quirks_64.c b/arch/x86/kernel/early-quirks.c index 13aa4fd728f..dc34acbd54a 100644 --- a/arch/x86/kernel/early-quirks_64.c +++ b/arch/x86/kernel/early-quirks.c @@ -13,9 +13,13 @@ #include <linux/acpi.h> #include <linux/pci_ids.h> #include <asm/pci-direct.h> -#include <asm/proto.h> -#include <asm/iommu.h> #include <asm/dma.h> +#include <asm/io_apic.h> +#include <asm/apic.h> + +#ifdef CONFIG_IOMMU +#include <asm/iommu.h> +#endif static void __init via_bugs(void) { @@ -23,7 +27,8 @@ static void __init via_bugs(void) if ((end_pfn > MAX_DMA32_PFN || force_iommu) && !iommu_aperture_allowed) { printk(KERN_INFO - "Looks like a VIA chipset. Disabling IOMMU. Override with iommu=allowed\n"); + "Looks like a VIA chipset. Disabling IOMMU." + " Override with iommu=allowed\n"); iommu_aperture_disabled = 1; } #endif @@ -40,6 +45,7 @@ static int __init nvidia_hpet_check(struct acpi_table_header *header) static void __init nvidia_bugs(void) { #ifdef CONFIG_ACPI +#ifdef CONFIG_X86_IO_APIC /* * All timer overrides on Nvidia are * wrong unless HPET is enabled. @@ -59,17 +65,20 @@ static void __init nvidia_bugs(void) "try acpi_use_timer_override\n"); } #endif +#endif /* RED-PEN skip them on mptables too? */ } static void __init ati_bugs(void) { +#ifdef CONFIG_X86_IO_APIC if (timer_over_8254 == 1) { timer_over_8254 = 0; printk(KERN_INFO - "ATI board detected. Disabling timer routing over 8254.\n"); + "ATI board detected. Disabling timer routing over 8254.\n"); } +#endif } struct chipset { @@ -104,7 +113,7 @@ void __init early_quirks(void) if (class == 0xffffffff) break; - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) + if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) continue; vendor = read_pci_config(num, slot, func, diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 4ae03e3e829..ce703e21c91 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -24,10 +24,19 @@ #include <acpi/acpi_bus.h> #endif -/* which logical CPU number maps to which CPU (physical APIC ID) */ -u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly +/* + * which logical CPU number maps to which CPU (physical APIC ID) + * + * The following static array is used during kernel startup + * and the x86_cpu_to_apicid_ptr contains the address of the + * array during this time. Is it zeroed when the per_cpu + * data area is removed. + */ +u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; -EXPORT_SYMBOL(x86_cpu_to_apicid); +void *x86_cpu_to_apicid_ptr; +DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); struct genapic __read_mostly *genapic = &apic_flat; diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 91c7526768e..07352b74bda 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -172,7 +172,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) */ cpu = first_cpu(cpumask); if ((unsigned)cpu < NR_CPUS) - return x86_cpu_to_apicid[cpu]; + return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index a7eee0a4751..6b3469311e4 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -58,7 +58,7 @@ void __init x86_64_start_kernel(char * real_mode_data) for (i = 0; i < IDT_ENTRIES; i++) set_intr_gate(i, early_idt_handler); - asm volatile("lidt %0" :: "m" (idt_descr)); + load_idt((const struct desc_ptr *)&idt_descr); early_printk("Kernel alive\n"); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index f8367074da0..22d8f00c80d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -69,12 +69,15 @@ static inline void hpet_clear_mapping(void) * HPET command line enable / disable */ static int boot_hpet_disable; +int hpet_force_user; static int __init hpet_setup(char* str) { if (str) { if (!strncmp("disable", str, 7)) boot_hpet_disable = 1; + if (!strncmp("force", str, 5)) + hpet_force_user = 1; } return 1; } diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c index d34a10cc13a..f634fc715c9 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259_32.c @@ -403,7 +403,8 @@ void __init native_init_IRQ(void) int vector = FIRST_EXTERNAL_VECTOR + i; if (i >= NR_IRQS) break; - if (vector != SYSCALL_VECTOR) + /* SYSCALL_VECTOR was reserved in trap_init. */ + if (!test_bit(vector, used_vectors)) set_intr_gate(vector, interrupt[i]); } diff --git a/arch/x86/kernel/init_task_32.c b/arch/x86/kernel/init_task.c index d26fc063a76..468c9c43784 100644 --- a/arch/x86/kernel/init_task_32.c +++ b/arch/x86/kernel/init_task.c @@ -15,7 +15,6 @@ static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); - EXPORT_SYMBOL(init_mm); /* @@ -25,7 +24,7 @@ EXPORT_SYMBOL(init_mm); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union +union thread_union init_thread_union __attribute__((__section__(".data.init_task"))) = { INIT_THREAD_INFO(init_task) }; @@ -35,12 +34,14 @@ union thread_union init_thread_union * All other task structs will be allocated on slabs in fork.c */ struct task_struct init_task = INIT_TASK(init_task); - EXPORT_SYMBOL(init_task); /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. - */ + * no more per-task TSS's. The TSS size is kept cacheline-aligned + * so they are allowed to end up in the .data.cacheline_aligned + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; diff --git a/arch/x86/kernel/init_task_64.c b/arch/x86/kernel/init_task_64.c deleted file mode 100644 index 4ff33d4f855..00000000000 --- a/arch/x86/kernel/init_task_64.c +++ /dev/null @@ -1,54 +0,0 @@ -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/init_task.h> -#include <linux/fs.h> -#include <linux/mqueue.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/desc.h> - -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -struct mm_struct init_mm = INIT_MM(init_mm); - -EXPORT_SYMBOL(init_mm); - -/* - * Initial task structure. - * - * We need to make sure that this is 8192-byte aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); - -EXPORT_SYMBOL(init_task); -/* - * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. The TSS size is kept cacheline-aligned - * so they are allowed to end up in the .data.cacheline_aligned - * section. Since TSS's are completely CPU-local, we want them - * on exact cacheline boundaries, to eliminate cacheline ping-pong. - */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; - -/* Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - -#define ALIGN_TO_4K __attribute__((section(".data.init_task"))) diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 5f10c718953..0c55e9d86f6 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1198,7 +1198,7 @@ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 } static int __assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; - int vector, offset, i; + int vector, offset; BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); @@ -1215,11 +1215,8 @@ next: } if (vector == current_vector) return -ENOSPC; - if (vector == SYSCALL_VECTOR) + if (test_and_set_bit(vector, used_vectors)) goto next; - for (i = 0; i < NR_IRQ_VECTORS; i++) - if (irq_vector[i] == vector) - goto next; current_vector = vector; current_offset = offset; @@ -2295,6 +2292,12 @@ static inline void __init check_timer(void) void __init setup_IO_APIC(void) { + int i; + + /* Reserve all the system vectors. */ + for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++) + set_bit(i, used_vectors); + enable_IO_APIC(); if (acpi_ioapic) diff --git a/arch/x86/kernel/mce_64.c b/arch/x86/kernel/mce_64.c index 66e6b797b2c..2cf20de5bec 100644 --- a/arch/x86/kernel/mce_64.c +++ b/arch/x86/kernel/mce_64.c @@ -799,7 +799,8 @@ static __cpuinit int mce_create_device(unsigned int cpu) { int err; int i; - if (!mce_available(&cpu_data[cpu])) + + if (!mce_available(&cpu_data(cpu))) return -EIO; memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); diff --git a/arch/x86/kernel/mce_amd_64.c b/arch/x86/kernel/mce_amd_64.c index 0d2afd96aca..752fb16a817 100644 --- a/arch/x86/kernel/mce_amd_64.c +++ b/arch/x86/kernel/mce_amd_64.c @@ -472,11 +472,11 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) sprintf(name, "threshold_bank%i", bank); #ifdef CONFIG_SMP - if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */ + if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ i = first_cpu(per_cpu(cpu_core_map, cpu)); /* first core not up yet */ - if (cpu_data[i].cpu_core_id) + if (cpu_data(i).cpu_core_id) goto out; /* already linked */ diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 09cf7811035..09c315214a5 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -132,7 +132,7 @@ static struct ucode_cpu_info { static void collect_cpu_info(int cpu_num) { - struct cpuinfo_x86 *c = cpu_data + cpu_num; + struct cpuinfo_x86 *c = &cpu_data(cpu_num); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; unsigned int val[2]; @@ -522,7 +522,7 @@ static struct platform_device *microcode_pdev; static int cpu_request_microcode(int cpu) { char name[30]; - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); const struct firmware *firmware; void *buf; unsigned long size; @@ -570,7 +570,7 @@ static int cpu_request_microcode(int cpu) static int apply_microcode_check_cpu(int cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; cpumask_t old; unsigned int val[2]; diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 8bf0ca03ac8..ef4aab12358 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -57,6 +57,8 @@ unsigned long mp_lapic_addr = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_id = -1U; +EXPORT_SYMBOL(boot_cpu_id); + /* Internal processor count */ unsigned int num_processors __cpuinitdata = 0; @@ -86,7 +88,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __cpuinit MP_processor_info (struct mpc_config_processor *m) +static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { int cpu; cpumask_t tmp_map; @@ -123,7 +125,18 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) cpu = 0; } bios_cpu_apicid[cpu] = m->mpc_apicid; - x86_cpu_to_apicid[cpu] = m->mpc_apicid; + /* + * We get called early in the the start_kernel initialization + * process when the per_cpu data area is not yet setup, so we + * use a static array that is removed after the per_cpu data + * area is created. + */ + if (x86_cpu_to_apicid_ptr) { + u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr; + x86_cpu_to_apicid[cpu] = m->mpc_apicid; + } else { + per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; + } cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index e18e516cf54..ee6eba4ecfe 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -112,7 +112,7 @@ static ssize_t msr_write(struct file *file, const char __user *buf, static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; + struct cpuinfo_x86 *c = &cpu_data(cpu); if (cpu >= NR_CPUS || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index b2b42bdb0a1..afaf9f12c03 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -11,7 +11,7 @@ #include <asm/iommu.h> #include <asm/calgary.h> -int iommu_merge __read_mostly = 0; +int iommu_merge __read_mostly = 1; EXPORT_SYMBOL(iommu_merge); dma_addr_t bad_dma_address __read_mostly; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 044a47745a5..7b899584d29 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -295,34 +295,52 @@ static int __init idle_setup(char *str) } early_param("idle", idle_setup); -void show_regs(struct pt_regs * regs) +void __show_registers(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; + unsigned long esp; + unsigned short ss, gs; + + if (user_mode_vm(regs)) { + esp = regs->esp; + ss = regs->xss & 0xffff; + savesegment(gs, gs); + } else { + esp = (unsigned long) (®s->esp); + savesegment(ss, ss); + savesegment(gs, gs); + } printk("\n"); - printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); + printk("Pid: %d, comm: %s %s (%s %.*s)\n", + task_pid_nr(current), current->comm, + print_tainted(), init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + + printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", + 0xffff & regs->xcs, regs->eip, regs->eflags, + smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode_vm(regs)) - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx %s (%s %.*s)\n", - regs->eflags, print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax,regs->ebx,regs->ecx,regs->edx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx", - regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x FS: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, + regs->xfs & 0xffff, gs, ss); + + if (!all) + return; cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); cr4 = read_cr4_safe(); - printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); + printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", + cr0, cr2, cr3, cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); @@ -330,10 +348,16 @@ void show_regs(struct pt_regs * regs) get_debugreg(d3, 3); printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", d0, d1, d2, d3); + get_debugreg(d6, 6); get_debugreg(d7, 7); - printk("DR6: %08lx DR7: %08lx\n", d6, d7); + printk("DR6: %08lx DR7: %08lx\n", + d6, d7); +} +void show_regs(struct pt_regs *regs) +{ + __show_registers(regs, 1); show_trace(NULL, regs, ®s->esp); } diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index d769e204f94..a4ce1911efd 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -45,9 +45,12 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) if (!(config & 0x2)) pci_write_config_byte(dev, 0xf4, config); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, + quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, + quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, + quirk_intel_irqbalance); #endif #if defined(CONFIG_HPET_TIMER) @@ -56,7 +59,8 @@ unsigned long force_hpet_address; static enum { NONE_FORCE_HPET_RESUME, OLD_ICH_FORCE_HPET_RESUME, - ICH_FORCE_HPET_RESUME + ICH_FORCE_HPET_RESUME, + VT8237_FORCE_HPET_RESUME } force_hpet_resume_type; static void __iomem *rcba_base; @@ -146,17 +150,17 @@ static void ich_force_enable_hpet(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, - ich_force_enable_hpet); + ich_force_enable_hpet); static struct pci_dev *cached_dev; @@ -232,10 +236,91 @@ static void old_ich_force_enable_hpet(struct pci_dev *dev) printk(KERN_DEBUG "Failed to force enable HPET\n"); } +/* + * Undocumented chipset features. Make sure that the user enforced + * this. + */ +static void old_ich_force_enable_hpet_user(struct pci_dev *dev) +{ + if (hpet_force_user) + old_ich_force_enable_hpet(dev); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, + old_ich_force_enable_hpet_user); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, - old_ich_force_enable_hpet); + old_ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12, - old_ich_force_enable_hpet); + old_ich_force_enable_hpet); + + +static void vt8237_force_hpet_resume(void) +{ + u32 val; + + if (!force_hpet_address || !cached_dev) + return; + + val = 0xfed00000 | 0x80; + pci_write_config_dword(cached_dev, 0x68, val); + + pci_read_config_dword(cached_dev, 0x68, &val); + if (val & 0x80) + printk(KERN_DEBUG "Force enabled HPET at resume\n"); + else + BUG(); +} + +static void vt8237_force_enable_hpet(struct pci_dev *dev) +{ + u32 uninitialized_var(val); + + if (!hpet_force_user || hpet_address || force_hpet_address) + return; + + pci_read_config_dword(dev, 0x68, &val); + /* + * Bit 7 is HPET enable bit. + * Bit 31:10 is HPET base address (contrary to what datasheet claims) + */ + if (val & 0x80) { + force_hpet_address = (val & ~0x3ff); + printk(KERN_DEBUG "HPET at base address 0x%lx\n", + force_hpet_address); + return; + } + + /* + * HPET is disabled. Trying enabling at FED00000 and check + * whether it sticks + */ + val = 0xfed00000 | 0x80; + pci_write_config_dword(dev, 0x68, val); + + pci_read_config_dword(dev, 0x68, &val); + if (val & 0x80) { + force_hpet_address = (val & ~0x3ff); + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + cached_dev = dev; + force_hpet_resume_type = VT8237_FORCE_HPET_RESUME; + return; + } + + printk(KERN_DEBUG "Failed to force enable HPET\n"); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, + vt8237_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, + vt8237_force_enable_hpet); + void force_hpet_resume(void) { @@ -246,6 +331,9 @@ void force_hpet_resume(void) case OLD_ICH_FORCE_HPET_RESUME: return old_ich_force_hpet_resume(); + case VT8237_FORCE_HPET_RESUME: + return vt8237_force_hpet_resume(); + default: break; } diff --git a/arch/x86/kernel/reboot_64.c b/arch/x86/kernel/reboot_64.c index 368db2b9c5a..776eb06b651 100644 --- a/arch/x86/kernel/reboot_64.c +++ b/arch/x86/kernel/reboot_64.c @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <asm/io.h> #include <asm/delay.h> +#include <asm/desc.h> #include <asm/hw_irq.h> #include <asm/system.h> #include <asm/pgtable.h> @@ -136,7 +137,7 @@ void machine_emergency_restart(void) } case BOOT_TRIPLE: - __asm__ __volatile__("lidt (%0)": :"r" (&no_idt)); + load_idt((const struct desc_ptr *)&no_idt); __asm__ __volatile__("int3"); reboot_type = BOOT_KBD; diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index 8b30b26ad06..1a07bbea7be 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -12,6 +12,7 @@ #include <linux/interrupt.h> #include <asm/reboot_fixups.h> #include <asm/msr.h> +#include <asm/geode.h> static void cs5530a_warm_reset(struct pci_dev *dev) { @@ -24,11 +25,8 @@ static void cs5530a_warm_reset(struct pci_dev *dev) static void cs5536_warm_reset(struct pci_dev *dev) { - /* - * 6.6.2.12 Soft Reset (DIVIL_SOFT_RESET) - * writing 1 to the LSB of this MSR causes a hard reset. - */ - wrmsrl(0x51400017, 1ULL); + /* writing 1 to the LSB of this MSR causes a hard reset */ + wrmsrl(MSR_DIVIL_SOFT_RESET, 1ULL); udelay(50); /* shouldn't get here but be safe and spin a while */ } diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index ba918823505..3558ac78c92 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -185,6 +185,12 @@ void __cpuinit check_efer(void) unsigned long kernel_eflags; /* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); + +/* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a @@ -224,8 +230,8 @@ void __cpuinit cpu_init (void) memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE); cpu_gdt_descr[cpu].size = GDT_SIZE; - asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); - asm volatile("lidt %0" :: "m" (idt_descr)); + load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]); + load_idt((const struct desc_ptr *)&idt_descr); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); syscall_init(); diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 978dc0196a0..e4f19912476 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -661,9 +661,7 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_PCI -#ifdef CONFIG_X86_IO_APIC - check_acpi_pci(); /* Checks more than just ACPI actually */ -#endif + early_quirks(); #endif #ifdef CONFIG_ACPI diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index cdcba697522..31322d42eaa 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -302,6 +302,11 @@ void __init setup_arch(char **cmdline_p) dmi_scan_machine(); +#ifdef CONFIG_SMP + /* setup to use the static apicid table during kernel startup */ + x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init; +#endif + #ifdef CONFIG_ACPI /* * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). @@ -554,7 +559,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) but in the same order as the HT nodeids. If that doesn't result in a usable node fall back to the path for the previous case. */ - int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits); + int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits); if (ht_nodeid >= 0 && apicid_to_node[ht_nodeid] != NUMA_NO_NODE) node = apicid_to_node[ht_nodeid]; @@ -878,6 +883,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; + c->cpu_index = 0; #endif } @@ -984,6 +990,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) static int show_cpuinfo(struct seq_file *m, void *v) { struct cpuinfo_x86 *c = v; + int cpu = 0; /* * These flag bits must match the definitions in <asm/cpufeature.h>. @@ -1062,8 +1069,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP - if (!cpu_online(c-cpu_data)) + if (!cpu_online(c->cpu_index)) return 0; + cpu = c->cpu_index; #endif seq_printf(m,"processor\t: %u\n" @@ -1071,7 +1079,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) "cpu family\t: %d\n" "model\t\t: %d\n" "model name\t: %s\n", - (unsigned)(c-cpu_data), + (unsigned)cpu, c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", c->x86, (int)c->x86_model, @@ -1083,7 +1091,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if (cpu_has(c,X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data)); + unsigned int freq = cpufreq_quick_get((unsigned)cpu); if (!freq) freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", @@ -1096,7 +1104,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP if (smp_num_siblings * c->x86_max_cores > 1) { - int cpu = c - cpu_data; seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); seq_printf(m, "siblings\t: %d\n", cpus_weight(per_cpu(cpu_core_map, cpu))); @@ -1154,12 +1161,16 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = first_cpu(cpu_possible_map); + if ((*pos) < NR_CPUS && cpu_possible(*pos)) + return &cpu_data(*pos); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + *pos = next_cpu(*pos, cpu_possible_map); return c_start(m, pos); } diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index 791d9f8036a..2621ca3b2e4 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c @@ -610,7 +610,7 @@ static void stop_this_cpu (void * dummy) */ cpu_clear(smp_processor_id(), cpu_online_map); disable_local_APIC(); - if (cpu_data[smp_processor_id()].hlt_works_ok) + if (cpu_data(smp_processor_id()).hlt_works_ok) for(;;) halt(); for (;;); } @@ -676,7 +676,7 @@ static int convert_apicid_to_cpu(int apic_id) int i; for (i = 0; i < NR_CPUS; i++) { - if (x86_cpu_to_apicid[i] == apic_id) + if (per_cpu(x86_cpu_to_apicid, i) == apic_id) return i; } return -1; diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index 5c2964727d1..03fa6ed559c 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -322,17 +322,27 @@ void unlock_ipi_call_lock(void) } /* - * this function sends a 'generic call function' IPI to one other CPU - * in the system. - * - * cpu is a standard Linux logical CPU number. + * this function sends a 'generic call function' IPI to all other CPU + * of the system defined in the mask. */ -static void -__smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) + +static int +__smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) { struct call_data_struct data; - int cpus = 1; + cpumask_t allbutself; + int cpus; + + allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + + cpus_and(mask, mask, allbutself); + cpus = cpus_weight(mask); + + if (!cpus) + return 0; data.func = func; data.info = info; @@ -343,19 +353,55 @@ __smp_call_function_single(int cpu, void (*func) (void *info), void *info, call_data = &data; wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); + + /* Send a message to other CPUs */ + if (cpus_equal(mask, allbutself)) + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + else + send_IPI_mask(mask, CALL_FUNCTION_VECTOR); /* Wait for response */ while (atomic_read(&data.started) != cpus) cpu_relax(); if (!wait) - return; + return 0; while (atomic_read(&data.finished) != cpus) cpu_relax(); + + return 0; +} +/** + * smp_call_function_mask(): Run a function on a set of other CPUs. + * @mask: The set of cpus to run on. Must not include the current cpu. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. + */ +int smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) +{ + int ret; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + spin_lock(&call_lock); + ret = __smp_call_function_mask(mask, func, info, wait); + spin_unlock(&call_lock); + return ret; } +EXPORT_SYMBOL(smp_call_function_mask); /* * smp_call_function_single - Run a function on a specific CPU @@ -374,6 +420,7 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, int nonatomic, int wait) { /* prevent preemption and reschedule on another processor */ + int ret; int me = get_cpu(); /* Can deadlock when called with interrupts disabled */ @@ -387,51 +434,14 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, return 0; } - spin_lock(&call_lock); - __smp_call_function_single(cpu, func, info, nonatomic, wait); - spin_unlock(&call_lock); + ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); + put_cpu(); - return 0; + return ret; } EXPORT_SYMBOL(smp_call_function_single); /* - * this function sends a 'generic call function' IPI to all other CPUs - * in the system. - */ -static void __smp_call_function (void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - struct call_data_struct data; - int cpus = num_online_cpus()-1; - - if (!cpus) - return; - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (!wait) - return; - - while (atomic_read(&data.finished) != cpus) - cpu_relax(); -} - -/* * smp_call_function - run a function on all other CPUs. * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. @@ -449,10 +459,7 @@ static void __smp_call_function (void (*func) (void *info), void *info, int smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait) { - spin_lock(&call_lock); - __smp_call_function(func,info,nonatomic,wait); - spin_unlock(&call_lock); - return 0; + return smp_call_function_mask(cpu_online_map, func, info, wait); } EXPORT_SYMBOL(smp_call_function); @@ -479,7 +486,7 @@ void smp_send_stop(void) /* Don't deadlock on the call lock in panic */ nolock = !spin_trylock(&call_lock); local_irq_save(flags); - __smp_call_function(stop_this_cpu, NULL, 0, 0); + __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0); if (!nolock) spin_unlock(&call_lock); disable_local_APIC(); diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index be3faac0471..7b8fdfa169d 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -67,7 +67,7 @@ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ -int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; +DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID; /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); @@ -89,12 +89,20 @@ EXPORT_SYMBOL(cpu_possible_map); static cpumask_t smp_commenced_mask; /* Per CPU bogomips and other parameters */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); +DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); -u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = - { [0 ... NR_CPUS-1] = 0xff }; -EXPORT_SYMBOL(x86_cpu_to_apicid); +/* + * The following static array is used during kernel startup + * and the x86_cpu_to_apicid_ptr contains the address of the + * array during this time. Is it zeroed when the per_cpu + * data area is removed. + */ +u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata = + { [0 ... NR_CPUS-1] = BAD_APICID }; +void *x86_cpu_to_apicid_ptr; +DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); u8 apicid_2_node[MAX_APICID]; @@ -150,9 +158,10 @@ void __init smp_alloc_memory(void) void __cpuinit smp_store_cpu_info(int id) { - struct cpuinfo_x86 *c = cpu_data + id; + struct cpuinfo_x86 *c = &cpu_data(id); *c = boot_cpu_data; + c->cpu_index = id; if (id!=0) identify_secondary_cpu(c); /* @@ -294,7 +303,7 @@ static int cpucount; /* maps the cpu to the sched domain representing multi-core */ cpumask_t cpu_coregroup_map(int cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); /* * For perf, we return last level cache shared map. * And for power savings, we return cpu_core_map @@ -311,41 +320,41 @@ static cpumask_t cpu_sibling_setup_map; void __cpuinit set_cpu_sibling_map(int cpu) { int i; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (c[cpu].phys_proc_id == c[i].phys_proc_id && - c[cpu].cpu_core_id == c[i].cpu_core_id) { + if (c->phys_proc_id == cpu_data(i).phys_proc_id && + c->cpu_core_id == cpu_data(i).cpu_core_id) { cpu_set(i, per_cpu(cpu_sibling_map, cpu)); cpu_set(cpu, per_cpu(cpu_sibling_map, i)); cpu_set(i, per_cpu(cpu_core_map, cpu)); cpu_set(cpu, per_cpu(cpu_core_map, i)); - cpu_set(i, c[cpu].llc_shared_map); - cpu_set(cpu, c[i].llc_shared_map); + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); } } } else { cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); } - cpu_set(cpu, c[cpu].llc_shared_map); + cpu_set(cpu, c->llc_shared_map); if (current_cpu_data.x86_max_cores == 1) { per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); - c[cpu].booted_cores = 1; + c->booted_cores = 1; return; } for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (cpu_llc_id[cpu] != BAD_APICID && - cpu_llc_id[cpu] == cpu_llc_id[i]) { - cpu_set(i, c[cpu].llc_shared_map); - cpu_set(cpu, c[i].llc_shared_map); + if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && + per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); } - if (c[cpu].phys_proc_id == c[i].phys_proc_id) { + if (c->phys_proc_id == cpu_data(i).phys_proc_id) { cpu_set(i, per_cpu(cpu_core_map, cpu)); cpu_set(cpu, per_cpu(cpu_core_map, i)); /* @@ -357,15 +366,15 @@ void __cpuinit set_cpu_sibling_map(int cpu) * the booted_cores for this new cpu */ if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) - c[cpu].booted_cores++; + c->booted_cores++; /* * increment the core count for all * the other cpus in this package */ if (i != cpu) - c[i].booted_cores++; - } else if (i != cpu && !c[cpu].booted_cores) - c[cpu].booted_cores = c[i].booted_cores; + cpu_data(i).booted_cores++; + } else if (i != cpu && !c->booted_cores) + c->booted_cores = cpu_data(i).booted_cores; } } } @@ -804,7 +813,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) irq_ctx_init(cpu); - x86_cpu_to_apicid[cpu] = apicid; + per_cpu(x86_cpu_to_apicid, cpu) = apicid; /* * This grunge runs the startup process for * the targeted processor. @@ -844,7 +853,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) /* number CPUs logically, starting from 1 (BSP is 0) */ Dprintk("OK.\n"); printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data[cpu]); + print_cpu_info(&cpu_data(cpu)); Dprintk("CPU has booted.\n"); } else { boot_error= 1; @@ -866,7 +875,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ cpucount--; } else { - x86_cpu_to_apicid[cpu] = apicid; + per_cpu(x86_cpu_to_apicid, cpu) = apicid; cpu_set(cpu, cpu_present_map); } @@ -915,7 +924,7 @@ static int __cpuinit __smp_prepare_cpu(int cpu) struct warm_boot_cpu_info info; int apicid, ret; - apicid = x86_cpu_to_apicid[cpu]; + apicid = per_cpu(x86_cpu_to_apicid, cpu); if (apicid == BAD_APICID) { ret = -ENODEV; goto exit; @@ -961,11 +970,11 @@ static void __init smp_boot_cpus(unsigned int max_cpus) */ smp_store_cpu_info(0); /* Final full version of the data */ printk("CPU%d: ", 0); - print_cpu_info(&cpu_data[0]); + print_cpu_info(&cpu_data(0)); boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); boot_cpu_logical_apicid = logical_smp_processor_id(); - x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; + per_cpu(x86_cpu_to_apicid, 0) = boot_cpu_physical_apicid; current_thread_info()->cpu = 0; @@ -1008,6 +1017,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); smpboot_clear_io_apic_irqs(); phys_cpu_present_map = physid_mask_of_physid(0); + map_cpu_to_logical_apicid(); cpu_set(0, per_cpu(cpu_sibling_map, 0)); cpu_set(0, per_cpu(cpu_core_map, 0)); return; @@ -1029,6 +1039,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) } smpboot_clear_io_apic_irqs(); phys_cpu_present_map = physid_mask_of_physid(0); + map_cpu_to_logical_apicid(); cpu_set(0, per_cpu(cpu_sibling_map, 0)); cpu_set(0, per_cpu(cpu_core_map, 0)); return; @@ -1082,7 +1093,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) Dprintk("Before bogomips.\n"); for (cpu = 0; cpu < NR_CPUS; cpu++) if (cpu_isset(cpu, cpu_callout_map)) - bogosum += cpu_data[cpu].loops_per_jiffy; + bogosum += cpu_data(cpu).loops_per_jiffy; printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", cpucount+1, @@ -1152,7 +1163,7 @@ void __init native_smp_prepare_boot_cpu(void) void remove_siblinginfo(int cpu) { int sibling; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); @@ -1160,15 +1171,15 @@ void remove_siblinginfo(int cpu) * last thread sibling in this cpu core going down */ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) - c[sibling].booted_cores--; + cpu_data(sibling).booted_cores--; } for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); cpus_clear(per_cpu(cpu_sibling_map, cpu)); cpus_clear(per_cpu(cpu_core_map, cpu)); - c[cpu].phys_proc_id = 0; - c[cpu].cpu_core_id = 0; + c->phys_proc_id = 0; + c->cpu_core_id = 0; cpu_clear(cpu, cpu_sibling_setup_map); } diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index e351ac4ab5b..fd1fff6a35a 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -65,7 +65,7 @@ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ -u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; +DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID; /* Bitmask of currently online CPUs */ cpumask_t cpu_online_map __read_mostly; @@ -84,8 +84,8 @@ cpumask_t cpu_possible_map; EXPORT_SYMBOL(cpu_possible_map); /* Per CPU bogomips and other parameters */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); +DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); /* Set when the idlers are all forked */ int smp_threads_ready; @@ -138,9 +138,10 @@ static unsigned long __cpuinit setup_trampoline(void) static void __cpuinit smp_store_cpu_info(int id) { - struct cpuinfo_x86 *c = cpu_data + id; + struct cpuinfo_x86 *c = &cpu_data(id); *c = boot_cpu_data; + c->cpu_index = id; identify_cpu(c); print_cpu_info(c); } @@ -237,7 +238,7 @@ void __cpuinit smp_callin(void) /* maps the cpu to the sched domain representing multi-core */ cpumask_t cpu_coregroup_map(int cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); /* * For perf, we return last level cache shared map. * And for power savings, we return cpu_core_map @@ -254,41 +255,41 @@ static cpumask_t cpu_sibling_setup_map; static inline void set_cpu_sibling_map(int cpu) { int i; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (c[cpu].phys_proc_id == c[i].phys_proc_id && - c[cpu].cpu_core_id == c[i].cpu_core_id) { + if (c->phys_proc_id == cpu_data(i).phys_proc_id && + c->cpu_core_id == cpu_data(i).cpu_core_id) { cpu_set(i, per_cpu(cpu_sibling_map, cpu)); cpu_set(cpu, per_cpu(cpu_sibling_map, i)); cpu_set(i, per_cpu(cpu_core_map, cpu)); cpu_set(cpu, per_cpu(cpu_core_map, i)); - cpu_set(i, c[cpu].llc_shared_map); - cpu_set(cpu, c[i].llc_shared_map); + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); } } } else { cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); } - cpu_set(cpu, c[cpu].llc_shared_map); + cpu_set(cpu, c->llc_shared_map); if (current_cpu_data.x86_max_cores == 1) { per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); - c[cpu].booted_cores = 1; + c->booted_cores = 1; return; } for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (cpu_llc_id[cpu] != BAD_APICID && - cpu_llc_id[cpu] == cpu_llc_id[i]) { - cpu_set(i, c[cpu].llc_shared_map); - cpu_set(cpu, c[i].llc_shared_map); + if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && + per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); } - if (c[cpu].phys_proc_id == c[i].phys_proc_id) { + if (c->phys_proc_id == cpu_data(i).phys_proc_id) { cpu_set(i, per_cpu(cpu_core_map, cpu)); cpu_set(cpu, per_cpu(cpu_core_map, i)); /* @@ -300,15 +301,15 @@ static inline void set_cpu_sibling_map(int cpu) * the booted_cores for this new cpu */ if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) - c[cpu].booted_cores++; + c->booted_cores++; /* * increment the core count for all * the other cpus in this package */ if (i != cpu) - c[i].booted_cores++; - } else if (i != cpu && !c[cpu].booted_cores) - c[cpu].booted_cores = c[i].booted_cores; + cpu_data(i).booted_cores++; + } else if (i != cpu && !c->booted_cores) + c->booted_cores = cpu_data(i).booted_cores; } } } @@ -694,7 +695,7 @@ do_rest: clear_node_cpumask(cpu); /* was set by numa_add_cpu */ cpu_clear(cpu, cpu_present_map); cpu_clear(cpu, cpu_possible_map); - x86_cpu_to_apicid[cpu] = BAD_APICID; + per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; return -EIO; } @@ -841,6 +842,26 @@ static int __init smp_sanity_check(unsigned max_cpus) } /* + * Copy apicid's found by MP_processor_info from initial array to the per cpu + * data area. The x86_cpu_to_apicid_init array is then expendable and the + * x86_cpu_to_apicid_ptr is zeroed indicating that the static array is no + * longer available. + */ +void __init smp_set_apicids(void) +{ + int cpu; + + for_each_cpu_mask(cpu, cpu_possible_map) { + if (per_cpu_offset(cpu)) + per_cpu(x86_cpu_to_apicid, cpu) = + x86_cpu_to_apicid_init[cpu]; + } + + /* indicate the static array will be going away soon */ + x86_cpu_to_apicid_ptr = NULL; +} + +/* * Prepare for SMP bootup. The MP table or ACPI has been read * earlier. Just do some sanity checking here and enable APIC mode. */ @@ -849,6 +870,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) nmi_watchdog_default(); current_cpu_data = boot_cpu_data; current_thread_info()->cpu = 0; /* needed? */ + smp_set_apicids(); set_cpu_sibling_map(0); if (smp_sanity_check(max_cpus) < 0) { @@ -968,7 +990,7 @@ void __init smp_cpus_done(unsigned int max_cpus) static void remove_siblinginfo(int cpu) { int sibling; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); @@ -976,15 +998,15 @@ static void remove_siblinginfo(int cpu) * last thread sibling in this cpu core going down */ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) - c[sibling].booted_cores--; + cpu_data(sibling).booted_cores--; } for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); cpus_clear(per_cpu(cpu_sibling_map, cpu)); cpus_clear(per_cpu(cpu_core_map, cpu)); - c[cpu].phys_proc_id = 0; - c[cpu].cpu_core_id = 0; + c->phys_proc_id = 0; + c->cpu_core_id = 0; cpu_clear(cpu, cpu_sibling_setup_map); } diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c index f8fafe527ff..622bb026828 100644 --- a/arch/x86/kernel/suspend_64.c +++ b/arch/x86/kernel/suspend_64.c @@ -32,9 +32,9 @@ void __save_processor_state(struct saved_context *ctxt) /* * descriptor tables */ - asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit)); - asm volatile ("sidt %0" : "=m" (ctxt->idt_limit)); - asm volatile ("str %0" : "=m" (ctxt->tr)); + store_gdt((struct desc_ptr *)&ctxt->gdt_limit); + store_idt((struct desc_ptr *)&ctxt->idt_limit); + store_tr(ctxt->tr); /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ /* @@ -91,8 +91,9 @@ void __restore_processor_state(struct saved_context *ctxt) * now restore the descriptor tables to their proper values * ltr is done i fix_processor_context(). */ - asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit)); - asm volatile ("lidt %0" :: "m" (ctxt->idt_limit)); + load_gdt((const struct desc_ptr *)&ctxt->gdt_limit); + load_idt((const struct desc_ptr *)&ctxt->idt_limit); + /* * segment registers diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 1e9d57256eb..cc9acace7e2 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -63,6 +63,9 @@ int panic_on_unrecovered_nmi; +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); + asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */ @@ -288,33 +291,9 @@ EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { int i; - int in_kernel = 1; - unsigned long esp; - unsigned short ss, gs; - - esp = (unsigned long) (®s->esp); - savesegment(ss, ss); - savesegment(gs, gs); - if (user_mode_vm(regs)) { - in_kernel = 0; - esp = regs->esp; - ss = regs->xss & 0xffff; - } + print_modules(); - printk(KERN_EMERG "CPU: %d\n" - KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n" - KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n", - smp_processor_id(), 0xffff & regs->xcs, regs->eip, - print_tainted(), regs->eflags, init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); - printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); + __show_registers(regs, 0); printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", TASK_COMM_LEN, current->comm, task_pid_nr(current), current_thread_info(), current, task_thread_info(current)); @@ -322,14 +301,14 @@ void show_registers(struct pt_regs *regs) * When in-kernel, we also print out the stack and code at the * time of the fault.. */ - if (in_kernel) { + if (!user_mode_vm(regs)) { u8 *eip; unsigned int code_prologue = code_bytes * 43 / 64; unsigned int code_len = code_bytes; unsigned char c; printk("\n" KERN_EMERG "Stack: "); - show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); + show_stack_log_lvl(NULL, regs, ®s->esp, KERN_EMERG); printk(KERN_EMERG "Code: "); @@ -374,11 +353,11 @@ int is_valid_bugaddr(unsigned long eip) void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED, .lock_owner = -1, .lock_owner_depth = 0 }; @@ -389,13 +368,14 @@ void die(const char * str, struct pt_regs * regs, long err) if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irqsave(&die.lock, flags); + __raw_spin_lock(&die.lock); + raw_local_save_flags(flags); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); } else - local_save_flags(flags); + raw_local_save_flags(flags); if (++die.lock_owner_depth < 3) { unsigned long esp; @@ -439,7 +419,8 @@ void die(const char * str, struct pt_regs * regs, long err) bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); - spin_unlock_irqrestore(&die.lock, flags); + __raw_spin_unlock(&die.lock); + raw_local_irq_restore(flags); if (!regs) return; @@ -1142,6 +1123,8 @@ static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) void __init trap_init(void) { + int i; + #ifdef CONFIG_EISA void __iomem *p = ioremap(0x0FFFD9, 4); if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { @@ -1201,6 +1184,11 @@ void __init trap_init(void) set_system_gate(SYSCALL_VECTOR,&system_call); + /* Reserve all the builtin and the syscall vector. */ + for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) + set_bit(i, used_vectors); + set_bit(SYSCALL_VECTOR, used_vectors); + /* * Should be a barrier for any external CPU state. */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index b4a9b3db199..df690c3fa45 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -462,7 +462,7 @@ void out_of_line_bug(void) EXPORT_SYMBOL(out_of_line_bug); #endif -static DEFINE_SPINLOCK(die_lock); +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; @@ -474,13 +474,13 @@ unsigned __kprobes long oops_begin(void) oops_enter(); /* racy, but better than risking deadlock. */ - local_irq_save(flags); + raw_local_irq_save(flags); cpu = smp_processor_id(); - if (!spin_trylock(&die_lock)) { + if (!__raw_spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. should stop eventually */; else - spin_lock(&die_lock); + __raw_spin_lock(&die_lock); } die_nest_count++; die_owner = cpu; @@ -494,12 +494,10 @@ void __kprobes oops_end(unsigned long flags) die_owner = -1; bust_spinlocks(0); die_nest_count--; - if (die_nest_count) - /* We still own the lock */ - local_irq_restore(flags); - else + if (!die_nest_count) /* Nest count reaches zero, release the lock. */ - spin_unlock_irqrestore(&die_lock, flags); + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); if (panic_on_oops) panic("Fatal exception"); oops_exit(); diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index e87a3939ed4..b8a7cf67143 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -181,8 +181,8 @@ int recalibrate_cpu_khz(void) if (cpu_has_tsc) { cpu_khz = calculate_cpu_khz(); tsc_khz = cpu_khz; - cpu_data[0].loops_per_jiffy = - cpufreq_scale(cpu_data[0].loops_per_jiffy, + cpu_data(0).loops_per_jiffy = + cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); return 0; } else @@ -215,7 +215,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) return 0; } ref_freq = freq->old; - loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; + loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy; cpu_khz_ref = cpu_khz; } @@ -223,7 +223,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || (val == CPUFREQ_RESUMECHANGE)) { if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - cpu_data[freq->cpu].loops_per_jiffy = + cpu_data(freq->cpu).loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index 9f22e542c37..9c70af45b42 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -73,13 +73,13 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, struct cpufreq_freqs *freq = data; unsigned long *lpj, dummy; - if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC)) + if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) return 0; lpj = &dummy; if (!(freq->flags & CPUFREQ_CONST_LOOPS)) #ifdef CONFIG_SMP - lpj = &cpu_data[freq->cpu].loops_per_jiffy; + lpj = &cpu_data(freq->cpu).loops_per_jiffy; #else lpj = &boot_cpu_data.loops_per_jiffy; #endif diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 585541ca1a7..78f2250963a 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -48,7 +48,7 @@ ({unsigned long v; \ extern char __vsyscall_0; \ asm("" : "=r" (v) : "0" (x)); \ - ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); }) + ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); }) /* * vsyscall_gtod_data contains data that is : @@ -291,7 +291,7 @@ static void __cpuinit vsyscall_set_cpu(int cpu) #ifdef CONFIG_NUMA node = cpu_to_node(cpu); #endif - if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) + if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) write_rdtscp_aux((node << 12) | cpu); /* Store cpu number in limit so that it can be loaded quickly diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c index f6edb11364d..952e7a89c2a 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay_32.c @@ -82,7 +82,7 @@ inline void __const_udelay(unsigned long xloops) __asm__("mull %0" :"=d" (xloops), "=&a" (d0) :"1" (xloops), "0" - (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); + (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); __delay(++xloops); } diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c index 2dbebd30834..0ebbfb9e7c7 100644 --- a/arch/x86/lib/delay_64.c +++ b/arch/x86/lib/delay_64.c @@ -40,7 +40,8 @@ EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { - __delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1); + __delay(((xloops * HZ * + cpu_data(raw_smp_processor_id()).loops_per_jiffy) >> 32) + 1); } EXPORT_SYMBOL(__const_udelay); diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index e4928aa6bdf..f93a730b44d 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -36,8 +36,8 @@ static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = { [0 ... NR /* per CPU data structure (for /proc/cpuinfo et al), visible externally * indexed physically */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); +DEFINE_PER_CPU(cpuinfo_x86, cpu_info) __cacheline_aligned; +EXPORT_PER_CPU_SYMBOL(cpu_info); /* physical ID of the CPU used to boot the system */ unsigned char boot_cpu_id; @@ -430,7 +430,7 @@ find_smp_config(void) void __init smp_store_cpu_info(int id) { - struct cpuinfo_x86 *c=&cpu_data[id]; + struct cpuinfo_x86 *c = &cpu_data(id); *c = boot_cpu_data; @@ -634,7 +634,7 @@ do_boot_cpu(__u8 cpu) cpu, smp_processor_id())); printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data[cpu]); + print_cpu_info(&cpu_data(cpu)); wmb(); cpu_set(cpu, cpu_callout_map); cpu_set(cpu, cpu_present_map); @@ -683,7 +683,7 @@ smp_boot_cpus(void) */ smp_store_cpu_info(boot_cpu_id); printk("CPU%d: ", boot_cpu_id); - print_cpu_info(&cpu_data[boot_cpu_id]); + print_cpu_info(&cpu_data(boot_cpu_id)); if(is_cpu_quad()) { /* booting on a Quad CPU */ @@ -714,7 +714,7 @@ smp_boot_cpus(void) unsigned long bogosum = 0; for (i = 0; i < NR_CPUS; i++) if (cpu_isset(i, cpu_online_map)) - bogosum += cpu_data[i].loops_per_jiffy; + bogosum += cpu_data(i).loops_per_jiffy; printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", cpucount+1, bogosum/(500000/HZ), diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c index 4d3e538c57a..b695d70e998 100644 --- a/arch/x86/mm/fault_32.c +++ b/arch/x86/mm/fault_32.c @@ -564,7 +564,8 @@ no_context: * it's allocated already. */ if ((page >> PAGE_SHIFT) < max_low_pfn - && (page & _PAGE_PRESENT)) { + && (page & _PAGE_PRESENT) + && !(page & _PAGE_PSE)) { page &= PAGE_MASK; page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)]; diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c index 5149ac136a5..00be7f0a71b 100644 --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c @@ -169,7 +169,7 @@ void dump_pagetable(unsigned long address) pmd = pmd_offset(pud, address); if (bad_address(pmd)) goto bad; printk("PMD %lx ", pmd_val(*pmd)); - if (!pmd_present(*pmd)) goto ret; + if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; pte = pte_offset_kernel(pmd, address); if (bad_address(pte)) goto bad; @@ -285,7 +285,6 @@ static int vmalloc_fault(unsigned long address) return 0; } -static int page_fault_trace; int show_unhandled_signals = 1; /* @@ -354,10 +353,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if (likely(regs->eflags & X86_EFLAGS_IF)) local_irq_enable(); - if (unlikely(page_fault_trace)) - printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", - regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); - if (unlikely(error_code & PF_RSVD)) pgtable_bad(address, regs, error_code); @@ -488,7 +483,7 @@ bad_area_nosemaphore: if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { printk( - "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", + "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n", tsk->pid > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, tsk->pid, address, regs->rip, regs->rsp, error_code); @@ -621,10 +616,3 @@ void vmalloc_sync_all(void) BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == (__START_KERNEL & PGDIR_MASK))); } - -static int __init enable_pagefaulttrace(char *str) -{ - page_fault_trace = 1; - return 1; -} -__setup("pagefaulttrace", enable_pagefaulttrace); diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 5eec5e56d07..3d6926ba899 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -612,7 +612,7 @@ void __init init_cpu_to_node(void) { int i; for (i = 0; i < NR_CPUS; i++) { - u8 apicid = x86_cpu_to_apicid[i]; + u8 apicid = x86_cpu_to_apicid_init[i]; if (apicid == BAD_APICID) continue; if (apicid_to_node[apicid] == NUMA_NO_NODE) diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index c049ce414f0..0ed046a187f 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -13,25 +13,45 @@ #include <linux/mm.h> #include <asm/ptrace.h> #include <asm/uaccess.h> +#include <asm/stacktrace.h> -struct frame_head { - struct frame_head * ebp; - unsigned long ret; -} __attribute__((packed)); +static void backtrace_warning_symbol(void *data, char *msg, + unsigned long symbol) +{ + /* Ignore warnings */ +} -static struct frame_head * -dump_kernel_backtrace(struct frame_head * head) +static void backtrace_warning(void *data, char *msg) { - oprofile_add_trace(head->ret); + /* Ignore warnings */ +} - /* frame pointers should strictly progress back up the stack - * (towards higher addresses) */ - if (head >= head->ebp) - return NULL; +static int backtrace_stack(void *data, char *name) +{ + /* Yes, we want all stacks */ + return 0; +} + +static void backtrace_address(void *data, unsigned long addr) +{ + unsigned int *depth = data; - return head->ebp; + if ((*depth)--) + oprofile_add_trace(addr); } +static struct stacktrace_ops backtrace_ops = { + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, + .address = backtrace_address, +}; + +struct frame_head { + struct frame_head *ebp; + unsigned long ret; +} __attribute__((packed)); + static struct frame_head * dump_user_backtrace(struct frame_head * head) { @@ -53,72 +73,16 @@ dump_user_backtrace(struct frame_head * head) return bufhead[0].ebp; } -/* - * | | /\ Higher addresses - * | | - * --------------- stack base (address of current_thread_info) - * | thread info | - * . . - * | stack | - * --------------- saved regs->ebp value if valid (frame_head address) - * . . - * --------------- saved regs->rsp value if x86_64 - * | | - * --------------- struct pt_regs * stored on stack if 32-bit - * | | - * . . - * | | - * --------------- %esp - * | | - * | | \/ Lower addresses - * - * Thus, regs (or regs->rsp for x86_64) <-> stack base restricts the - * valid(ish) ebp values. Note: (1) for x86_64, NMI and several other - * exceptions use special stacks, maintained by the interrupt stack table - * (IST). These stacks are set up in trap_init() in - * arch/x86_64/kernel/traps.c. Thus, for x86_64, regs now does not point - * to the kernel stack; instead, it points to some location on the NMI - * stack. On the other hand, regs->rsp is the stack pointer saved when the - * NMI occurred. (2) For 32-bit, regs->esp is not valid because the - * processor does not save %esp on the kernel stack when interrupts occur - * in the kernel mode. - */ -#ifdef CONFIG_FRAME_POINTER -static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs) -{ - unsigned long headaddr = (unsigned long)head; -#ifdef CONFIG_X86_64 - unsigned long stack = (unsigned long)regs->rsp; -#else - unsigned long stack = (unsigned long)regs; -#endif - unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE; - - return headaddr > stack && headaddr < stack_base; -} -#else -/* without fp, it's just junk */ -static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs) -{ - return 0; -} -#endif - - void x86_backtrace(struct pt_regs * const regs, unsigned int depth) { - struct frame_head *head; - -#ifdef CONFIG_X86_64 - head = (struct frame_head *)regs->rbp; -#else - head = (struct frame_head *)regs->ebp; -#endif + struct frame_head *head = (struct frame_head *)frame_pointer(regs); + unsigned long stack = stack_pointer(regs); if (!user_mode_vm(regs)) { - while (depth-- && valid_kernel_stack(head, regs)) - head = dump_kernel_backtrace(head); + if (depth) + dump_trace(NULL, regs, (unsigned long *)stack, + &backtrace_ops, &depth); return; } diff --git a/arch/x86_64/.gitignore b/arch/x86_64/.gitignore new file mode 100644 index 00000000000..36ef4c374d2 --- /dev/null +++ b/arch/x86_64/.gitignore @@ -0,0 +1 @@ +boot diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 25785b23df8..aab25f3ba3c 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -723,7 +723,7 @@ config ARCH_HIBERNATION_HEADER source "drivers/acpi/Kconfig" -source "arch/x86/kernel/cpufreq/Kconfig" +source "arch/x86/kernel/cpu/cpufreq/Kconfig_64" source "drivers/cpuidle/Kconfig" @@ -768,9 +768,9 @@ source "fs/Kconfig.binfmt" config IA32_EMULATION bool "IA32 Emulation" help - Include code to run 32-bit programs under a 64-bit kernel. You should likely - turn this on, unless you're 100% sure that you don't have any 32-bit programs - left. + Include code to run 32-bit programs under a 64-bit kernel. You should + likely turn this on, unless you're 100% sure that you don't have any + 32-bit programs left. config IA32_AOUT tristate "IA32 a.out support" diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 03e1ede27b8..6d89ab762ff 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -74,7 +74,7 @@ KBUILD_CFLAGS += $(cflags-y) CFLAGS_KERNEL += $(cflags-kernel-y) KBUILD_AFLAGS += -m64 -head-y := arch/x86/kernel/head_64.o arch/x86/kernel/head64.o arch/x86/kernel/init_task_64.o +head-y := arch/x86/kernel/head_64.o arch/x86/kernel/head64.o arch/x86/kernel/init_task.o libs-y += arch/x86/lib/ core-y += arch/x86/kernel/ \ @@ -97,9 +97,9 @@ BOOTIMAGE := arch/x86/boot/bzImage KBUILD_IMAGE := $(BOOTIMAGE) bzImage: vmlinux - $(Q)mkdir -p $(objtree)/arch/x86_64/boot - $(Q)ln -fsn $(objtree)/arch/x86/boot/bzImage $(objtree)/arch/x86_64/boot/bzImage $(Q)$(MAKE) $(build)=$(boot) $(BOOTIMAGE) + $(Q)mkdir -p $(objtree)/arch/x86_64/boot + $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/x86_64/boot/bzImage bzlilo: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zlilo diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index a7351084f2f..235a51e328c 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -422,12 +422,6 @@ static int map_lsapic_id(struct acpi_subtable_header *entry, return 0; } -#ifdef CONFIG_IA64 -#define arch_cpu_to_apicid ia64_cpu_to_sapicid -#else -#define arch_cpu_to_apicid x86_cpu_to_apicid -#endif - static int map_madt_entry(u32 acpi_id) { unsigned long madt_end, entry; @@ -501,7 +495,7 @@ static int get_cpu_id(acpi_handle handle, u32 acpi_id) return apic_id; for (i = 0; i < NR_CPUS; ++i) { - if (arch_cpu_to_apicid[i] == apic_id) + if (cpu_physical_id(i) == apic_id) return i; } return -1; diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 6f66551d9e5..5c82ec7f8bb 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -150,7 +150,7 @@ static struct coretemp_data *coretemp_update_device(struct device *dev) static int __devinit coretemp_probe(struct platform_device *pdev) { struct coretemp_data *data; - struct cpuinfo_x86 *c = &(cpu_data)[pdev->id]; + struct cpuinfo_x86 *c = &cpu_data(pdev->id); int err; u32 eax, edx; @@ -359,7 +359,7 @@ static int __init coretemp_init(void) struct pdev_entry *p, *n; /* quick check if we run Intel */ - if (cpu_data[0].x86_vendor != X86_VENDOR_INTEL) + if (cpu_data(0).x86_vendor != X86_VENDOR_INTEL) goto exit; err = platform_driver_register(&coretemp_driver); @@ -367,7 +367,7 @@ static int __init coretemp_init(void) goto exit; for_each_online_cpu(i) { - struct cpuinfo_x86 *c = &(cpu_data)[i]; + struct cpuinfo_x86 *c = &cpu_data(i); /* check if family 6, models e, f, 16 */ if ((c->cpuid_level < 0) || (c->x86 != 0x6) || diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c index f17e771e42f..3330667280b 100644 --- a/drivers/hwmon/hwmon-vid.c +++ b/drivers/hwmon/hwmon-vid.c @@ -200,7 +200,7 @@ static u8 find_vrm(u8 eff_family, u8 eff_model, u8 eff_stepping, u8 vendor) u8 vid_which_vrm(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); u32 eax; u8 eff_family, eff_model, eff_stepping, vrm_ret; diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index ec1b6cfefcd..bfc6061f155 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -136,7 +136,8 @@ static int gameport_measure_speed(struct gameport *gameport) } gameport_close(gameport); - return (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx); + return (cpu_data(raw_smp_processor_id()).loops_per_jiffy * + (unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx); #else diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 34a8c60a254..9b6fbf044fd 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -267,6 +267,12 @@ config DM_MULTIPATH_RDAC ---help--- Multipath support for LSI/Engenio RDAC. +config DM_MULTIPATH_HP + tristate "HP MSA multipath support (EXPERIMENTAL)" + depends on DM_MULTIPATH && BLK_DEV_DM && EXPERIMENTAL + ---help--- + Multipath support for HP MSA (Active/Passive) series hardware. + config DM_DELAY tristate "I/O delaying target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL @@ -276,4 +282,10 @@ config DM_DELAY If unsure, say N. +config DM_UEVENT + bool "DM uevents (EXPERIMENTAL)" + depends on BLK_DEV_DM && EXPERIMENTAL + ---help--- + Generate udev events for DM events. + endif # MD diff --git a/drivers/md/Makefile b/drivers/md/Makefile index c49366cdc05..d9aa7edb878 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -8,6 +8,7 @@ dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-mirror-objs := dm-log.o dm-raid1.o dm-rdac-objs := dm-mpath-rdac.o +dm-hp-sw-objs := dm-mpath-hp-sw.o md-mod-objs := md.o bitmap.o raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \ raid6int1.o raid6int2.o raid6int4.o \ @@ -35,6 +36,7 @@ obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o +obj-$(CONFIG_DM_MULTIPATH_HP) += dm-hp-sw.o obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rdac.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o obj-$(CONFIG_DM_MIRROR) += dm-mirror.o @@ -48,6 +50,10 @@ ifeq ($(CONFIG_ALTIVEC),y) altivec_flags := -maltivec -mabi=altivec endif +ifeq ($(CONFIG_DM_UEVENT),y) +dm-mod-objs += dm-uevent.o +endif + targets += raid6int1.c $(obj)/raid6int1.c: UNROLL := 1 $(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h index 3f7b827649e..d4509be0fe6 100644 --- a/drivers/md/dm-bio-list.h +++ b/drivers/md/dm-bio-list.h @@ -21,11 +21,6 @@ static inline int bio_list_empty(const struct bio_list *bl) return bl->head == NULL; } -#define BIO_LIST_INIT { .head = NULL, .tail = NULL } - -#define BIO_LIST(bl) \ - struct bio_list bl = BIO_LIST_INIT - static inline void bio_list_init(struct bio_list *bl) { bl->head = bl->tail = NULL; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 64fee90bb68..b41f945df8a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -36,7 +36,6 @@ struct dm_crypt_io { struct work_struct work; atomic_t pending; int error; - int post_process; }; /* @@ -57,7 +56,7 @@ struct crypt_config; struct crypt_iv_operations { int (*ctr)(struct crypt_config *cc, struct dm_target *ti, - const char *opts); + const char *opts); void (*dtr)(struct crypt_config *cc); const char *(*status)(struct crypt_config *cc); int (*generator)(struct crypt_config *cc, u8 *iv, sector_t sector); @@ -80,6 +79,8 @@ struct crypt_config { mempool_t *page_pool; struct bio_set *bs; + struct workqueue_struct *io_queue; + struct workqueue_struct *crypt_queue; /* * crypto related data */ @@ -137,7 +138,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector) } static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, - const char *opts) + const char *opts) { struct crypto_cipher *essiv_tfm; struct crypto_hash *hash_tfm; @@ -175,6 +176,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, if (err) { ti->error = "Error calculating hash in ESSIV"; + kfree(salt); return err; } @@ -188,7 +190,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, if (crypto_cipher_blocksize(essiv_tfm) != crypto_blkcipher_ivsize(cc->tfm)) { ti->error = "Block size of ESSIV cipher does " - "not match IV size of block cipher"; + "not match IV size of block cipher"; crypto_free_cipher(essiv_tfm); kfree(salt); return -EINVAL; @@ -319,10 +321,10 @@ crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out, return r; } -static void -crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx, - struct bio *bio_out, struct bio *bio_in, - sector_t sector, int write) +static void crypt_convert_init(struct crypt_config *cc, + struct convert_context *ctx, + struct bio *bio_out, struct bio *bio_in, + sector_t sector, int write) { ctx->bio_in = bio_in; ctx->bio_out = bio_out; @@ -338,7 +340,7 @@ crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx, * Encrypt / decrypt data from one bio to another one (can be the same one) */ static int crypt_convert(struct crypt_config *cc, - struct convert_context *ctx) + struct convert_context *ctx) { int r = 0; @@ -370,7 +372,7 @@ static int crypt_convert(struct crypt_config *cc, } r = crypt_convert_scatterlist(cc, &sg_out, &sg_in, sg_in.length, - ctx->write, ctx->sector); + ctx->write, ctx->sector); if (r < 0) break; @@ -380,13 +382,13 @@ static int crypt_convert(struct crypt_config *cc, return r; } - static void dm_crypt_bio_destructor(struct bio *bio) - { +static void dm_crypt_bio_destructor(struct bio *bio) +{ struct dm_crypt_io *io = bio->bi_private; struct crypt_config *cc = io->target->private; bio_free(bio, cc->bs); - } +} /* * Generate a new unfragmented bio with the given size @@ -458,7 +460,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. */ -static void dec_pending(struct dm_crypt_io *io, int error) +static void crypt_dec_pending(struct dm_crypt_io *io, int error) { struct crypt_config *cc = (struct crypt_config *) io->target->private; @@ -474,18 +476,36 @@ static void dec_pending(struct dm_crypt_io *io, int error) } /* - * kcryptd: + * kcryptd/kcryptd_io: * * Needed because it would be very unwise to do decryption in an * interrupt context. + * + * kcryptd performs the actual encryption or decryption. + * + * kcryptd_io performs the IO submission. + * + * They must be separated as otherwise the final stages could be + * starved by new requests which can block in the first stages due + * to memory allocation. */ -static struct workqueue_struct *_kcryptd_workqueue; static void kcryptd_do_work(struct work_struct *work); +static void kcryptd_do_crypt(struct work_struct *work); static void kcryptd_queue_io(struct dm_crypt_io *io) { + struct crypt_config *cc = io->target->private; + INIT_WORK(&io->work, kcryptd_do_work); - queue_work(_kcryptd_workqueue, &io->work); + queue_work(cc->io_queue, &io->work); +} + +static void kcryptd_queue_crypt(struct dm_crypt_io *io) +{ + struct crypt_config *cc = io->target->private; + + INIT_WORK(&io->work, kcryptd_do_crypt); + queue_work(cc->crypt_queue, &io->work); } static void crypt_endio(struct bio *clone, int error) @@ -508,13 +528,12 @@ static void crypt_endio(struct bio *clone, int error) } bio_put(clone); - io->post_process = 1; - kcryptd_queue_io(io); + kcryptd_queue_crypt(io); return; out: bio_put(clone); - dec_pending(io, error); + crypt_dec_pending(io, error); } static void clone_init(struct dm_crypt_io *io, struct bio *clone) @@ -544,7 +563,7 @@ static void process_read(struct dm_crypt_io *io) */ clone = bio_alloc_bioset(GFP_NOIO, bio_segments(base_bio), cc->bs); if (unlikely(!clone)) { - dec_pending(io, -ENOMEM); + crypt_dec_pending(io, -ENOMEM); return; } @@ -579,7 +598,7 @@ static void process_write(struct dm_crypt_io *io) while (remaining) { clone = crypt_alloc_buffer(io, remaining); if (unlikely(!clone)) { - dec_pending(io, -ENOMEM); + crypt_dec_pending(io, -ENOMEM); return; } @@ -589,7 +608,7 @@ static void process_write(struct dm_crypt_io *io) if (unlikely(crypt_convert(cc, &ctx) < 0)) { crypt_free_buffer_pages(cc, clone); bio_put(clone); - dec_pending(io, -EIO); + crypt_dec_pending(io, -EIO); return; } @@ -624,17 +643,23 @@ static void process_read_endio(struct dm_crypt_io *io) crypt_convert_init(cc, &ctx, io->base_bio, io->base_bio, io->base_bio->bi_sector - io->target->begin, 0); - dec_pending(io, crypt_convert(cc, &ctx)); + crypt_dec_pending(io, crypt_convert(cc, &ctx)); } static void kcryptd_do_work(struct work_struct *work) { struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); - if (io->post_process) - process_read_endio(io); - else if (bio_data_dir(io->base_bio) == READ) + if (bio_data_dir(io->base_bio) == READ) process_read(io); +} + +static void kcryptd_do_crypt(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + + if (bio_data_dir(io->base_bio) == READ) + process_read_endio(io); else process_write(io); } @@ -690,7 +715,7 @@ static int crypt_set_key(struct crypt_config *cc, char *key) cc->key_size = key_size; /* initial settings */ if ((!key_size && strcmp(key, "-")) || - (key_size && crypt_decode_key(cc->key, key, key_size) < 0)) + (key_size && crypt_decode_key(cc->key, key, key_size) < 0)) return -EINVAL; set_bit(DM_CRYPT_KEY_VALID, &cc->flags); @@ -746,7 +771,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (crypt_set_key(cc, argv[1])) { ti->error = "Error decoding key"; - goto bad1; + goto bad_cipher; } /* Compatiblity mode for old dm-crypt cipher strings */ @@ -757,19 +782,19 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (strcmp(chainmode, "ecb") && !ivmode) { ti->error = "This chaining mode requires an IV mechanism"; - goto bad1; + goto bad_cipher; } - if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", chainmode, - cipher) >= CRYPTO_MAX_ALG_NAME) { + if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", + chainmode, cipher) >= CRYPTO_MAX_ALG_NAME) { ti->error = "Chain mode + cipher name is too long"; - goto bad1; + goto bad_cipher; } tfm = crypto_alloc_blkcipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { ti->error = "Error allocating crypto tfm"; - goto bad1; + goto bad_cipher; } strcpy(cc->cipher, cipher); @@ -793,18 +818,18 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->iv_gen_ops = &crypt_iv_null_ops; else { ti->error = "Invalid IV mode"; - goto bad2; + goto bad_ivmode; } if (cc->iv_gen_ops && cc->iv_gen_ops->ctr && cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0) - goto bad2; + goto bad_ivmode; cc->iv_size = crypto_blkcipher_ivsize(tfm); if (cc->iv_size) /* at least a 64 bit sector number should fit in our buffer */ cc->iv_size = max(cc->iv_size, - (unsigned int)(sizeof(u64) / sizeof(u8))); + (unsigned int)(sizeof(u64) / sizeof(u8))); else { if (cc->iv_gen_ops) { DMWARN("Selected cipher does not support IVs"); @@ -817,13 +842,13 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool); if (!cc->io_pool) { ti->error = "Cannot allocate crypt io mempool"; - goto bad3; + goto bad_slab_pool; } cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); if (!cc->page_pool) { ti->error = "Cannot allocate page mempool"; - goto bad4; + goto bad_page_pool; } cc->bs = bioset_create(MIN_IOS, MIN_IOS); @@ -834,25 +859,25 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (crypto_blkcipher_setkey(tfm, cc->key, key_size) < 0) { ti->error = "Error setting key"; - goto bad5; + goto bad_device; } if (sscanf(argv[2], "%llu", &tmpll) != 1) { ti->error = "Invalid iv_offset sector"; - goto bad5; + goto bad_device; } cc->iv_offset = tmpll; if (sscanf(argv[4], "%llu", &tmpll) != 1) { ti->error = "Invalid device sector"; - goto bad5; + goto bad_device; } cc->start = tmpll; if (dm_get_device(ti, argv[3], cc->start, ti->len, - dm_table_get_mode(ti->table), &cc->dev)) { + dm_table_get_mode(ti->table), &cc->dev)) { ti->error = "Device lookup failed"; - goto bad5; + goto bad_device; } if (ivmode && cc->iv_gen_ops) { @@ -861,27 +886,45 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL); if (!cc->iv_mode) { ti->error = "Error kmallocing iv_mode string"; - goto bad5; + goto bad_ivmode_string; } strcpy(cc->iv_mode, ivmode); } else cc->iv_mode = NULL; + cc->io_queue = create_singlethread_workqueue("kcryptd_io"); + if (!cc->io_queue) { + ti->error = "Couldn't create kcryptd io queue"; + goto bad_io_queue; + } + + cc->crypt_queue = create_singlethread_workqueue("kcryptd"); + if (!cc->crypt_queue) { + ti->error = "Couldn't create kcryptd queue"; + goto bad_crypt_queue; + } + ti->private = cc; return 0; -bad5: +bad_crypt_queue: + destroy_workqueue(cc->io_queue); +bad_io_queue: + kfree(cc->iv_mode); +bad_ivmode_string: + dm_put_device(ti, cc->dev); +bad_device: bioset_free(cc->bs); bad_bs: mempool_destroy(cc->page_pool); -bad4: +bad_page_pool: mempool_destroy(cc->io_pool); -bad3: +bad_slab_pool: if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) cc->iv_gen_ops->dtr(cc); -bad2: +bad_ivmode: crypto_free_blkcipher(tfm); -bad1: +bad_cipher: /* Must zero key material before freeing */ memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8)); kfree(cc); @@ -892,7 +935,8 @@ static void crypt_dtr(struct dm_target *ti) { struct crypt_config *cc = (struct crypt_config *) ti->private; - flush_workqueue(_kcryptd_workqueue); + destroy_workqueue(cc->io_queue); + destroy_workqueue(cc->crypt_queue); bioset_free(cc->bs); mempool_destroy(cc->page_pool); @@ -918,9 +962,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, io = mempool_alloc(cc->io_pool, GFP_NOIO); io->target = ti; io->base_bio = bio; - io->error = io->post_process = 0; + io->error = 0; atomic_set(&io->pending, 0); - kcryptd_queue_io(io); + + if (bio_data_dir(io->base_bio) == READ) + kcryptd_queue_io(io); + else + kcryptd_queue_crypt(io); return DM_MAPIO_SUBMITTED; } @@ -1037,25 +1085,12 @@ static int __init dm_crypt_init(void) if (!_crypt_io_pool) return -ENOMEM; - _kcryptd_workqueue = create_workqueue("kcryptd"); - if (!_kcryptd_workqueue) { - r = -ENOMEM; - DMERR("couldn't create kcryptd"); - goto bad1; - } - r = dm_register_target(&crypt_target); if (r < 0) { DMERR("register failed %d", r); - goto bad2; + kmem_cache_destroy(_crypt_io_pool); } - return 0; - -bad2: - destroy_workqueue(_kcryptd_workqueue); -bad1: - kmem_cache_destroy(_crypt_io_pool); return r; } @@ -1066,7 +1101,6 @@ static void __exit dm_crypt_exit(void) if (r < 0) DMERR("unregister failed %d", r); - destroy_workqueue(_kcryptd_workqueue); kmem_cache_destroy(_crypt_io_pool); } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 6928c136d3c..bdd37f881c4 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -83,7 +83,7 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) struct dm_delay_info *delayed, *next; unsigned long next_expires = 0; int start_timer = 0; - BIO_LIST(flush_bios); + struct bio_list flush_bios = { }; mutex_lock(&delayed_bios_lock); list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { @@ -163,34 +163,32 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - if (argc == 3) { - dc->dev_write = NULL; + dc->dev_write = NULL; + if (argc == 3) goto out; - } if (sscanf(argv[4], "%llu", &tmpll) != 1) { ti->error = "Invalid write device sector"; - goto bad; + goto bad_dev_read; } dc->start_write = tmpll; if (sscanf(argv[5], "%u", &dc->write_delay) != 1) { ti->error = "Invalid write delay"; - goto bad; + goto bad_dev_read; } if (dm_get_device(ti, argv[3], dc->start_write, ti->len, dm_table_get_mode(ti->table), &dc->dev_write)) { ti->error = "Write device lookup failed"; - dm_put_device(ti, dc->dev_read); - goto bad; + goto bad_dev_read; } out: dc->delayed_pool = mempool_create_slab_pool(128, delayed_cache); if (!dc->delayed_pool) { DMERR("Couldn't create delayed bio pool."); - goto bad; + goto bad_dev_write; } setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc); @@ -203,6 +201,11 @@ out: ti->private = dc; return 0; +bad_dev_write: + if (dc->dev_write) + dm_put_device(ti, dc->dev_write); +bad_dev_read: + dm_put_device(ti, dc->dev_read); bad: kfree(dc); return -EINVAL; @@ -305,7 +308,7 @@ static int delay_status(struct dm_target *ti, status_type_t type, (unsigned long long) dc->start_read, dc->read_delay); if (dc->dev_write) - DMEMIT("%s %llu %u", dc->dev_write->name, + DMEMIT(" %s %llu %u", dc->dev_write->name, (unsigned long long) dc->start_write, dc->write_delay); break; diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c index 342517261ec..6b91b9ab1d4 100644 --- a/drivers/md/dm-emc.c +++ b/drivers/md/dm-emc.c @@ -81,7 +81,7 @@ static struct bio *get_failover_bio(struct dm_path *path, unsigned data_size) } if (bio_add_page(bio, page, data_size, 0) != data_size) { - DMERR("get_failover_bio: alloc_page() failed."); + DMERR("get_failover_bio: bio_add_page() failed."); __free_page(page); bio_put(bio); return NULL; @@ -211,12 +211,10 @@ fail_path: static struct emc_handler *alloc_emc_handler(void) { - struct emc_handler *h = kmalloc(sizeof(*h), GFP_KERNEL); + struct emc_handler *h = kzalloc(sizeof(*h), GFP_KERNEL); - if (h) { - memset(h, 0, sizeof(*h)); + if (h) spin_lock_init(&h->lock); - } return h; } diff --git a/drivers/md/dm-hw-handler.c b/drivers/md/dm-hw-handler.c index baafaaba4d4..2ee84d8aa0b 100644 --- a/drivers/md/dm-hw-handler.c +++ b/drivers/md/dm-hw-handler.c @@ -91,12 +91,10 @@ void dm_put_hw_handler(struct hw_handler_type *hwht) static struct hwh_internal *_alloc_hw_handler(struct hw_handler_type *hwht) { - struct hwh_internal *hwhi = kmalloc(sizeof(*hwhi), GFP_KERNEL); + struct hwh_internal *hwhi = kzalloc(sizeof(*hwhi), GFP_KERNEL); - if (hwhi) { - memset(hwhi, 0, sizeof(*hwhi)); + if (hwhi) hwhi->hwht = *hwht; - } return hwhi; } diff --git a/drivers/md/dm-hw-handler.h b/drivers/md/dm-hw-handler.h index e0832e6fcf3..46809dcb121 100644 --- a/drivers/md/dm-hw-handler.h +++ b/drivers/md/dm-hw-handler.h @@ -58,5 +58,6 @@ unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio); #define MP_FAIL_PATH 1 #define MP_BYPASS_PG 2 #define MP_ERROR_IO 4 /* Don't retry this I/O */ +#define MP_RETRY 8 #endif diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index b441d82c338..138200bf5e0 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -700,7 +700,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) int r; char *new_name = (char *) param + param->data_start; - if (new_name < (char *) (param + 1) || + if (new_name < (char *) param->data || invalid_str(new_name, (void *) param + param_size)) { DMWARN("Invalid new logical volume name supplied."); return -EINVAL; @@ -726,7 +726,7 @@ static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) if (!md) return -ENXIO; - if (geostr < (char *) (param + 1) || + if (geostr < (char *) param->data || invalid_str(geostr, (void *) param + param_size)) { DMWARN("Invalid geometry supplied."); goto out; @@ -1233,7 +1233,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size) if (r) goto out; - if (tmsg < (struct dm_target_msg *) (param + 1) || + if (tmsg < (struct dm_target_msg *) param->data || invalid_str(tmsg->message, (void *) param + param_size)) { DMWARN("Invalid target message parameters."); r = -EINVAL; @@ -1358,7 +1358,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) if (tmp.data_size < sizeof(tmp)) return -EINVAL; - dmi = (struct dm_ioctl *) vmalloc(tmp.data_size); + dmi = vmalloc(tmp.data_size); if (!dmi) return -ENOMEM; @@ -1515,3 +1515,35 @@ void dm_interface_exit(void) dm_hash_exit(); } + +/** + * dm_copy_name_and_uuid - Copy mapped device name & uuid into supplied buffers + * @md: Pointer to mapped_device + * @name: Buffer (size DM_NAME_LEN) for name + * @uuid: Buffer (size DM_UUID_LEN) for uuid or empty string if uuid not defined + */ +int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid) +{ + int r = 0; + struct hash_cell *hc; + + if (!md) + return -ENXIO; + + dm_get(md); + down_read(&_hash_lock); + hc = dm_get_mdptr(md); + if (!hc || hc->md != md) { + r = -ENXIO; + goto out; + } + + strcpy(name, hc->name); + strcpy(uuid, hc->uuid ? : ""); + +out: + up_read(&_hash_lock); + dm_put(md); + + return r; +} diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index a66428d860f..072ee4353ea 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -696,7 +696,7 @@ static struct dirty_log_type _disk_type = { .module = THIS_MODULE, .ctr = disk_ctr, .dtr = disk_dtr, - .suspend = disk_flush, + .postsuspend = disk_flush, .resume = disk_resume, .get_region_size = core_get_region_size, .is_clean = core_is_clean, diff --git a/drivers/md/dm-log.h b/drivers/md/dm-log.h index 86a301c8daf..3fae87eb596 100644 --- a/drivers/md/dm-log.h +++ b/drivers/md/dm-log.h @@ -32,7 +32,8 @@ struct dirty_log_type { * There are times when we don't want the log to touch * the disk. */ - int (*suspend)(struct dirty_log *log); + int (*presuspend)(struct dirty_log *log); + int (*postsuspend)(struct dirty_log *log); int (*resume)(struct dirty_log *log); /* diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c new file mode 100644 index 00000000000..204bf42c944 --- /dev/null +++ b/drivers/md/dm-mpath-hp-sw.c @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2005 Mike Christie, All rights reserved. + * Copyright (C) 2007 Red Hat, Inc. All rights reserved. + * Authors: Mike Christie + * Dave Wysochanski + * + * This file is released under the GPL. + * + * This module implements the specific path activation code for + * HP StorageWorks and FSC FibreCat Asymmetric (Active/Passive) + * storage arrays. + * These storage arrays have controller-based failover, not + * LUN-based failover. However, LUN-based failover is the design + * of dm-multipath. Thus, this module is written for LUN-based failover. + */ +#include <linux/blkdev.h> +#include <linux/list.h> +#include <linux/types.h> +#include <scsi/scsi.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_dbg.h> + +#include "dm.h" +#include "dm-hw-handler.h" + +#define DM_MSG_PREFIX "multipath hp-sw" +#define DM_HP_HWH_NAME "hp-sw" +#define DM_HP_HWH_VER "1.0.0" + +struct hp_sw_context { + unsigned char sense[SCSI_SENSE_BUFFERSIZE]; +}; + +/* + * hp_sw_error_is_retryable - Is an HP-specific check condition retryable? + * @req: path activation request + * + * Examine error codes of request and determine whether the error is retryable. + * Some error codes are already retried by scsi-ml (see + * scsi_decide_disposition), but some HP specific codes are not. + * The intent of this routine is to supply the logic for the HP specific + * check conditions. + * + * Returns: + * 1 - command completed with retryable error + * 0 - command completed with non-retryable error + * + * Possible optimizations + * 1. More hardware-specific error codes + */ +static int hp_sw_error_is_retryable(struct request *req) +{ + /* + * NOT_READY is known to be retryable + * For now we just dump out the sense data and call it retryable + */ + if (status_byte(req->errors) == CHECK_CONDITION) + __scsi_print_sense(DM_HP_HWH_NAME, req->sense, req->sense_len); + + /* + * At this point we don't have complete information about all the error + * codes from this hardware, so we are just conservative and retry + * when in doubt. + */ + return 1; +} + +/* + * hp_sw_end_io - Completion handler for HP path activation. + * @req: path activation request + * @error: scsi-ml error + * + * Check sense data, free request structure, and notify dm that + * pg initialization has completed. + * + * Context: scsi-ml softirq + * + */ +static void hp_sw_end_io(struct request *req, int error) +{ + struct dm_path *path = req->end_io_data; + unsigned err_flags = 0; + + if (!error) { + DMDEBUG("%s path activation command - success", + path->dev->name); + goto out; + } + + if (hp_sw_error_is_retryable(req)) { + DMDEBUG("%s path activation command - retry", + path->dev->name); + err_flags = MP_RETRY; + goto out; + } + + DMWARN("%s path activation fail - error=0x%x", + path->dev->name, error); + err_flags = MP_FAIL_PATH; + +out: + req->end_io_data = NULL; + __blk_put_request(req->q, req); + dm_pg_init_complete(path, err_flags); +} + +/* + * hp_sw_get_request - Allocate an HP specific path activation request + * @path: path on which request will be sent (needed for request queue) + * + * The START command is used for path activation request. + * These arrays are controller-based failover, not LUN based. + * One START command issued to a single path will fail over all + * LUNs for the same controller. + * + * Possible optimizations + * 1. Make timeout configurable + * 2. Preallocate request + */ +static struct request *hp_sw_get_request(struct dm_path *path) +{ + struct request *req; + struct block_device *bdev = path->dev->bdev; + struct request_queue *q = bdev_get_queue(bdev); + struct hp_sw_context *h = path->hwhcontext; + + req = blk_get_request(q, WRITE, GFP_NOIO); + if (!req) + goto out; + + req->timeout = 60 * HZ; + + req->errors = 0; + req->cmd_type = REQ_TYPE_BLOCK_PC; + req->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; + req->end_io_data = path; + req->sense = h->sense; + memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE); + + memset(&req->cmd, 0, BLK_MAX_CDB); + req->cmd[0] = START_STOP; + req->cmd[4] = 1; + req->cmd_len = COMMAND_SIZE(req->cmd[0]); + +out: + return req; +} + +/* + * hp_sw_pg_init - HP path activation implementation. + * @hwh: hardware handler specific data + * @bypassed: unused; is the path group bypassed? (see dm-mpath.c) + * @path: path to send initialization command + * + * Send an HP-specific path activation command on 'path'. + * Do not try to optimize in any way, just send the activation command. + * More than one path activation command may be sent to the same controller. + * This seems to work fine for basic failover support. + * + * Possible optimizations + * 1. Detect an in-progress activation request and avoid submitting another one + * 2. Model the controller and only send a single activation request at a time + * 3. Determine the state of a path before sending an activation request + * + * Context: kmpathd (see process_queued_ios() in dm-mpath.c) + */ +static void hp_sw_pg_init(struct hw_handler *hwh, unsigned bypassed, + struct dm_path *path) +{ + struct request *req; + struct hp_sw_context *h; + + path->hwhcontext = hwh->context; + h = hwh->context; + + req = hp_sw_get_request(path); + if (!req) { + DMERR("%s path activation command - allocation fail", + path->dev->name); + goto retry; + } + + DMDEBUG("%s path activation command - sent", path->dev->name); + + blk_execute_rq_nowait(req->q, NULL, req, 1, hp_sw_end_io); + return; + +retry: + dm_pg_init_complete(path, MP_RETRY); +} + +static int hp_sw_create(struct hw_handler *hwh, unsigned argc, char **argv) +{ + struct hp_sw_context *h; + + h = kmalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return -ENOMEM; + + hwh->context = h; + + return 0; +} + +static void hp_sw_destroy(struct hw_handler *hwh) +{ + struct hp_sw_context *h = hwh->context; + + kfree(h); +} + +static struct hw_handler_type hp_sw_hwh = { + .name = DM_HP_HWH_NAME, + .module = THIS_MODULE, + .create = hp_sw_create, + .destroy = hp_sw_destroy, + .pg_init = hp_sw_pg_init, +}; + +static int __init hp_sw_init(void) +{ + int r; + + r = dm_register_hw_handler(&hp_sw_hwh); + if (r < 0) + DMERR("register failed %d", r); + else + DMINFO("version " DM_HP_HWH_VER " loaded"); + + return r; +} + +static void __exit hp_sw_exit(void) +{ + int r; + + r = dm_unregister_hw_handler(&hp_sw_hwh); + if (r < 0) + DMERR("unregister failed %d", r); +} + +module_init(hp_sw_init); +module_exit(hp_sw_exit); + +MODULE_DESCRIPTION("DM Multipath HP StorageWorks / FSC FibreCat (A/P) support"); +MODULE_AUTHOR("Mike Christie, Dave Wysochanski <dm-devel@redhat.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DM_HP_HWH_VER); diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c index 16b16134577..e04eb5c697f 100644 --- a/drivers/md/dm-mpath-rdac.c +++ b/drivers/md/dm-mpath-rdac.c @@ -664,20 +664,21 @@ static struct hw_handler_type rdac_handler = { static int __init rdac_init(void) { - int r = dm_register_hw_handler(&rdac_handler); - - if (r < 0) { - DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r); - return r; - } + int r; rdac_wkqd = create_singlethread_workqueue("rdac_wkqd"); if (!rdac_wkqd) { DMERR("Failed to create workqueue rdac_wkqd."); - dm_unregister_hw_handler(&rdac_handler); return -ENOMEM; } + r = dm_register_hw_handler(&rdac_handler); + if (r < 0) { + DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r); + destroy_workqueue(rdac_wkqd); + return r; + } + DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER); return 0; } diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 31056abca89..24b2b1e32fa 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -10,6 +10,7 @@ #include "dm-hw-handler.h" #include "dm-bio-list.h" #include "dm-bio-record.h" +#include "dm-uevent.h" #include <linux/ctype.h> #include <linux/init.h> @@ -75,6 +76,8 @@ struct multipath { unsigned queue_io; /* Must we queue all I/O? */ unsigned queue_if_no_path; /* Queue I/O if last path fails? */ unsigned saved_queue_if_no_path;/* Saved state during suspension */ + unsigned pg_init_retries; /* Number of times to retry pg_init */ + unsigned pg_init_count; /* Number of times pg_init called */ struct work_struct process_queued_ios; struct bio_list queued_ios; @@ -225,6 +228,8 @@ static void __switch_pg(struct multipath *m, struct pgpath *pgpath) m->pg_init_required = 0; m->queue_io = 0; } + + m->pg_init_count = 0; } static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg) @@ -424,6 +429,7 @@ static void process_queued_ios(struct work_struct *work) must_queue = 0; if (m->pg_init_required && !m->pg_init_in_progress) { + m->pg_init_count++; m->pg_init_required = 0; m->pg_init_in_progress = 1; init_required = 1; @@ -689,9 +695,11 @@ static int parse_features(struct arg_set *as, struct multipath *m) int r; unsigned argc; struct dm_target *ti = m->ti; + const char *param_name; static struct param _params[] = { - {0, 1, "invalid number of feature args"}, + {0, 3, "invalid number of feature args"}, + {1, 50, "pg_init_retries must be between 1 and 50"}, }; r = read_param(_params, shift(as), &argc, &ti->error); @@ -701,12 +709,28 @@ static int parse_features(struct arg_set *as, struct multipath *m) if (!argc) return 0; - if (!strnicmp(shift(as), MESG_STR("queue_if_no_path"))) - return queue_if_no_path(m, 1, 0); - else { + do { + param_name = shift(as); + argc--; + + if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) { + r = queue_if_no_path(m, 1, 0); + continue; + } + + if (!strnicmp(param_name, MESG_STR("pg_init_retries")) && + (argc >= 1)) { + r = read_param(_params + 1, shift(as), + &m->pg_init_retries, &ti->error); + argc--; + continue; + } + ti->error = "Unrecognised multipath feature request"; - return -EINVAL; - } + r = -EINVAL; + } while (argc && !r); + + return r; } static int multipath_ctr(struct dm_target *ti, unsigned int argc, @@ -834,6 +858,9 @@ static int fail_path(struct pgpath *pgpath) if (pgpath == m->current_pgpath) m->current_pgpath = NULL; + dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti, + pgpath->path.dev->name, m->nr_valid_paths); + queue_work(kmultipathd, &m->trigger_event); out: @@ -873,6 +900,9 @@ static int reinstate_path(struct pgpath *pgpath) if (!m->nr_valid_paths++ && m->queue_size) queue_work(kmultipathd, &m->process_queued_ios); + dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti, + pgpath->path.dev->name, m->nr_valid_paths); + queue_work(kmultipathd, &m->trigger_event); out: @@ -976,6 +1006,26 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed) } /* + * Should we retry pg_init immediately? + */ +static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath) +{ + unsigned long flags; + int limit_reached = 0; + + spin_lock_irqsave(&m->lock, flags); + + if (m->pg_init_count <= m->pg_init_retries) + m->pg_init_required = 1; + else + limit_reached = 1; + + spin_unlock_irqrestore(&m->lock, flags); + + return limit_reached; +} + +/* * pg_init must call this when it has completed its initialisation */ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags) @@ -985,8 +1035,14 @@ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags) struct multipath *m = pg->m; unsigned long flags; - /* We insist on failing the path if the PG is already bypassed. */ - if (err_flags && pg->bypassed) + /* + * If requested, retry pg_init until maximum number of retries exceeded. + * If retry not requested and PG already bypassed, always fail the path. + */ + if (err_flags & MP_RETRY) { + if (pg_init_limit_reached(m, pgpath)) + err_flags |= MP_FAIL_PATH; + } else if (err_flags && pg->bypassed) err_flags |= MP_FAIL_PATH; if (err_flags & MP_FAIL_PATH) @@ -996,7 +1052,7 @@ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags) bypass_pg(m, pg, 1); spin_lock_irqsave(&m->lock, flags); - if (err_flags) { + if (err_flags & ~MP_RETRY) { m->current_pgpath = NULL; m->current_pg = NULL; } else if (!m->pg_init_required) @@ -1148,11 +1204,15 @@ static int multipath_status(struct dm_target *ti, status_type_t type, /* Features */ if (type == STATUSTYPE_INFO) - DMEMIT("1 %u ", m->queue_size); - else if (m->queue_if_no_path) - DMEMIT("1 queue_if_no_path "); - else - DMEMIT("0 "); + DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count); + else { + DMEMIT("%u ", m->queue_if_no_path + + (m->pg_init_retries > 0) * 2); + if (m->queue_if_no_path) + DMEMIT("queue_if_no_path "); + if (m->pg_init_retries) + DMEMIT("pg_init_retries %u ", m->pg_init_retries); + } if (hwh->type && hwh->type->status) sz += hwh->type->status(hwh, type, result + sz, maxlen - sz); diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c index f10a0c89b3f..ca1bb636a3e 100644 --- a/drivers/md/dm-path-selector.c +++ b/drivers/md/dm-path-selector.c @@ -94,12 +94,10 @@ out: static struct ps_internal *_alloc_path_selector(struct path_selector_type *pst) { - struct ps_internal *psi = kmalloc(sizeof(*psi), GFP_KERNEL); + struct ps_internal *psi = kzalloc(sizeof(*psi), GFP_KERNEL); - if (psi) { - memset(psi, 0, sizeof(*psi)); + if (psi) psi->pst = *pst; - } return psi; } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index d09ff15490a..31123d4a6b9 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -19,6 +19,7 @@ #include <linux/time.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> +#include <linux/log2.h> #define DM_MSG_PREFIX "raid1" #define DM_IO_PAGES 64 @@ -113,6 +114,7 @@ struct region { * Mirror set structures. *---------------------------------------------------------------*/ struct mirror { + struct mirror_set *ms; atomic_t error_count; struct dm_dev *dev; sector_t offset; @@ -974,6 +976,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) { ti->error = "Error creating dirty region hash"; + dm_io_client_destroy(ms->io_client); kfree(ms); return NULL; } @@ -994,7 +997,7 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti, static inline int _check_region_size(struct dm_target *ti, uint32_t size) { - return !(size % (PAGE_SIZE >> 9) || (size & (size - 1)) || + return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) || size > ti->len); } @@ -1015,6 +1018,7 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, return -ENXIO; } + ms->mirror[mirror].ms = ms; ms->mirror[mirror].offset = offset; return 0; @@ -1163,16 +1167,14 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ms->kmirrord_wq = create_singlethread_workqueue("kmirrord"); if (!ms->kmirrord_wq) { DMERR("couldn't start kmirrord"); - free_context(ms, ti, m); - return -ENOMEM; + r = -ENOMEM; + goto err_free_context; } INIT_WORK(&ms->kmirrord_work, do_mirror); r = parse_features(ms, argc, argv, &args_used); - if (r) { - free_context(ms, ti, ms->nr_mirrors); - return r; - } + if (r) + goto err_destroy_wq; argv += args_used; argc -= args_used; @@ -1188,19 +1190,22 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (argc) { ti->error = "Too many mirror arguments"; - free_context(ms, ti, ms->nr_mirrors); - return -EINVAL; + r = -EINVAL; + goto err_destroy_wq; } r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); - if (r) { - destroy_workqueue(ms->kmirrord_wq); - free_context(ms, ti, ms->nr_mirrors); - return r; - } + if (r) + goto err_destroy_wq; wake(ms); return 0; + +err_destroy_wq: + destroy_workqueue(ms->kmirrord_wq); +err_free_context: + free_context(ms, ti, ms->nr_mirrors); + return r; } static void mirror_dtr(struct dm_target *ti) @@ -1302,7 +1307,7 @@ static void mirror_postsuspend(struct dm_target *ti) wait_event(_kmirrord_recovery_stopped, !atomic_read(&ms->rh.recovery_in_flight)); - if (log->type->suspend && log->type->suspend(log)) + if (log->type->postsuspend && log->type->postsuspend(log)) /* FIXME: need better error handling */ DMWARN("log suspend failed"); } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 98a633f3d6b..cee16fadd9e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/log2.h> #include "dm-snap.h" #include "dm-bio-list.h" @@ -415,7 +416,7 @@ static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg, chunk_size = round_up(chunk_size, PAGE_SIZE >> 9); /* Check chunk_size is a power of 2 */ - if (chunk_size & (chunk_size - 1)) { + if (!is_power_of_2(chunk_size)) { *error = "Chunk size is not a power of 2"; return -EINVAL; } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 51f5e076001..969944a8aba 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -11,6 +11,7 @@ #include <linux/blkdev.h> #include <linux/bio.h> #include <linux/slab.h> +#include <linux/log2.h> #define DM_MSG_PREFIX "striped" @@ -99,7 +100,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* * chunk_size is a power of two */ - if (!chunk_size || (chunk_size & (chunk_size - 1)) || + if (!is_power_of_2(chunk_size) || (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) { ti->error = "Invalid chunk size"; return -EINVAL; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index fbe477bb2c6..8939e610508 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -213,12 +213,11 @@ static int alloc_targets(struct dm_table *t, unsigned int num) int dm_table_create(struct dm_table **result, int mode, unsigned num_targets, struct mapped_device *md) { - struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL); + struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); if (!t) return -ENOMEM; - memset(t, 0, sizeof(*t)); INIT_LIST_HEAD(&t->devices); atomic_set(&t->holders, 1); diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 477a041a41c..835cf95b857 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -88,12 +88,10 @@ void dm_put_target_type(struct target_type *t) static struct tt_internal *alloc_target(struct target_type *t) { - struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL); + struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL); - if (ti) { - memset(ti, 0, sizeof(*ti)); + if (ti) ti->tt = *t; - } return ti; } diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c new file mode 100644 index 00000000000..50377e5dc2a --- /dev/null +++ b/drivers/md/dm-uevent.c @@ -0,0 +1,222 @@ +/* + * Device Mapper Uevent Support (dm-uevent) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2007 + * Author: Mike Anderson <andmike@linux.vnet.ibm.com> + */ +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/kobject.h> +#include <linux/dm-ioctl.h> + +#include "dm.h" +#include "dm-uevent.h" + +#define DM_MSG_PREFIX "uevent" + +static const struct { + enum dm_uevent_type type; + enum kobject_action action; + char *name; +} _dm_uevent_type_names[] = { + {DM_UEVENT_PATH_FAILED, KOBJ_CHANGE, "PATH_FAILED"}, + {DM_UEVENT_PATH_REINSTATED, KOBJ_CHANGE, "PATH_REINSTATED"}, +}; + +static struct kmem_cache *_dm_event_cache; + +struct dm_uevent { + struct mapped_device *md; + enum kobject_action action; + struct kobj_uevent_env ku_env; + struct list_head elist; + char name[DM_NAME_LEN]; + char uuid[DM_UUID_LEN]; +}; + +static void dm_uevent_free(struct dm_uevent *event) +{ + kmem_cache_free(_dm_event_cache, event); +} + +static struct dm_uevent *dm_uevent_alloc(struct mapped_device *md) +{ + struct dm_uevent *event; + + event = kmem_cache_zalloc(_dm_event_cache, GFP_ATOMIC); + if (!event) + return NULL; + + INIT_LIST_HEAD(&event->elist); + event->md = md; + + return event; +} + +static struct dm_uevent *dm_build_path_uevent(struct mapped_device *md, + struct dm_target *ti, + enum kobject_action action, + const char *dm_action, + const char *path, + unsigned nr_valid_paths) +{ + struct dm_uevent *event; + + event = dm_uevent_alloc(md); + if (!event) { + DMERR("%s: dm_uevent_alloc() failed", __FUNCTION__); + goto err_nomem; + } + + event->action = action; + + if (add_uevent_var(&event->ku_env, "DM_TARGET=%s", ti->type->name)) { + DMERR("%s: add_uevent_var() for DM_TARGET failed", + __FUNCTION__); + goto err_add; + } + + if (add_uevent_var(&event->ku_env, "DM_ACTION=%s", dm_action)) { + DMERR("%s: add_uevent_var() for DM_ACTION failed", + __FUNCTION__); + goto err_add; + } + + if (add_uevent_var(&event->ku_env, "DM_SEQNUM=%u", + dm_next_uevent_seq(md))) { + DMERR("%s: add_uevent_var() for DM_SEQNUM failed", + __FUNCTION__); + goto err_add; + } + + if (add_uevent_var(&event->ku_env, "DM_PATH=%s", path)) { + DMERR("%s: add_uevent_var() for DM_PATH failed", __FUNCTION__); + goto err_add; + } + + if (add_uevent_var(&event->ku_env, "DM_NR_VALID_PATHS=%d", + nr_valid_paths)) { + DMERR("%s: add_uevent_var() for DM_NR_VALID_PATHS failed", + __FUNCTION__); + goto err_add; + } + + return event; + +err_add: + dm_uevent_free(event); +err_nomem: + return ERR_PTR(-ENOMEM); +} + +/** + * dm_send_uevents - send uevents for given list + * + * @events: list of events to send + * @kobj: kobject generating event + * + */ +void dm_send_uevents(struct list_head *events, struct kobject *kobj) +{ + int r; + struct dm_uevent *event, *next; + + list_for_each_entry_safe(event, next, events, elist) { + list_del_init(&event->elist); + + /* + * Need to call dm_copy_name_and_uuid from here for now. + * Context of previous var adds and locking used for + * hash_cell not compatable. + */ + if (dm_copy_name_and_uuid(event->md, event->name, + event->uuid)) { + DMERR("%s: dm_copy_name_and_uuid() failed", + __FUNCTION__); + goto uevent_free; + } + + if (add_uevent_var(&event->ku_env, "DM_NAME=%s", event->name)) { + DMERR("%s: add_uevent_var() for DM_NAME failed", + __FUNCTION__); + goto uevent_free; + } + + if (add_uevent_var(&event->ku_env, "DM_UUID=%s", event->uuid)) { + DMERR("%s: add_uevent_var() for DM_UUID failed", + __FUNCTION__); + goto uevent_free; + } + + r = kobject_uevent_env(kobj, event->action, event->ku_env.envp); + if (r) + DMERR("%s: kobject_uevent_env failed", __FUNCTION__); +uevent_free: + dm_uevent_free(event); + } +} +EXPORT_SYMBOL_GPL(dm_send_uevents); + +/** + * dm_path_uevent - called to create a new path event and queue it + * + * @event_type: path event type enum + * @ti: pointer to a dm_target + * @path: string containing pathname + * @nr_valid_paths: number of valid paths remaining + * + */ +void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti, + const char *path, unsigned nr_valid_paths) +{ + struct mapped_device *md = dm_table_get_md(ti->table); + struct dm_uevent *event; + + if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) { + DMERR("%s: Invalid event_type %d", __FUNCTION__, event_type); + goto out; + } + + event = dm_build_path_uevent(md, ti, + _dm_uevent_type_names[event_type].action, + _dm_uevent_type_names[event_type].name, + path, nr_valid_paths); + if (IS_ERR(event)) + goto out; + + dm_uevent_add(md, &event->elist); + +out: + dm_put(md); +} +EXPORT_SYMBOL_GPL(dm_path_uevent); + +int dm_uevent_init(void) +{ + _dm_event_cache = KMEM_CACHE(dm_uevent, 0); + if (!_dm_event_cache) + return -ENOMEM; + + DMINFO("version 1.0.3"); + + return 0; +} + +void dm_uevent_exit(void) +{ + kmem_cache_destroy(_dm_event_cache); +} diff --git a/drivers/md/dm-uevent.h b/drivers/md/dm-uevent.h new file mode 100644 index 00000000000..2eccc8bd671 --- /dev/null +++ b/drivers/md/dm-uevent.h @@ -0,0 +1,59 @@ +/* + * Device Mapper Uevent Support + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2007 + * Author: Mike Anderson <andmike@linux.vnet.ibm.com> + */ +#ifndef DM_UEVENT_H +#define DM_UEVENT_H + +enum dm_uevent_type { + DM_UEVENT_PATH_FAILED, + DM_UEVENT_PATH_REINSTATED, +}; + +#ifdef CONFIG_DM_UEVENT + +extern int dm_uevent_init(void); +extern void dm_uevent_exit(void); +extern void dm_send_uevents(struct list_head *events, struct kobject *kobj); +extern void dm_path_uevent(enum dm_uevent_type event_type, + struct dm_target *ti, const char *path, + unsigned nr_valid_paths); + +#else + +static inline int dm_uevent_init(void) +{ + return 0; +} +static inline void dm_uevent_exit(void) +{ +} +static inline void dm_send_uevents(struct list_head *events, + struct kobject *kobj) +{ +} +static inline void dm_path_uevent(enum dm_uevent_type event_type, + struct dm_target *ti, const char *path, + unsigned nr_valid_paths) +{ +} + +#endif /* CONFIG_DM_UEVENT */ + +#endif /* DM_UEVENT_H */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d837d37f620..07cbbb8eb3e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -7,6 +7,7 @@ #include "dm.h" #include "dm-bio-list.h" +#include "dm-uevent.h" #include <linux/init.h> #include <linux/module.h> @@ -112,6 +113,9 @@ struct mapped_device { */ atomic_t event_nr; wait_queue_head_t eventq; + atomic_t uevent_seq; + struct list_head uevent_list; + spinlock_t uevent_lock; /* Protect access to uevent_list */ /* * freeze/thaw support require holding onto a super block @@ -143,11 +147,19 @@ static int __init local_init(void) return -ENOMEM; } + r = dm_uevent_init(); + if (r) { + kmem_cache_destroy(_tio_cache); + kmem_cache_destroy(_io_cache); + return r; + } + _major = major; r = register_blkdev(_major, _name); if (r < 0) { kmem_cache_destroy(_tio_cache); kmem_cache_destroy(_io_cache); + dm_uevent_exit(); return r; } @@ -162,6 +174,7 @@ static void local_exit(void) kmem_cache_destroy(_tio_cache); kmem_cache_destroy(_io_cache); unregister_blkdev(_major, _name); + dm_uevent_exit(); _major = 0; @@ -751,15 +764,13 @@ static void __clone_and_map(struct clone_info *ci) /* * Split the bio into several clones. */ -static void __split_bio(struct mapped_device *md, struct bio *bio) +static int __split_bio(struct mapped_device *md, struct bio *bio) { struct clone_info ci; ci.map = dm_get_table(md); - if (!ci.map) { - bio_io_error(bio); - return; - } + if (unlikely(!ci.map)) + return -EIO; ci.md = md; ci.bio = bio; @@ -779,6 +790,8 @@ static void __split_bio(struct mapped_device *md, struct bio *bio) /* drop the extra reference count */ dec_pending(ci.io, 0); dm_table_put(ci.map); + + return 0; } /*----------------------------------------------------------------- * CRUD END @@ -790,7 +803,7 @@ static void __split_bio(struct mapped_device *md, struct bio *bio) */ static int dm_request(struct request_queue *q, struct bio *bio) { - int r; + int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; @@ -815,18 +828,11 @@ static int dm_request(struct request_queue *q, struct bio *bio) while (test_bit(DMF_BLOCK_IO, &md->flags)) { up_read(&md->io_lock); - if (bio_rw(bio) == READA) { - bio_io_error(bio); - return 0; - } - - r = queue_io(md, bio); - if (r < 0) { - bio_io_error(bio); - return 0; + if (bio_rw(bio) != READA) + r = queue_io(md, bio); - } else if (r == 0) - return 0; /* deferred successfully */ + if (r <= 0) + goto out_req; /* * We're in a while loop, because someone could suspend @@ -835,8 +841,13 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); } - __split_bio(md, bio); + r = __split_bio(md, bio); up_read(&md->io_lock); + +out_req: + if (r < 0) + bio_io_error(bio); + return 0; } @@ -977,6 +988,9 @@ static struct mapped_device *alloc_dev(int minor) atomic_set(&md->holders, 1); atomic_set(&md->open_count, 0); atomic_set(&md->event_nr, 0); + atomic_set(&md->uevent_seq, 0); + INIT_LIST_HEAD(&md->uevent_list); + spin_lock_init(&md->uevent_lock); md->queue = blk_alloc_queue(GFP_KERNEL); if (!md->queue) @@ -1044,12 +1058,14 @@ static struct mapped_device *alloc_dev(int minor) return NULL; } +static void unlock_fs(struct mapped_device *md); + static void free_dev(struct mapped_device *md) { int minor = md->disk->first_minor; if (md->suspended_bdev) { - thaw_bdev(md->suspended_bdev, NULL); + unlock_fs(md); bdput(md->suspended_bdev); } mempool_destroy(md->tio_pool); @@ -1073,8 +1089,16 @@ static void free_dev(struct mapped_device *md) */ static void event_callback(void *context) { + unsigned long flags; + LIST_HEAD(uevents); struct mapped_device *md = (struct mapped_device *) context; + spin_lock_irqsave(&md->uevent_lock, flags); + list_splice_init(&md->uevent_list, &uevents); + spin_unlock_irqrestore(&md->uevent_lock, flags); + + dm_send_uevents(&uevents, &md->disk->kobj); + atomic_inc(&md->event_nr); wake_up(&md->eventq); } @@ -1233,7 +1257,8 @@ static void __flush_deferred_io(struct mapped_device *md, struct bio *c) while (c) { n = c->bi_next; c->bi_next = NULL; - __split_bio(md, c); + if (__split_bio(md, c)) + bio_io_error(c); c = n; } } @@ -1491,6 +1516,11 @@ out: /*----------------------------------------------------------------- * Event notification. *---------------------------------------------------------------*/ +uint32_t dm_next_uevent_seq(struct mapped_device *md) +{ + return atomic_add_return(1, &md->uevent_seq); +} + uint32_t dm_get_event_nr(struct mapped_device *md) { return atomic_read(&md->event_nr); @@ -1502,6 +1532,15 @@ int dm_wait_event(struct mapped_device *md, int event_nr) (event_nr != atomic_read(&md->event_nr))); } +void dm_uevent_add(struct mapped_device *md, struct list_head *elist) +{ + unsigned long flags; + + spin_lock_irqsave(&md->uevent_lock, flags); + list_add(elist, &md->uevent_list); + spin_unlock_irqrestore(&md->uevent_lock, flags); +} + /* * The gendisk is only valid as long as you have a reference * count on 'md'. diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index 7e052378c47..f3831f31223 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -198,7 +198,7 @@ struct kcopyd_job { * These fields are only used if the job has been split * into more manageable parts. */ - struct semaphore lock; + struct mutex lock; atomic_t sub_jobs; sector_t progress; }; @@ -456,7 +456,7 @@ static void segment_complete(int read_err, sector_t count = 0; struct kcopyd_job *job = (struct kcopyd_job *) context; - down(&job->lock); + mutex_lock(&job->lock); /* update the error */ if (read_err) @@ -480,7 +480,7 @@ static void segment_complete(int read_err, job->progress += count; } } - up(&job->lock); + mutex_unlock(&job->lock); if (count) { int i; @@ -562,7 +562,7 @@ int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from, dispatch_job(job); else { - init_MUTEX(&job->lock); + mutex_init(&job->lock); job->progress = 0; split_job(job); } diff --git a/drivers/video/geode/video_gx.c b/drivers/video/geode/video_gx.c index 7f3f18d0671..febf09c6349 100644 --- a/drivers/video/geode/video_gx.c +++ b/drivers/video/geode/video_gx.c @@ -127,7 +127,7 @@ static void gx_set_dclk_frequency(struct fb_info *info) int timeout = 1000; /* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */ - if (cpu_data->x86_mask == 1) { + if (cpu_data(0).x86_mask == 1) { pll_table = gx_pll_table_14MHz; pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz); } else { diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index af8b235d405..11833f4caea 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -168,7 +168,8 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct spin_unlock(&inode->i_lock); spin_unlock(&clp->cl_lock); - kfree(delegation); + if (delegation != NULL) + nfs_free_delegation(delegation); return status; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8ec7fbd8240..35334539d94 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -562,6 +562,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) nfs_fattr_init(&fattr); desc->entry = &my_entry; + nfs_block_sillyrename(dentry); while(!desc->entry->eof) { res = readdir_search_pagecache(desc); @@ -592,6 +593,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) break; } } + nfs_unblock_sillyrename(dentry); unlock_kernel(); if (res > 0) res = 0; @@ -866,6 +868,7 @@ struct dentry_operations nfs_dentry_operations = { static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct dentry *res; + struct dentry *parent; struct inode *inode = NULL; int error; struct nfs_fh fhandle; @@ -894,26 +897,31 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru goto out_unlock; } + parent = dentry->d_parent; + /* Protect against concurrent sillydeletes */ + nfs_block_sillyrename(parent); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error == -ENOENT) goto no_entry; if (error < 0) { res = ERR_PTR(error); - goto out_unlock; + goto out_unblock_sillyrename; } inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); res = (struct dentry *)inode; if (IS_ERR(res)) - goto out_unlock; + goto out_unblock_sillyrename; no_entry: res = d_materialise_unique(dentry, inode); if (res != NULL) { if (IS_ERR(res)) - goto out_unlock; + goto out_unblock_sillyrename; dentry = res; } nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); +out_unblock_sillyrename: + nfs_unblock_sillyrename(parent); out_unlock: unlock_kernel(); out: diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d29f90d00aa..b3bb89f7d5d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -131,7 +131,7 @@ nfs_file_release(struct inode *inode, struct file *filp) { /* Ensure that dirty pages are flushed out with the right creds */ if (filp->f_mode & FMODE_WRITE) - filemap_fdatawrite(filp->f_mapping); + nfs_wb_all(filp->f_path.dentry->d_inode); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return NFS_PROTO(inode)->file_release(inode, filp); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6d2f2a3eccf..db5d96dc610 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -514,7 +514,7 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) return ctx; } -void put_nfs_open_context(struct nfs_open_context *ctx) +static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait) { struct inode *inode = ctx->path.dentry->d_inode; @@ -522,8 +522,12 @@ void put_nfs_open_context(struct nfs_open_context *ctx) return; list_del(&ctx->list); spin_unlock(&inode->i_lock); - if (ctx->state != NULL) - nfs4_close_state(&ctx->path, ctx->state, ctx->mode); + if (ctx->state != NULL) { + if (wait) + nfs4_close_sync(&ctx->path, ctx->state, ctx->mode); + else + nfs4_close_state(&ctx->path, ctx->state, ctx->mode); + } if (ctx->cred != NULL) put_rpccred(ctx->cred); dput(ctx->path.dentry); @@ -531,6 +535,16 @@ void put_nfs_open_context(struct nfs_open_context *ctx) kfree(ctx); } +void put_nfs_open_context(struct nfs_open_context *ctx) +{ + __put_nfs_open_context(ctx, 0); +} + +static void put_nfs_open_context_sync(struct nfs_open_context *ctx) +{ + __put_nfs_open_context(ctx, 1); +} + /* * Ensure that mmap has a recent RPC credential for use when writing out * shared pages @@ -577,7 +591,7 @@ static void nfs_file_clear_open_context(struct file *filp) spin_lock(&inode->i_lock); list_move_tail(&ctx->list, &NFS_I(inode)->open_files); spin_unlock(&inode->i_lock); - put_nfs_open_context(ctx); + put_nfs_open_context_sync(ctx); } } @@ -1169,6 +1183,9 @@ static void init_once(struct kmem_cache * cachep, void *foo) INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); nfsi->ncommit = 0; nfsi->npages = 0; + atomic_set(&nfsi->silly_count, 1); + INIT_HLIST_HEAD(&nfsi->silly_list); + init_waitqueue_head(&nfsi->waitqueue); nfs4_init_once(nfsi); } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d2802b1ca3b..b35069a2aa9 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -178,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); -extern int nfs4_do_close(struct path *path, struct nfs4_state *state); +extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); @@ -209,6 +209,7 @@ extern void nfs4_drop_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t); +extern void nfs4_close_sync(struct path *, struct nfs4_state *, mode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); @@ -235,6 +236,7 @@ extern struct svc_version nfs4_callback_version1; #else #define nfs4_close_state(a, b, c) do { } while (0) +#define nfs4_close_sync(a, b, c) do { } while (0) #endif /* CONFIG_NFS_V4 */ #endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cb99fd90a9a..f03d9d5f5ba 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1305,7 +1305,7 @@ static const struct rpc_call_ops nfs4_close_ops = { * * NOTE: Caller must be holding the sp->so_owner semaphore! */ -int nfs4_do_close(struct path *path, struct nfs4_state *state) +int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_closedata *calldata; @@ -1333,8 +1333,11 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state) task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata); if (IS_ERR(task)) return PTR_ERR(task); + status = 0; + if (wait) + status = rpc_wait_for_completion_task(task); rpc_put_task(task); - return 0; + return status; out_free_calldata: kfree(calldata); out: @@ -1365,13 +1368,14 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct } ret = PTR_ERR(filp); out_close: - nfs4_close_state(path, state, nd->intent.open.flags); + nfs4_close_sync(path, state, nd->intent.open.flags); return ret; } struct dentry * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { + struct dentry *parent; struct path path = { .mnt = nd->mnt, .dentry = dentry, @@ -1394,6 +1398,9 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); if (IS_ERR(cred)) return (struct dentry *)cred; + parent = dentry->d_parent; + /* Protect against concurrent sillydeletes */ + nfs_block_sillyrename(parent); state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { @@ -1401,12 +1408,14 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) d_add(dentry, NULL); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); } + nfs_unblock_sillyrename(parent); return (struct dentry *)state; } res = d_add_unique(dentry, igrab(state->inode)); if (res != NULL) path.dentry = res; nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir)); + nfs_unblock_sillyrename(parent); nfs4_intent_set_file(nd, &path, state); return res; } @@ -1444,7 +1453,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st nfs4_intent_set_file(nd, &path, state); return 1; } - nfs4_close_state(&path, state, openflags); + nfs4_close_sync(&path, state, openflags); out_drop: d_drop(dentry); return 0; @@ -1898,7 +1907,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) status = nfs4_intent_set_file(nd, &path, state); else - nfs4_close_state(&path, state, flags); + nfs4_close_sync(&path, state, flags); out: return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index bfb36261cec..23a9a36556b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -425,7 +425,7 @@ void nfs4_put_open_state(struct nfs4_state *state) /* * Close the current file. */ -void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) +static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mode, int wait) { struct nfs4_state_owner *owner = state->owner; int call_close = 0; @@ -466,7 +466,17 @@ void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) nfs4_put_open_state(state); nfs4_put_state_owner(owner); } else - nfs4_do_close(path, state); + nfs4_do_close(path, state, wait); +} + +void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) +{ + __nfs4_close(path, state, mode, 0); +} + +void nfs4_close_sync(struct path *path, struct nfs4_state *state, mode_t mode) +{ + __nfs4_close(path, state, mode, 1); } /* diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 1aed850d18f..6ecd46c967c 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -14,6 +14,7 @@ struct nfs_unlinkdata { + struct hlist_node list; struct nfs_removeargs args; struct nfs_removeres res; struct inode *dir; @@ -52,6 +53,20 @@ static int nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data) return 0; } +static void nfs_free_dname(struct nfs_unlinkdata *data) +{ + kfree(data->args.name.name); + data->args.name.name = NULL; + data->args.name.len = 0; +} + +static void nfs_dec_sillycount(struct inode *dir) +{ + struct nfs_inode *nfsi = NFS_I(dir); + if (atomic_dec_return(&nfsi->silly_count) == 1) + wake_up(&nfsi->waitqueue); +} + /** * nfs_async_unlink_init - Initialize the RPC info * task: rpc_task of the sillydelete @@ -95,6 +110,8 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) static void nfs_async_unlink_release(void *calldata) { struct nfs_unlinkdata *data = calldata; + + nfs_dec_sillycount(data->dir); nfs_free_unlinkdata(data); } @@ -104,33 +121,100 @@ static const struct rpc_call_ops nfs_unlink_ops = { .rpc_release = nfs_async_unlink_release, }; -static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) +static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) { struct rpc_task *task; + struct dentry *alias; + + alias = d_lookup(parent, &data->args.name); + if (alias != NULL) { + int ret = 0; + /* + * Hey, we raced with lookup... See if we need to transfer + * the sillyrename information to the aliased dentry. + */ + nfs_free_dname(data); + spin_lock(&alias->d_lock); + if (!(alias->d_flags & DCACHE_NFSFS_RENAMED)) { + alias->d_fsdata = data; + alias->d_flags ^= DCACHE_NFSFS_RENAMED; + ret = 1; + } + spin_unlock(&alias->d_lock); + nfs_dec_sillycount(dir); + dput(alias); + return ret; + } + data->dir = igrab(dir); + if (!data->dir) { + nfs_dec_sillycount(dir); + return 0; + } + data->args.fh = NFS_FH(dir); + nfs_fattr_init(&data->res.dir_attr); + + task = rpc_run_task(NFS_CLIENT(dir), RPC_TASK_ASYNC, &nfs_unlink_ops, data); + if (!IS_ERR(task)) + rpc_put_task(task); + return 1; +} + +static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) +{ struct dentry *parent; struct inode *dir; + int ret = 0; - if (nfs_copy_dname(dentry, data) < 0) - goto out_free; parent = dget_parent(dentry); if (parent == NULL) goto out_free; - dir = igrab(parent->d_inode); + dir = parent->d_inode; + if (nfs_copy_dname(dentry, data) == 0) + goto out_dput; + /* Non-exclusive lock protects against concurrent lookup() calls */ + spin_lock(&dir->i_lock); + if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { + /* Deferred delete */ + hlist_add_head(&data->list, &NFS_I(dir)->silly_list); + spin_unlock(&dir->i_lock); + ret = 1; + goto out_dput; + } + spin_unlock(&dir->i_lock); + ret = nfs_do_call_unlink(parent, dir, data); +out_dput: dput(parent); - if (dir == NULL) - goto out_free; +out_free: + return ret; +} - data->dir = dir; - data->args.fh = NFS_FH(dir); - nfs_fattr_init(&data->res.dir_attr); +void nfs_block_sillyrename(struct dentry *dentry) +{ + struct nfs_inode *nfsi = NFS_I(dentry->d_inode); - task = rpc_run_task(NFS_CLIENT(dir), RPC_TASK_ASYNC, &nfs_unlink_ops, data); - if (!IS_ERR(task)) - rpc_put_task(task); - return 1; -out_free: - return 0; + wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1); +} + +void nfs_unblock_sillyrename(struct dentry *dentry) +{ + struct inode *dir = dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(dir); + struct nfs_unlinkdata *data; + + atomic_inc(&nfsi->silly_count); + spin_lock(&dir->i_lock); + while (!hlist_empty(&nfsi->silly_list)) { + if (!atomic_inc_not_zero(&nfsi->silly_count)) + break; + data = hlist_entry(nfsi->silly_list.first, struct nfs_unlinkdata, list); + hlist_del(&data->list); + spin_unlock(&dir->i_lock); + if (nfs_do_call_unlink(dentry, dir, data) == 0) + nfs_free_unlinkdata(data); + spin_lock(&dir->i_lock); + } + spin_unlock(&dir->i_lock); } /** diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0cf9d1cd9bd..89527a487ed 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -174,8 +174,6 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int return; if (count != nfs_page_length(page)) return; - if (count != PAGE_CACHE_SIZE) - zero_user_page(page, count, PAGE_CACHE_SIZE - count, KM_USER0); SetPageUptodate(page); } @@ -627,7 +625,8 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, return ERR_PTR(error); } spin_unlock(&inode->i_lock); - return new; + req = new; + goto zero_page; } spin_unlock(&inode->i_lock); @@ -655,13 +654,23 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, if (offset < req->wb_offset) { req->wb_offset = offset; req->wb_pgbase = offset; - req->wb_bytes = rqend - req->wb_offset; + req->wb_bytes = max(end, rqend) - req->wb_offset; + goto zero_page; } if (end > rqend) req->wb_bytes = end - req->wb_offset; return req; +zero_page: + /* If this page might potentially be marked as up to date, + * then we need to zero any uninitalised data. */ + if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE + && !PageUptodate(req->wb_page)) + zero_user_page(req->wb_page, req->wb_bytes, + PAGE_CACHE_SIZE - req->wb_bytes, + KM_USER0); + return req; } int nfs_flush_incompatible(struct file *file, struct page *page) diff --git a/include/asm-x86/acpi_32.h b/include/asm-x86/acpi_32.h index 125179adf04..723493e6c85 100644 --- a/include/asm-x86/acpi_32.h +++ b/include/asm-x86/acpi_32.h @@ -81,11 +81,7 @@ int __acpi_release_global_lock(unsigned int *lock); :"=r"(n_hi), "=r"(n_lo) \ :"0"(n_hi), "1"(n_lo)) -#ifdef CONFIG_X86_IO_APIC -extern void check_acpi_pci(void); -#else -static inline void check_acpi_pci(void) { } -#endif +extern void early_quirks(void); #ifdef CONFIG_ACPI extern int acpi_lapic; diff --git a/include/asm-x86/compat.h b/include/asm-x86/compat.h index 53cb96b68a6..66ba7987184 100644 --- a/include/asm-x86/compat.h +++ b/include/asm-x86/compat.h @@ -6,6 +6,7 @@ */ #include <linux/types.h> #include <linux/sched.h> +#include <asm/user32.h> #define COMPAT_USER_HZ 100 @@ -181,6 +182,11 @@ struct compat_shmid64_ds { }; /* + * The type of struct elf_prstatus.pr_reg in compatible core dumps. + */ +typedef struct user_regs_struct32 compat_elf_gregset_t; + +/* * A pointer passed in from user mode. This should not * be used for syscall parameters, just declare them * as pointers because the syscall entry code will have diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h index ac991b5ca0f..7d9c938e69f 100644 --- a/include/asm-x86/desc_64.h +++ b/include/asm-x86/desc_64.h @@ -20,6 +20,16 @@ extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; #define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8)) #define clear_LDT() asm volatile("lldt %w0"::"r" (0)) +static inline unsigned long __store_tr(void) +{ + unsigned long tr; + + asm volatile ("str %w0":"=r" (tr)); + return tr; +} + +#define store_tr(tr) (tr) = __store_tr() + /* * This is the ldt that every process will get unless we need * something other than this. @@ -31,6 +41,16 @@ extern struct desc_ptr cpu_gdt_descr[]; /* the cpu gdt accessor */ #define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address) +static inline void load_gdt(const struct desc_ptr *ptr) +{ + asm volatile("lgdt %w0"::"m" (*ptr)); +} + +static inline void store_gdt(struct desc_ptr *ptr) +{ + asm("sgdt %w0":"=m" (*ptr)); +} + static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist) { struct gate_struct s; @@ -71,6 +91,16 @@ static inline void set_system_gate_ist(int nr, void *func, unsigned ist) _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist); } +static inline void load_idt(const struct desc_ptr *ptr) +{ + asm volatile("lidt %w0"::"m" (*ptr)); +} + +static inline void store_idt(struct desc_ptr *dtr) +{ + asm("sidt %w0":"=m" (*dtr)); +} + static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, unsigned size) { diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h index d94898831ba..771af336734 100644 --- a/include/asm-x86/geode.h +++ b/include/asm-x86/geode.h @@ -38,6 +38,8 @@ extern int geode_get_dev_base(unsigned int dev); #define MSR_LBAR_ACPI 0x5140000E #define MSR_LBAR_PMS 0x5140000F +#define MSR_DIVIL_SOFT_RESET 0x51400017 + #define MSR_PIC_YSEL_LOW 0x51400020 #define MSR_PIC_YSEL_HIGH 0x51400021 #define MSR_PIC_ZSEL_LOW 0x51400022 diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h index d4ab6db050b..4f51519fc19 100644 --- a/include/asm-x86/hpet.h +++ b/include/asm-x86/hpet.h @@ -64,6 +64,7 @@ /* hpet memory map physical address */ extern unsigned long hpet_address; extern unsigned long force_hpet_address; +extern int hpet_force_user; extern int is_hpet_enabled(void); extern int hpet_enable(void); extern unsigned long hpet_readl(unsigned long a); diff --git a/include/asm-x86/io_apic_64.h b/include/asm-x86/io_apic_64.h index d9f2e54324d..e2c13675ee4 100644 --- a/include/asm-x86/io_apic_64.h +++ b/include/asm-x86/io_apic_64.h @@ -133,4 +133,6 @@ void enable_NMI_through_LVT0 (void * dummy); extern spinlock_t i8259A_lock; +extern int timer_over_8254; + #endif diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index a7c75ea408a..6d011bd6067 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -119,7 +119,7 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) */ local_irq_save(flags); for_each_cpu_mask(query_cpu, mask) { - __send_IPI_dest_field(x86_cpu_to_apicid[query_cpu], + __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); diff --git a/include/asm-x86/irq_32.h b/include/asm-x86/irq_32.h index 36f310632c4..aca9c96e8e6 100644 --- a/include/asm-x86/irq_32.h +++ b/include/asm-x86/irq_32.h @@ -45,4 +45,7 @@ unsigned int do_IRQ(struct pt_regs *regs); void init_IRQ(void); void __init native_init_IRQ(void); +/* Interrupt vector management */ +extern DECLARE_BITMAP(used_vectors, NR_VECTORS); + #endif /* _ASM_IRQ_H */ diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h index a02eb299134..a4944732be0 100644 --- a/include/asm-x86/msr-index.h +++ b/include/asm-x86/msr-index.h @@ -73,8 +73,32 @@ #define MSR_P6_EVNTSEL0 0x00000186 #define MSR_P6_EVNTSEL1 0x00000187 -/* K7/K8 MSRs. Not complete. See the architecture manual for a more +/* AMD64 MSRs. Not complete. See the architecture manual for a more complete list. */ + +#define MSR_AMD64_IBSFETCHCTL 0xc0011030 +#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSOPCTL 0xc0011033 +#define MSR_AMD64_IBSOPRIP 0xc0011034 +#define MSR_AMD64_IBSOPDATA 0xc0011035 +#define MSR_AMD64_IBSOPDATA2 0xc0011036 +#define MSR_AMD64_IBSOPDATA3 0xc0011037 +#define MSR_AMD64_IBSDCLINAD 0xc0011038 +#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 +#define MSR_AMD64_IBSCTL 0xc001103a + +/* K8 MSRs */ +#define MSR_K8_TOP_MEM1 0xc001001a +#define MSR_K8_TOP_MEM2 0xc001001d +#define MSR_K8_SYSCFG 0xc0010010 +#define MSR_K8_HWCR 0xc0010015 +#define MSR_K8_ENABLE_C1E 0xc0010055 +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ + +/* K7 MSRs */ #define MSR_K7_EVNTSEL0 0xc0010000 #define MSR_K7_PERFCTR0 0xc0010004 #define MSR_K7_EVNTSEL1 0xc0010001 @@ -83,20 +107,10 @@ #define MSR_K7_PERFCTR2 0xc0010006 #define MSR_K7_EVNTSEL3 0xc0010003 #define MSR_K7_PERFCTR3 0xc0010007 -#define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K7_CLK_CTL 0xc001001b -#define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 - -#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ -#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ -#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ - #define MSR_K7_HWCR 0xc0010015 -#define MSR_K8_HWCR 0xc0010015 #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 -#define MSR_K8_ENABLE_C1E 0xc0010055 /* K6 MSRs */ #define MSR_K6_EFER 0xc0000080 diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h index 83800e7496e..13976b08683 100644 --- a/include/asm-x86/processor_32.h +++ b/include/asm-x86/processor_32.h @@ -79,6 +79,7 @@ struct cpuinfo_x86 { unsigned char booted_cores; /* number of cores as seen by OS */ __u8 phys_proc_id; /* Physical processor id. */ __u8 cpu_core_id; /* Core id */ + __u8 cpu_index; /* index into per_cpu list */ #endif } __attribute__((__aligned__(SMP_CACHE_BYTES))); @@ -103,14 +104,19 @@ extern struct tss_struct doublefault_tss; DECLARE_PER_CPU(struct tss_struct, init_tss); #ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +#define cpu_data(cpu) per_cpu(cpu_info, cpu) +#define current_cpu_data cpu_data(smp_processor_id()) #else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data +#define cpu_data(cpu) boot_cpu_data +#define current_cpu_data boot_cpu_data #endif -extern int cpu_llc_id[NR_CPUS]; +/* + * the following now lives in the per cpu area: + * extern int cpu_llc_id[NR_CPUS]; + */ +DECLARE_PER_CPU(u8, cpu_llc_id); extern char ignore_fpu_irq; void __init cpu_detect(struct cpuinfo_x86 *c); diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h index f422becbddd..e4f19970a82 100644 --- a/include/asm-x86/processor_64.h +++ b/include/asm-x86/processor_64.h @@ -74,6 +74,7 @@ struct cpuinfo_x86 { __u8 booted_cores; /* number of cores as seen by OS */ __u8 phys_proc_id; /* Physical Processor id. */ __u8 cpu_core_id; /* Core id. */ + __u8 cpu_index; /* index into per_cpu list */ #endif } ____cacheline_aligned; @@ -88,11 +89,12 @@ struct cpuinfo_x86 { #define X86_VENDOR_UNKNOWN 0xff #ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +#define cpu_data(cpu) per_cpu(cpu_info, cpu) +#define current_cpu_data cpu_data(smp_processor_id()) #else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data +#define cpu_data(cpu) boot_cpu_data +#define current_cpu_data boot_cpu_data #endif extern char ignore_irq13; @@ -390,12 +392,6 @@ static inline void sync_core(void) asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); } -#define ARCH_HAS_PREFETCH -static inline void prefetch(void *x) -{ - asm volatile("prefetcht0 (%0)" :: "r" (x)); -} - #define ARCH_HAS_PREFETCHW 1 static inline void prefetchw(void *x) { diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h index c44a3a93b5a..dabba55f7ed 100644 --- a/include/asm-x86/proto.h +++ b/include/asm-x86/proto.h @@ -83,8 +83,6 @@ extern unsigned tsc_khz; extern int reboot_force; extern int notsc_setup(char *); -extern int timer_over_8254; - extern int gsi_irq_sharing(int gsi); extern int force_mwait; diff --git a/include/asm-x86/ptrace_32.h b/include/asm-x86/ptrace_32.h index 6002597b9e1..78d063dabe0 100644 --- a/include/asm-x86/ptrace_32.h +++ b/include/asm-x86/ptrace_32.h @@ -55,6 +55,8 @@ static inline int v8086_mode(struct pt_regs *regs) } #define instruction_pointer(regs) ((regs)->eip) +#define frame_pointer(regs) ((regs)->ebp) +#define stack_pointer(regs) ((regs)->esp) #define regs_return_value(regs) ((regs)->eax) extern unsigned long profile_pc(struct pt_regs *regs); diff --git a/include/asm-x86/ptrace_64.h b/include/asm-x86/ptrace_64.h index 7f166ccb060..7bfe61e1b70 100644 --- a/include/asm-x86/ptrace_64.h +++ b/include/asm-x86/ptrace_64.h @@ -40,6 +40,8 @@ struct pt_regs { #define user_mode(regs) (!!((regs)->cs & 3)) #define user_mode_vm(regs) user_mode(regs) #define instruction_pointer(regs) ((regs)->rip) +#define frame_pointer(regs) ((regs)->rbp) +#define stack_pointer(regs) ((regs)->rsp) #define regs_return_value(regs) ((regs)->rax) extern unsigned long profile_pc(struct pt_regs *regs); diff --git a/include/asm-x86/smp_32.h b/include/asm-x86/smp_32.h index 1f576a93368..7056d868452 100644 --- a/include/asm-x86/smp_32.h +++ b/include/asm-x86/smp_32.h @@ -39,9 +39,11 @@ extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); #define MAX_APICID 256 -extern u8 x86_cpu_to_apicid[]; +extern u8 __initdata x86_cpu_to_apicid_init[]; +extern void *x86_cpu_to_apicid_ptr; +DECLARE_PER_CPU(u8, x86_cpu_to_apicid); -#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) extern void set_cpu_sibling_map(int cpu); diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h index d30e9b684fd..6f0e0273b64 100644 --- a/include/asm-x86/smp_64.h +++ b/include/asm-x86/smp_64.h @@ -37,6 +37,8 @@ extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); extern int smp_num_siblings; extern void smp_send_reschedule(int cpu); +extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *), + void *info, int wait); /* * cpu_sibling_map and cpu_core_map now live @@ -47,7 +49,7 @@ extern void smp_send_reschedule(int cpu); */ DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); -extern u8 cpu_llc_id[NR_CPUS]; +DECLARE_PER_CPU(u8, cpu_llc_id); #define SMP_TRAMPOLINE_BASE 0x6000 @@ -84,7 +86,9 @@ static inline int hard_smp_processor_id(void) * Some lowlevel functions might want to know about * the real APIC ID <-> CPU # mapping. */ -extern u8 x86_cpu_to_apicid[NR_CPUS]; /* physical ID */ +extern u8 __initdata x86_cpu_to_apicid_init[]; +extern void *x86_cpu_to_apicid_ptr; +DECLARE_PER_CPU(u8, x86_cpu_to_apicid); /* physical ID */ extern u8 bios_cpu_apicid[]; static inline int cpu_present_to_apicid(int mps_cpu) @@ -115,8 +119,9 @@ static __inline int logical_smp_processor_id(void) } #ifdef CONFIG_SMP -#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) #else +extern unsigned int boot_cpu_id; #define cpu_physical_id(cpu) boot_cpu_id #endif /* !CONFIG_SMP */ #endif diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h index db6283eb5e4..ef8468883ba 100644 --- a/include/asm-x86/system_32.h +++ b/include/asm-x86/system_32.h @@ -315,5 +315,6 @@ extern unsigned long arch_align_stack(unsigned long sp); extern void free_init_pages(char *what, unsigned long begin, unsigned long end); void default_idle(void); +void __show_registers(struct pt_regs *, int all); #endif diff --git a/include/asm-x86/topology_32.h b/include/asm-x86/topology_32.h index ae1074603c4..9040f5a6127 100644 --- a/include/asm-x86/topology_32.h +++ b/include/asm-x86/topology_32.h @@ -28,8 +28,8 @@ #define _ASM_I386_TOPOLOGY_H #ifdef CONFIG_X86_HT -#define topology_physical_package_id(cpu) (cpu_data[cpu].phys_proc_id) -#define topology_core_id(cpu) (cpu_data[cpu].cpu_core_id) +#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) +#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #endif diff --git a/include/asm-x86/topology_64.h b/include/asm-x86/topology_64.h index c0c93d74467..a718dda037e 100644 --- a/include/asm-x86/topology_64.h +++ b/include/asm-x86/topology_64.h @@ -56,8 +56,8 @@ extern int __node_distance(int, int); #endif #ifdef CONFIG_SMP -#define topology_physical_package_id(cpu) (cpu_data[cpu].phys_proc_id) -#define topology_core_id(cpu) (cpu_data[cpu].cpu_core_id) +#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) +#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #define mc_capable() (boot_cpu_data.x86_max_cores > 1) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 499f5373e21..37c66d1254b 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -183,11 +183,14 @@ int dm_resume(struct mapped_device *md); */ uint32_t dm_get_event_nr(struct mapped_device *md); int dm_wait_event(struct mapped_device *md, int event_nr); +uint32_t dm_next_uevent_seq(struct mapped_device *md); +void dm_uevent_add(struct mapped_device *md, struct list_head *elist); /* * Info functions. */ const char *dm_device_name(struct mapped_device *md); +int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid); struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct mapped_device *md); int dm_noflush_suspending(struct dm_target *ti); diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index b9348610782..523281c5b7f 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -131,6 +131,7 @@ struct dm_ioctl { char name[DM_NAME_LEN]; /* device name */ char uuid[DM_UUID_LEN]; /* unique identifier for * the block device */ + char data[7]; /* padding or data */ }; /* @@ -285,9 +286,9 @@ typedef char ioctl_struct[308]; #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 11 +#define DM_VERSION_MINOR 12 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2006-10-12)" +#define DM_VERSION_EXTRA "-ioctl (2007-10-02)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c5164c257f7..e82a6ebc725 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -160,6 +160,12 @@ struct nfs_inode { /* Open contexts for shared mmap writes */ struct list_head open_files; + /* Number of in-flight sillydelete RPC calls */ + atomic_t silly_count; + /* List of deferred sillydelete requests */ + struct hlist_head silly_list; + wait_queue_head_t waitqueue; + #ifdef CONFIG_NFS_V4 struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ @@ -394,6 +400,8 @@ extern void nfs_release_automount_timer(void); */ extern int nfs_async_unlink(struct inode *dir, struct dentry *dentry); extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); +extern void nfs_block_sillyrename(struct dentry *dentry); +extern void nfs_unblock_sillyrename(struct dentry *dentry); /* * linux/fs/nfs/write.c diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h index 1adfe668d03..af7c36a5a52 100644 --- a/include/linux/prefetch.h +++ b/include/linux/prefetch.h @@ -34,17 +34,12 @@ */ -/* - * These cannot be do{}while(0) macros. See the mental gymnastics in - * the loop macro. - */ - #ifndef ARCH_HAS_PREFETCH -static inline void prefetch(const void *x) {;} +#define prefetch(x) __builtin_prefetch(x) #endif #ifndef ARCH_HAS_PREFETCHW -static inline void prefetchw(const void *x) {;} +#define prefetchw(x) __builtin_prefetch(x,1) #endif #ifndef ARCH_HAS_SPINLOCK_PREFETCH diff --git a/kernel/Makefile b/kernel/Makefile index 05c3e6df859..79f017e09fb 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,8 +9,9 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \ - utsname.o sysctl_check.o notifier.o + utsname.o notifier.o +obj-$(CONFIG_SYSCTL) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index de7bb284c61..b96ea8d6a5e 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -56,6 +56,17 @@ endef # gcc support functions # See documentation in Documentation/kbuild/makefiles.txt +# cc-cross-prefix +# Usage: CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu- m68k-linux-) +# Return first prefix where a prefix$(CC) is found in PATH. +# If no $(CC) found in PATH with listed prefixes return nothing +cc-cross-prefix = \ + $(word 1, $(foreach c,$(1), \ + $(shell set -e; \ + if (which $(strip $(c))$(CC)) > /dev/null 2>&1 ; then \ + echo $(c); \ + fi))) + # output directory for tests below TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/) diff --git a/scripts/basic/docproc.c b/scripts/basic/docproc.c index e5c6ac7bde9..0e4bd5459df 100644 --- a/scripts/basic/docproc.c +++ b/scripts/basic/docproc.c @@ -66,12 +66,15 @@ FILELINE * entity_system; #define FUNCTION "-function" #define NOFUNCTION "-nofunction" +char *srctree; + void usage (void) { fprintf(stderr, "Usage: docproc {doc|depend} file\n"); fprintf(stderr, "Input is read from file.tmpl. Output is sent to stdout\n"); fprintf(stderr, "doc: frontend when generating kernel documentation\n"); fprintf(stderr, "depend: generate list of files referenced within file\n"); + fprintf(stderr, "Environment variable SRCTREE: absolute path to kernel source tree.\n"); } /* @@ -90,7 +93,7 @@ void exec_kernel_doc(char **svec) exit(1); case 0: memset(real_filename, 0, sizeof(real_filename)); - strncat(real_filename, getenv("SRCTREE"), PATH_MAX); + strncat(real_filename, srctree, PATH_MAX); strncat(real_filename, KERNELDOCPATH KERNELDOC, PATH_MAX - strlen(real_filename)); execvp(real_filename, svec); @@ -171,7 +174,7 @@ void find_export_symbols(char * filename) if (filename_exist(filename) == NULL) { char real_filename[PATH_MAX + 1]; memset(real_filename, 0, sizeof(real_filename)); - strncat(real_filename, getenv("SRCTREE"), PATH_MAX); + strncat(real_filename, srctree, PATH_MAX); strncat(real_filename, filename, PATH_MAX - strlen(real_filename)); sym = add_new_file(filename); @@ -338,6 +341,10 @@ void parse_file(FILE *infile) int main(int argc, char *argv[]) { FILE * infile; + + srctree = getenv("SRCTREE"); + if (!srctree) + srctree = getcwd(NULL, 0); if (argc != 3) { usage(); exit(1); diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index bb08069b04a..83c5e76414c 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -84,7 +84,7 @@ help: # lxdialog stuff check-lxdialog := $(srctree)/$(src)/lxdialog/check-lxdialog.sh -# Use reursively expanded variables so we do not call gcc unless +# Use recursively expanded variables so we do not call gcc unless # we really need to do so. (Do not call gcc as part of make mrproper) HOST_EXTRACFLAGS = $(shell $(CONFIG_SHELL) $(check-lxdialog) -ccflags) HOST_LOADLIBES = $(shell $(CONFIG_SHELL) $(check-lxdialog) -ldflags $(HOSTCC)) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 2ef9a193fca..93ac52adb49 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -268,6 +268,9 @@ static struct symbol *sym_add_exported(const char *name, struct module *mod, "was in %s%s\n", mod->name, name, s->module->name, is_vmlinux(s->module->name) ?"":".ko"); + } else { + /* In case Modules.symvers was out of date */ + s->module = mod; } } s->preloaded = 0; diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 6edb29f2b4a..0f657b5f3bc 100644 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -83,6 +83,7 @@ Maintainer: $name Standards-Version: 3.6.1 Package: $packagename +Provides: kernel-image-$version, linux-image-$version Architecture: any Description: User Mode Linux kernel, version $version User-mode Linux is a port of the Linux kernel to its own system call @@ -104,6 +105,7 @@ Maintainer: $name Standards-Version: 3.6.1 Package: $packagename +Provides: kernel-image-$version, linux-image-$version Architecture: any Description: Linux kernel, version $version This package contains the Linux kernel, modules and corresponding other |