From 48adcf148c83faa41999fb0b3524299c4e160fd9 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 20 May 2008 01:03:16 +0300 Subject: [CPUFREQ] cpufreq: remove CVS keywords This patch removes CVS keywords that weren't updated for a long time from comments. Signed-off-by: Adrian Bunk Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k7.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h index f8a63b3664e..35fb4eaf6e1 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h @@ -1,5 +1,4 @@ /* - * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $ * (C) 2003 Dave Jones. * * Licensed under the terms of the GNU GPL License version 2. -- cgit v1.2.3 From 444933c6c6e82362ba8e0da26f41a53c433d11ef Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:28 +0900 Subject: [IA64] pvops: preparation: remove extern in irq_ia64.c remove extern declaration of handle_IPI() in irq_ia64.c. Instead, declare it in asm-ia64/smp.h. Later handle_IPI() will be referenced from another file. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/irq_ia64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 5538471e8d6..c48171bc796 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -600,7 +600,6 @@ static irqreturn_t dummy_handler (int irq, void *dev_id) { BUG(); } -extern irqreturn_t handle_IPI (int irq, void *dev_id); static struct irqaction ipi_irqaction = { .handler = handle_IPI, -- cgit v1.2.3 From 8311d21c35092aa4c4a12e0140e1ef3443489d77 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:29 +0900 Subject: [IA64] pvops: preparation: move the constants, LOAD_OFFSET, to a header file. 
Move the LOAD_OFFSET definition from vmlinux.lds.S into system.h. On paravirtualized environments, it is necessary to detect the execution environment. One of the solutions is the multi entry point. The multi entry point allows a boot loader to start the kernel execution from the entry point which is different from the ELF entry point. The non standard entry point will be defined as the specialized elf note which contains the LMA of the entry point symbol. The constant, LOAD_OFFSET, is necessary to calculate the symbol's LMA. Move the definition into the public header file to make it available to the multi entry point support. Cc: "He, Qing" Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/vmlinux.lds.S | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 5929ab10a28..5a77206c249 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -4,7 +4,6 @@ #include #include -#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) #include #define IVT_TEXT \ -- cgit v1.2.3 From 90aeb169c03a96e22674741f08054023c33d595b Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:32 +0900 Subject: [IA64] pvops: introduce pv_info which describes some random info. introduce pv_info which describes some random info about underlying execution environment. 
Cc: Jes Sorensen Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/Makefile | 2 ++ arch/ia64/kernel/paravirt.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 arch/ia64/kernel/paravirt.c (limited to 'arch') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 13fd10e8699..10a4ddb5b27 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -36,6 +36,8 @@ obj-$(CONFIG_PCI_MSI) += msi_ia64.o mca_recovery-y += mca_drv.o mca_drv_asm.o obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o +obj-$(CONFIG_PARAVIRT) += paravirt.o + obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) obj-y += esi_stub.o # must be in kernel proper diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c new file mode 100644 index 00000000000..d295ea5e59c --- /dev/null +++ b/arch/ia64/kernel/paravirt.c @@ -0,0 +1,41 @@ +/****************************************************************************** + * arch/ia64/kernel/paravirt.c + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * Yaozu (Eddie) Dong + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include + +#include +#include +#include +#include + +#include +#include + +/*************************************************************************** + * general info + */ +struct pv_info pv_info = { + .kernel_rpl = 0, + .paravirt_enabled = 0, + .name = "bare hardware" +}; -- cgit v1.2.3 From 3e0879deb700f322f6c81ab34f056fc72d15ec02 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:33 +0900 Subject: [IA64] pvops: add an early setup hook for pv_ops. This patch adds a setup hook in the very early boot sequence before start_kernel() to initialize paravirtualization stuff. The hook will be set by each pv loader code or by using multi entry point. Signed-off-by: Qing He Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/head.S | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch') diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index ddeab4e36fd..db540e58c78 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -26,11 +26,14 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #ifdef CONFIG_HOTPLUG_CPU #define SAL_PSR_BITS_TO_SET \ @@ -367,6 +370,44 @@ start_ap: ;; (isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader +#ifdef CONFIG_PARAVIRT + + movl r14=hypervisor_setup_hooks + movl r15=hypervisor_type + mov r16=num_hypervisor_hooks + ;; + ld8 r2=[r15] + ;; + cmp.ltu p7,p0=r2,r16 // array size check + shladd r8=r2,3,r14 + ;; +(p7) ld8 r9=[r8] + ;; +(p7) mov b1=r9 +(p7) cmp.ne.unc p7,p0=r9,r0 // no actual branch to NULL + ;; +(p7) br.call.sptk.many rp=b1 + + __INITDATA + +default_setup_hook = 0 // Currently nothing needs to be done. 
+ + .weak xen_setup_hook + + .global hypervisor_type +hypervisor_type: + data8 PARAVIRT_HYPERVISOR_TYPE_DEFAULT + + // must have the same order with PARAVIRT_HYPERVISOR_TYPE_xxx + +hypervisor_setup_hooks: + data8 default_setup_hook + data8 xen_setup_hook +num_hypervisor_hooks = (. - hypervisor_setup_hooks) / 8 + .previous + +#endif + #ifdef CONFIG_SMP (isAP) br.call.sptk.many rp=start_secondary .ret0: -- cgit v1.2.3 From 1ff730b52f0c3e4e3846c3ff345c5526b2633ba9 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:34 +0900 Subject: [IA64] pvops: introduce pv_cpu_ops to paravirtualize privileged instructions. introduce pv_cpu_ops to paravirtualize privileged instructions which are defined by ia64 intrinsics. make them indirect C function calls by introducing function tables, pv_cpu_ops. Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/paravirt.c | 247 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 247 insertions(+) (limited to 'arch') diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index d295ea5e59c..e5482bb6841 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -39,3 +40,249 @@ struct pv_info pv_info = { .paravirt_enabled = 0, .name = "bare hardware" }; + +/*************************************************************************** + * pv_cpu_ops + * intrinsics hooks. 
+ */ + +/* ia64_native_xxx are macros so that we have to make them real functions */ + +#define DEFINE_VOID_FUNC1(name) \ + static void \ + ia64_native_ ## name ## _func(unsigned long arg) \ + { \ + ia64_native_ ## name(arg); \ + } \ + +#define DEFINE_VOID_FUNC2(name) \ + static void \ + ia64_native_ ## name ## _func(unsigned long arg0, \ + unsigned long arg1) \ + { \ + ia64_native_ ## name(arg0, arg1); \ + } \ + +#define DEFINE_FUNC0(name) \ + static unsigned long \ + ia64_native_ ## name ## _func(void) \ + { \ + return ia64_native_ ## name(); \ + } + +#define DEFINE_FUNC1(name, type) \ + static unsigned long \ + ia64_native_ ## name ## _func(type arg) \ + { \ + return ia64_native_ ## name(arg); \ + } \ + +DEFINE_VOID_FUNC1(fc); +DEFINE_VOID_FUNC1(intrin_local_irq_restore); + +DEFINE_VOID_FUNC2(ptcga); +DEFINE_VOID_FUNC2(set_rr); + +DEFINE_FUNC0(get_psr_i); + +DEFINE_FUNC1(thash, unsigned long); +DEFINE_FUNC1(get_cpuid, int); +DEFINE_FUNC1(get_pmd, int); +DEFINE_FUNC1(get_rr, unsigned long); + +static void +ia64_native_ssm_i_func(void) +{ + ia64_native_ssm(IA64_PSR_I); +} + +static void +ia64_native_rsm_i_func(void) +{ + ia64_native_rsm(IA64_PSR_I); +} + +static void +ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4) +{ + ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4); +} + +#define CASE_GET_REG(id) \ + case _IA64_REG_ ## id: \ + res = ia64_native_getreg(_IA64_REG_ ## id); \ + break; +#define CASE_GET_AR(id) CASE_GET_REG(AR_ ## id) +#define CASE_GET_CR(id) CASE_GET_REG(CR_ ## id) + +unsigned long +ia64_native_getreg_func(int regnum) +{ + unsigned long res = -1; + switch (regnum) { + CASE_GET_REG(GP); + CASE_GET_REG(IP); + CASE_GET_REG(PSR); + CASE_GET_REG(TP); + CASE_GET_REG(SP); + + CASE_GET_AR(KR0); + CASE_GET_AR(KR1); + CASE_GET_AR(KR2); + CASE_GET_AR(KR3); + CASE_GET_AR(KR4); + CASE_GET_AR(KR5); + CASE_GET_AR(KR6); + CASE_GET_AR(KR7); + CASE_GET_AR(RSC); + 
CASE_GET_AR(BSP); + CASE_GET_AR(BSPSTORE); + CASE_GET_AR(RNAT); + CASE_GET_AR(FCR); + CASE_GET_AR(EFLAG); + CASE_GET_AR(CSD); + CASE_GET_AR(SSD); + CASE_GET_AR(CFLAG); + CASE_GET_AR(FSR); + CASE_GET_AR(FIR); + CASE_GET_AR(FDR); + CASE_GET_AR(CCV); + CASE_GET_AR(UNAT); + CASE_GET_AR(FPSR); + CASE_GET_AR(ITC); + CASE_GET_AR(PFS); + CASE_GET_AR(LC); + CASE_GET_AR(EC); + + CASE_GET_CR(DCR); + CASE_GET_CR(ITM); + CASE_GET_CR(IVA); + CASE_GET_CR(PTA); + CASE_GET_CR(IPSR); + CASE_GET_CR(ISR); + CASE_GET_CR(IIP); + CASE_GET_CR(IFA); + CASE_GET_CR(ITIR); + CASE_GET_CR(IIPA); + CASE_GET_CR(IFS); + CASE_GET_CR(IIM); + CASE_GET_CR(IHA); + CASE_GET_CR(LID); + CASE_GET_CR(IVR); + CASE_GET_CR(TPR); + CASE_GET_CR(EOI); + CASE_GET_CR(IRR0); + CASE_GET_CR(IRR1); + CASE_GET_CR(IRR2); + CASE_GET_CR(IRR3); + CASE_GET_CR(ITV); + CASE_GET_CR(PMV); + CASE_GET_CR(CMCV); + CASE_GET_CR(LRR0); + CASE_GET_CR(LRR1); + + default: + printk(KERN_CRIT "wrong_getreg %d\n", regnum); + break; + } + return res; +} + +#define CASE_SET_REG(id) \ + case _IA64_REG_ ## id: \ + ia64_native_setreg(_IA64_REG_ ## id, val); \ + break; +#define CASE_SET_AR(id) CASE_SET_REG(AR_ ## id) +#define CASE_SET_CR(id) CASE_SET_REG(CR_ ## id) + +void +ia64_native_setreg_func(int regnum, unsigned long val) +{ + switch (regnum) { + case _IA64_REG_PSR_L: + ia64_native_setreg(_IA64_REG_PSR_L, val); + ia64_dv_serialize_data(); + break; + CASE_SET_REG(SP); + CASE_SET_REG(GP); + + CASE_SET_AR(KR0); + CASE_SET_AR(KR1); + CASE_SET_AR(KR2); + CASE_SET_AR(KR3); + CASE_SET_AR(KR4); + CASE_SET_AR(KR5); + CASE_SET_AR(KR6); + CASE_SET_AR(KR7); + CASE_SET_AR(RSC); + CASE_SET_AR(BSP); + CASE_SET_AR(BSPSTORE); + CASE_SET_AR(RNAT); + CASE_SET_AR(FCR); + CASE_SET_AR(EFLAG); + CASE_SET_AR(CSD); + CASE_SET_AR(SSD); + CASE_SET_AR(CFLAG); + CASE_SET_AR(FSR); + CASE_SET_AR(FIR); + CASE_SET_AR(FDR); + CASE_SET_AR(CCV); + CASE_SET_AR(UNAT); + CASE_SET_AR(FPSR); + CASE_SET_AR(ITC); + CASE_SET_AR(PFS); + CASE_SET_AR(LC); + CASE_SET_AR(EC); + + 
CASE_SET_CR(DCR); + CASE_SET_CR(ITM); + CASE_SET_CR(IVA); + CASE_SET_CR(PTA); + CASE_SET_CR(IPSR); + CASE_SET_CR(ISR); + CASE_SET_CR(IIP); + CASE_SET_CR(IFA); + CASE_SET_CR(ITIR); + CASE_SET_CR(IIPA); + CASE_SET_CR(IFS); + CASE_SET_CR(IIM); + CASE_SET_CR(IHA); + CASE_SET_CR(LID); + CASE_SET_CR(IVR); + CASE_SET_CR(TPR); + CASE_SET_CR(EOI); + CASE_SET_CR(IRR0); + CASE_SET_CR(IRR1); + CASE_SET_CR(IRR2); + CASE_SET_CR(IRR3); + CASE_SET_CR(ITV); + CASE_SET_CR(PMV); + CASE_SET_CR(CMCV); + CASE_SET_CR(LRR0); + CASE_SET_CR(LRR1); + default: + printk(KERN_CRIT "wrong setreg %d\n", regnum); + break; + } +} + +struct pv_cpu_ops pv_cpu_ops = { + .fc = ia64_native_fc_func, + .thash = ia64_native_thash_func, + .get_cpuid = ia64_native_get_cpuid_func, + .get_pmd = ia64_native_get_pmd_func, + .ptcga = ia64_native_ptcga_func, + .get_rr = ia64_native_get_rr_func, + .set_rr = ia64_native_set_rr_func, + .set_rr0_to_rr4 = ia64_native_set_rr0_to_rr4_func, + .ssm_i = ia64_native_ssm_i_func, + .getreg = ia64_native_getreg_func, + .setreg = ia64_native_setreg_func, + .rsm_i = ia64_native_rsm_i_func, + .get_psr_i = ia64_native_get_psr_i_func, + .intrin_local_irq_restore + = ia64_native_intrin_local_irq_restore_func, +}; +EXPORT_SYMBOL(pv_cpu_ops); -- cgit v1.2.3 From 1e39d80a5957eab9dfdd7490d5c5cee272c34aa7 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:35 +0900 Subject: [IA64] pvops: preparation for paravirtualization of hand written assembly code. Preparation for paravirtualization of hand written assembly code. They are paravirtualized by single source code and compiled multiple times. To tell those files which target they are built for (including native), add one define. 
Cc: "Dong, Eddie" Cc: Keith Owens Cc: tgingold@free.fr Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 10a4ddb5b27..8b2524293eb 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -72,3 +72,12 @@ $(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE # We must build gate.so before we can assemble it. # Note: kbuild does not track this dependency due to usage of .incbin $(obj)/gate-data.o: $(obj)/gate.so + +# +# native ivt.S and entry.S +# +ASM_PARAVIRT_OBJS = ivt.o entry.o +define paravirtualized_native +AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE +endef +$(foreach obj,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_native,$(obj)))) -- cgit v1.2.3 From 02e32e36f42f8ea7ee6060d02f2d69ad5bad6d50 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:37 +0900 Subject: [IA64] pvops: paravirtualize minstate.h. paravirtualize minstate.h which are hand written assembly code. They include sensitive or performance critical privileged instructions. So that they are appropriate for paravirtualization. 
Cc: Keith Owens Cc: Akio Takebe Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/minstate.h | 13 +++++++------ arch/ia64/kernel/paravirt_inst.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) create mode 100644 arch/ia64/kernel/paravirt_inst.h (limited to 'arch') diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index 74b6d670aae..292e214a3b8 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -2,6 +2,7 @@ #include #include "entry.h" +#include "paravirt_inst.h" #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* read ar.itc in advance, and use it before leaving bank 0 */ @@ -43,16 +44,16 @@ * Note that psr.ic is NOT turned on by this macro. This is so that * we can pass interruption state as arguments to a handler. */ -#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA,WORKAROUND) \ +#define IA64_NATIVE_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND) \ mov r16=IA64_KR(CURRENT); /* M */ \ mov r27=ar.rsc; /* M */ \ mov r20=r1; /* A */ \ mov r25=ar.unat; /* M */ \ - mov r29=cr.ipsr; /* M */ \ + MOV_FROM_IPSR(p0,r29); /* M */ \ mov r26=ar.pfs; /* I */ \ - mov r28=cr.iip; /* M */ \ + MOV_FROM_IIP(r28); /* M */ \ mov r21=ar.fpsr; /* M */ \ - COVER; /* B;; (or nothing) */ \ + __COVER; /* B;; (or nothing) */ \ ;; \ adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \ ;; \ @@ -244,6 +245,6 @@ 1: \ .pred.rel "mutex", pKStk, pUStk -#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs, , RSE_WORKAROUND) -#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND) +#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(COVER, mov r30=cr.ifs, , RSE_WORKAROUND) +#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(COVER, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND) #define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, , ) diff --git a/arch/ia64/kernel/paravirt_inst.h b/arch/ia64/kernel/paravirt_inst.h new file mode 100644 index 00000000000..5cad6fb2ed1 
--- /dev/null +++ b/arch/ia64/kernel/paravirt_inst.h @@ -0,0 +1,29 @@ +/****************************************************************************** + * linux/arch/ia64/kernel/paravirt_inst.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifdef __IA64_ASM_PARAVIRTUALIZED_XEN +#include +#include +#else +#include +#endif + -- cgit v1.2.3 From 498c5170472ff0c03a29d22dbd33225a0be038f4 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:38 +0900 Subject: [IA64] pvops: paravirtualize ivt.S paravirtualize ivt.S which implements fault handler in hand written assembly code. They include sensitive or performance critical privileged instructions. So they need paravirtualization. 
Cc: Keith Owens Cc: tgingold@free.fr Cc: Akio Takebe Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/ivt.S | 249 ++++++++++++++++++++++++------------------------- 1 file changed, 122 insertions(+), 127 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 80b44ea052d..23749ed3cf0 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -12,6 +12,14 @@ * * 00/08/23 Asit Mallick TLB handling for SMP * 00/12/20 David Mosberger-Tang DTLB/ITLB handler now uses virtual PT. + * + * Copyright (C) 2005 Hewlett-Packard Co + * Dan Magenheimer + * Xen paravirtualization + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * pv_ops. + * Yaozu (Eddie) Dong */ /* * This file defines the interruption vector table used by the CPU. @@ -102,13 +110,13 @@ ENTRY(vhpt_miss) * - the faulting virtual address uses unimplemented address bits * - the faulting virtual address has no valid page table mapping */ - mov r16=cr.ifa // get address that caused the TLB miss + MOV_FROM_IFA(r16) // get address that caused the TLB miss #ifdef CONFIG_HUGETLB_PAGE movl r18=PAGE_SHIFT - mov r25=cr.itir + MOV_FROM_ITIR(r25) #endif ;; - rsm psr.dt // use physical addressing for data + RSM_PSR_DT // use physical addressing for data mov r31=pr // save the predicate registers mov r19=IA64_KR(PT_BASE) // get page table base address shl r21=r16,3 // shift bit 60 into sign bit @@ -168,21 +176,21 @@ ENTRY(vhpt_miss) dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr) ;; (p7) ld8 r18=[r21] // read *pte - mov r19=cr.isr // cr.isr bit 32 tells us if this is an insn miss + MOV_FROM_ISR(r19) // cr.isr bit 32 tells us if this is an insn miss ;; (p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? 
- mov r22=cr.iha // get the VHPT address that caused the TLB miss + MOV_FROM_IHA(r22) // get the VHPT address that caused the TLB miss ;; // avoid RAW on p7 (p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address ;; -(p10) itc.i r18 // insert the instruction TLB entry -(p11) itc.d r18 // insert the data TLB entry + ITC_I_AND_D(p10, p11, r18, r24) // insert the instruction TLB entry and + // insert the data TLB entry (p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) - mov cr.ifa=r22 + MOV_TO_IFA(r22, r24) #ifdef CONFIG_HUGETLB_PAGE -(p8) mov cr.itir=r25 // change to default page-size for VHPT + MOV_TO_ITIR(p8, r25, r24) // change to default page-size for VHPT #endif /* @@ -192,7 +200,7 @@ ENTRY(vhpt_miss) */ adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23 ;; -(p7) itc.d r24 + ITC_D(p7, r24, r25) ;; #ifdef CONFIG_SMP /* @@ -234,7 +242,7 @@ ENTRY(vhpt_miss) #endif mov pr=r31,-1 // restore predicate registers - rfi + RFI END(vhpt_miss) .org ia64_ivt+0x400 @@ -248,11 +256,11 @@ ENTRY(itlb_miss) * mode, walk the page table, and then re-execute the PTE read and * go on normally after that. */ - mov r16=cr.ifa // get virtual address + MOV_FROM_IFA(r16) // get virtual address mov r29=b0 // save b0 mov r31=pr // save predicates .itlb_fault: - mov r17=cr.iha // get virtual address of PTE + MOV_FROM_IHA(r17) // get virtual address of PTE movl r30=1f // load nested fault continuation point ;; 1: ld8 r18=[r17] // read *pte @@ -261,7 +269,7 @@ ENTRY(itlb_miss) tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? (p6) br.cond.spnt page_fault ;; - itc.i r18 + ITC_I(p0, r18, r19) ;; #ifdef CONFIG_SMP /* @@ -278,7 +286,7 @@ ENTRY(itlb_miss) (p7) ptc.l r16,r20 #endif mov pr=r31,-1 - rfi + RFI END(itlb_miss) .org ia64_ivt+0x0800 @@ -292,11 +300,11 @@ ENTRY(dtlb_miss) * mode, walk the page table, and then re-execute the PTE read and * go on normally after that. 
*/ - mov r16=cr.ifa // get virtual address + MOV_FROM_IFA(r16) // get virtual address mov r29=b0 // save b0 mov r31=pr // save predicates dtlb_fault: - mov r17=cr.iha // get virtual address of PTE + MOV_FROM_IHA(r17) // get virtual address of PTE movl r30=1f // load nested fault continuation point ;; 1: ld8 r18=[r17] // read *pte @@ -305,7 +313,7 @@ dtlb_fault: tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? (p6) br.cond.spnt page_fault ;; - itc.d r18 + ITC_D(p0, r18, r19) ;; #ifdef CONFIG_SMP /* @@ -322,7 +330,7 @@ dtlb_fault: (p7) ptc.l r16,r20 #endif mov pr=r31,-1 - rfi + RFI END(dtlb_miss) .org ia64_ivt+0x0c00 @@ -330,9 +338,9 @@ END(dtlb_miss) // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) ENTRY(alt_itlb_miss) DBG_FAULT(3) - mov r16=cr.ifa // get address that caused the TLB miss + MOV_FROM_IFA(r16) // get address that caused the TLB miss movl r17=PAGE_KERNEL - mov r21=cr.ipsr + MOV_FROM_IPSR(p0, r21) movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) mov r31=pr ;; @@ -341,9 +349,9 @@ ENTRY(alt_itlb_miss) ;; cmp.gt p8,p0=6,r22 // user mode ;; -(p8) thash r17=r16 + THASH(p8, r17, r16, r23) ;; -(p8) mov cr.iha=r17 + MOV_TO_IHA(p8, r17, r23) (p8) mov r29=b0 // save b0 (p8) br.cond.dptk .itlb_fault #endif @@ -358,9 +366,9 @@ ENTRY(alt_itlb_miss) or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 (p8) br.cond.spnt page_fault ;; - itc.i r19 // insert the TLB entry + ITC_I(p0, r19, r18) // insert the TLB entry mov pr=r31,-1 - rfi + RFI END(alt_itlb_miss) .org ia64_ivt+0x1000 @@ -368,11 +376,11 @@ END(alt_itlb_miss) // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) ENTRY(alt_dtlb_miss) DBG_FAULT(4) - mov r16=cr.ifa // get address that caused the TLB miss + MOV_FROM_IFA(r16) // get address that caused the TLB miss movl r17=PAGE_KERNEL - mov r20=cr.isr + MOV_FROM_ISR(r20) movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r21=cr.ipsr + MOV_FROM_IPSR(p0, r21) mov r31=pr mov r24=PERCPU_ADDR ;; @@ -381,9 +389,9 @@ 
ENTRY(alt_dtlb_miss) ;; cmp.gt p8,p0=6,r22 // access to region 0-5 ;; -(p8) thash r17=r16 + THASH(p8, r17, r16, r25) ;; -(p8) mov cr.iha=r17 + MOV_TO_IHA(p8, r17, r25) (p8) mov r29=b0 // save b0 (p8) br.cond.dptk dtlb_fault #endif @@ -402,7 +410,7 @@ ENTRY(alt_dtlb_miss) tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? ;; (p10) sub r19=r19,r26 -(p10) mov cr.itir=r25 + MOV_TO_ITIR(p10, r25, r24) cmp.ne p8,p0=r0,r23 (p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field (p12) dep r17=-1,r17,4,1 // set ma=UC for region 6 addr @@ -411,11 +419,11 @@ ENTRY(alt_dtlb_miss) dep r21=-1,r21,IA64_PSR_ED_BIT,1 ;; or r19=r19,r17 // insert PTE control bits into r19 -(p6) mov cr.ipsr=r21 + MOV_TO_IPSR(p6, r21, r24) ;; -(p7) itc.d r19 // insert the TLB entry + ITC_D(p7, r19, r18) // insert the TLB entry mov pr=r31,-1 - rfi + RFI END(alt_dtlb_miss) .org ia64_ivt+0x1400 @@ -444,10 +452,10 @@ ENTRY(nested_dtlb_miss) * * Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared) */ - rsm psr.dt // switch to using physical data addressing + RSM_PSR_DT // switch to using physical data addressing mov r19=IA64_KR(PT_BASE) // get the page table base address shl r21=r16,3 // shift bit 60 into sign bit - mov r18=cr.itir + MOV_FROM_ITIR(r18) ;; shr.u r17=r16,61 // get the region number into r17 extr.u r18=r18,2,6 // get the faulting page size @@ -510,21 +518,15 @@ END(ikey_miss) //----------------------------------------------------------------------------------- // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) ENTRY(page_fault) - ssm psr.dt - ;; - srlz.i + SSM_PSR_DT_AND_SRLZ_I ;; SAVE_MIN_WITH_COVER alloc r15=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=cr.isr + MOV_FROM_IFA(out0) + MOV_FROM_ISR(out1) + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3) adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collectin is on - ;; -(p15) ssm psr.i // restore psr.i + 
SSM_PSR_I(p15, p15, r14) // restore psr.i movl r14=ia64_leave_kernel ;; SAVE_REST @@ -556,10 +558,10 @@ ENTRY(dirty_bit) * page table TLB entry isn't present, we take a nested TLB miss hit where we look * up the physical address of the L3 PTE and then continue at label 1 below. */ - mov r16=cr.ifa // get the address that caused the fault + MOV_FROM_IFA(r16) // get the address that caused the fault movl r30=1f // load continuation point in case of nested fault ;; - thash r17=r16 // compute virtual address of L3 PTE + THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE mov r29=b0 // save b0 in case of nested fault mov r31=pr // save pr #ifdef CONFIG_SMP @@ -576,7 +578,7 @@ ENTRY(dirty_bit) ;; (p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present ;; -(p6) itc.d r25 // install updated PTE + ITC_D(p6, r25, r18) // install updated PTE ;; /* * Tell the assemblers dependency-violation checker that the above "itc" instructions @@ -602,7 +604,7 @@ ENTRY(dirty_bit) itc.d r18 // install updated PTE #endif mov pr=r31,-1 // restore pr - rfi + RFI END(dirty_bit) .org ia64_ivt+0x2400 @@ -611,22 +613,22 @@ END(dirty_bit) ENTRY(iaccess_bit) DBG_FAULT(9) // Like Entry 8, except for instruction access - mov r16=cr.ifa // get the address that caused the fault + MOV_FROM_IFA(r16) // get the address that caused the fault movl r30=1f // load continuation point in case of nested fault mov r31=pr // save predicates #ifdef CONFIG_ITANIUM /* * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. */ - mov r17=cr.ipsr + MOV_FROM_IPSR(p0, r17) ;; - mov r18=cr.iip + MOV_FROM_IIP(r18) tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? 
;; (p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa #endif /* CONFIG_ITANIUM */ ;; - thash r17=r16 // compute virtual address of L3 PTE + THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE mov r29=b0 // save b0 in case of nested fault) #ifdef CONFIG_SMP mov r28=ar.ccv // save ar.ccv @@ -642,7 +644,7 @@ ENTRY(iaccess_bit) ;; (p6) cmp.eq p6,p7=r26,r18 // Only if page present ;; -(p6) itc.i r25 // install updated PTE + ITC_I(p6, r25, r26) // install updated PTE ;; /* * Tell the assemblers dependency-violation checker that the above "itc" instructions @@ -668,7 +670,7 @@ ENTRY(iaccess_bit) itc.i r18 // install updated PTE #endif /* !CONFIG_SMP */ mov pr=r31,-1 - rfi + RFI END(iaccess_bit) .org ia64_ivt+0x2800 @@ -677,10 +679,10 @@ END(iaccess_bit) ENTRY(daccess_bit) DBG_FAULT(10) // Like Entry 8, except for data access - mov r16=cr.ifa // get the address that caused the fault + MOV_FROM_IFA(r16) // get the address that caused the fault movl r30=1f // load continuation point in case of nested fault ;; - thash r17=r16 // compute virtual address of L3 PTE + THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE mov r31=pr mov r29=b0 // save b0 in case of nested fault) #ifdef CONFIG_SMP @@ -697,7 +699,7 @@ ENTRY(daccess_bit) ;; (p6) cmp.eq p6,p7=r26,r18 // Only if page is present ;; -(p6) itc.d r25 // install updated PTE + ITC_D(p6, r25, r26) // install updated PTE /* * Tell the assemblers dependency-violation checker that the above "itc" instructions * cannot possibly affect the following loads: @@ -721,7 +723,7 @@ ENTRY(daccess_bit) #endif mov b0=r29 // restore b0 mov pr=r31,-1 - rfi + RFI END(daccess_bit) .org ia64_ivt+0x2c00 @@ -745,10 +747,10 @@ ENTRY(break_fault) */ DBG_FAULT(11) mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc) - mov r29=cr.ipsr // M2 (12 cyc) + MOV_FROM_IPSR(p0, r29) // M2 (12 cyc) mov r31=pr // I0 (2 cyc) - mov r17=cr.iim // M2 (2 cyc) + MOV_FROM_IIM(r17) // M2 (2 cyc) mov.m r27=ar.rsc // M2 (12 cyc) mov 
r18=__IA64_BREAK_SYSCALL // A @@ -767,7 +769,7 @@ ENTRY(break_fault) nop.m 0 movl r30=sys_call_table // X - mov r28=cr.iip // M2 (2 cyc) + MOV_FROM_IIP(r28) // M2 (2 cyc) cmp.eq p0,p7=r18,r17 // I0 is this a system call? (p7) br.cond.spnt non_syscall // B no -> // @@ -864,18 +866,17 @@ ENTRY(break_fault) #endif mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 nop 0 - bsw.1 // B (6 cyc) regs are saved, switch to bank 1 + BSW_1(r2, r14) // B (6 cyc) regs are saved, switch to bank 1 ;; - ssm psr.ic | PSR_DEFAULT_BITS // M2 now it's safe to re-enable intr.-collection + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r16) // M2 now it's safe to re-enable intr.-collection + // M0 ensure interruption collection is on movl r3=ia64_ret_from_syscall // X ;; - - srlz.i // M0 ensure interruption collection is on mov rp=r3 // I0 set the real return addr (p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT -(p15) ssm psr.i // M2 restore psr.i + SSM_PSR_I(p15, p15, r16) // M2 restore psr.i (p14) br.call.sptk.many b6=b6 // B invoke syscall-handker (ignore return addr) br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic // NOT REACHED @@ -899,16 +900,15 @@ ENTRY(interrupt) mov r31=pr // prepare to save predicates ;; SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 - ssm psr.ic | PSR_DEFAULT_BITS - ;; + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14) + // ensure everybody knows psr.ic is back on adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on ;; SAVE_REST ;; MCA_RECOVER_RANGE(interrupt) alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group - mov out0=cr.ivr // pass cr.ivr as first arg + MOV_FROM_IVR(out0, r8) // pass cr.ivr as first arg add out1=16,sp // pass pointer to pt_regs as second arg ;; srlz.d // make sure we see the effect of cr.ivr @@ -978,6 +978,7 @@ END(interrupt) * - ar.fpsr: set to kernel settings * - b6: preserved (same as on entry) */ 
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE GLOBAL_ENTRY(ia64_syscall_setup) #if PT(B6) != 0 # error This code assumes that b6 is the first field in pt_regs. @@ -1069,6 +1070,7 @@ GLOBAL_ENTRY(ia64_syscall_setup) (p10) mov r8=-EINVAL br.ret.sptk.many b7 END(ia64_syscall_setup) +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ .org ia64_ivt+0x3c00 ///////////////////////////////////////////////////////////////////////////////////////// @@ -1082,7 +1084,7 @@ END(ia64_syscall_setup) DBG_FAULT(16) FAULT(16) -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) /* * There is no particular reason for this code to be here, other than * that there happens to be space here that would go unused otherwise. @@ -1092,7 +1094,7 @@ END(ia64_syscall_setup) * account_sys_enter is called from SAVE_MIN* macros if accounting is * enabled and if the macro is entered from user mode. */ -ENTRY(account_sys_enter) +GLOBAL_ENTRY(account_sys_enter) // mov.m r20=ar.itc is called in advance, and r13 is current add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 @@ -1134,15 +1136,13 @@ ENTRY(non_syscall) // suitable spot... alloc r14=ar.pfs,0,0,2,0 - mov out0=cr.iim + MOV_FROM_IIM(out0) add out1=16,sp adds r3=8,r2 // set up second base pointer for SAVE_REST - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24) + // guarantee that interruption collection is on + SSM_PSR_I(p15, p15, r15) // restore psr.i movl r15=ia64_leave_kernel ;; SAVE_REST @@ -1168,14 +1168,12 @@ ENTRY(dispatch_unaligned_handler) SAVE_MIN_WITH_COVER ;; alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) 
- mov out0=cr.ifa + MOV_FROM_IFA(out0) adds out1=16,sp - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) + // guarantee that interruption collection is on + SSM_PSR_I(p15, p15, r3) // restore psr.i adds r3=8,r2 // set up second base pointer ;; SAVE_REST @@ -1207,17 +1205,16 @@ ENTRY(dispatch_to_fault_handler) */ SAVE_MIN_WITH_COVER_R19 alloc r14=ar.pfs,0,0,5,0 - mov out0=r15 - mov out1=cr.isr - mov out2=cr.ifa - mov out3=cr.iim - mov out4=cr.itir + MOV_FROM_ISR(out1) + MOV_FROM_IFA(out2) + MOV_FROM_IIM(out3) + MOV_FROM_ITIR(out4) ;; - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0) + // guarantee that interruption collection is on + mov out0=r15 ;; -(p15) ssm psr.i // restore psr.i + SSM_PSR_I(p15, p15, r3) // restore psr.i adds r3=8,r2 // set up second base pointer for SAVE_REST ;; SAVE_REST @@ -1236,8 +1233,8 @@ END(dispatch_to_fault_handler) // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) ENTRY(page_not_present) DBG_FAULT(20) - mov r16=cr.ifa - rsm psr.dt + MOV_FROM_IFA(r16) + RSM_PSR_DT /* * The Linux page fault handler doesn't expect non-present pages to be in * the TLB. Flush the existing entry now, so we meet that expectation. 
@@ -1256,8 +1253,8 @@ END(page_not_present) // 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) ENTRY(key_permission) DBG_FAULT(21) - mov r16=cr.ifa - rsm psr.dt + MOV_FROM_IFA(r16) + RSM_PSR_DT mov r31=pr ;; srlz.d @@ -1269,8 +1266,8 @@ END(key_permission) // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) ENTRY(iaccess_rights) DBG_FAULT(22) - mov r16=cr.ifa - rsm psr.dt + MOV_FROM_IFA(r16) + RSM_PSR_DT mov r31=pr ;; srlz.d @@ -1282,8 +1279,8 @@ END(iaccess_rights) // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) ENTRY(daccess_rights) DBG_FAULT(23) - mov r16=cr.ifa - rsm psr.dt + MOV_FROM_IFA(r16) + RSM_PSR_DT mov r31=pr ;; srlz.d @@ -1295,7 +1292,7 @@ END(daccess_rights) // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) ENTRY(general_exception) DBG_FAULT(24) - mov r16=cr.isr + MOV_FROM_ISR(r16) mov r31=pr ;; cmp4.eq p6,p0=0,r16 @@ -1324,8 +1321,8 @@ END(disabled_fp_reg) ENTRY(nat_consumption) DBG_FAULT(26) - mov r16=cr.ipsr - mov r17=cr.isr + MOV_FROM_IPSR(p0, r16) + MOV_FROM_ISR(r17) mov r31=pr // save PR ;; and r18=0xf,r17 // r18 = cr.ipsr.code{3:0} @@ -1335,10 +1332,10 @@ ENTRY(nat_consumption) dep r16=-1,r16,IA64_PSR_ED_BIT,1 (p6) br.cond.spnt 1f // branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH) ;; - mov cr.ipsr=r16 // set cr.ipsr.na + MOV_TO_IPSR(p0, r16, r18) mov pr=r31,-1 ;; - rfi + RFI 1: mov pr=r31,-1 ;; @@ -1360,26 +1357,26 @@ ENTRY(speculation_vector) * * cr.imm contains zero_ext(imm21) */ - mov r18=cr.iim + MOV_FROM_IIM(r18) ;; - mov r17=cr.iip + MOV_FROM_IIP(r17) shl r18=r18,43 // put sign bit in position (43=64-21) ;; - mov r16=cr.ipsr + MOV_FROM_IPSR(p0, r16) shr r18=r18,39 // sign extend (39=43-4) ;; add r17=r17,r18 // now add the offset ;; - mov cr.iip=r17 + MOV_TO_IIP(r17, r19) // store the adjusted IIP back into cr.iip dep r16=0,r16,41,2 // clear EI ;; - mov cr.ipsr=r16 + MOV_TO_IPSR(p0, r16, r19) // store IPSR (EI cleared) back into cr.ipsr ;; - rfi // and go back + RFI END(speculation_vector) .org ia64_ivt+0x5800 @@ -1517,11 +1514,11 @@ 
ENTRY(ia32_intercept) DBG_FAULT(46) #ifdef CONFIG_IA32_SUPPORT mov r31=pr - mov r16=cr.isr + MOV_FROM_ISR(r16) ;; extr.u r17=r16,16,8 // get ISR.code mov r18=ar.eflag - mov r19=cr.iim // old eflag value + MOV_FROM_IIM(r19) // old eflag value ;; cmp.ne p6,p0=2,r17 (p6) br.cond.spnt 1f // not a system flag fault @@ -1533,7 +1530,7 @@ ENTRY(ia32_intercept) (p6) br.cond.spnt 1f // eflags.ac bit didn't change ;; mov pr=r31,-1 // restore predicate registers - rfi + RFI 1: #endif // CONFIG_IA32_SUPPORT @@ -1686,11 +1683,10 @@ ENTRY(dispatch_illegal_op_fault) .prologue .body SAVE_MIN_WITH_COVER - ssm psr.ic | PSR_DEFAULT_BITS + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) + // guarantee that interruption collection is on ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i + SSM_PSR_I(p15, p15, r3) // restore psr.i adds r3=8,r2 // set up second base pointer for SAVE_REST ;; alloc r14=ar.pfs,0,0,1,0 // must be first in insn group @@ -1729,12 +1725,11 @@ END(dispatch_illegal_op_fault) ENTRY(dispatch_to_ia32_handler) SAVE_MIN ;; - mov r14=cr.isr - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on + MOV_FROM_ISR(r14) + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) + // guarantee that interruption collection is on ;; -(p15) ssm psr.i + SSM_PSR_I(p15, p15, r3) adds r3=8,r2 // Base pointer for SAVE_REST ;; SAVE_REST -- cgit v1.2.3 From 4df8d22bbbb16ccfa4e10cc068135183c9e5e006 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Tue, 27 May 2008 15:08:01 -0700 Subject: [IA64] pvops: paravirtualize entry.S paravirtualize ia64_switch_to, ia64_leave_syscall and ia64_leave_kernel. They include sensitive or performance critical privileged instructions so that they need paravirtualization. To paravirtualize them by single source and multi compile they are converted into indirect jump. And define each pv instances. 
Cc: Keith Owens Cc: "Dong, Eddie" Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/Makefile | 2 +- arch/ia64/kernel/entry.S | 115 ++++++++++++++++++++++++--------------- arch/ia64/kernel/paravirt.c | 19 +++++++ arch/ia64/kernel/paravirtentry.S | 60 ++++++++++++++++++++ 4 files changed, 152 insertions(+), 44 deletions(-) create mode 100644 arch/ia64/kernel/paravirtentry.S (limited to 'arch') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 8b2524293eb..cea91f17d44 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -36,7 +36,7 @@ obj-$(CONFIG_PCI_MSI) += msi_ia64.o mca_recovery-y += mca_drv.o mca_drv_asm.o obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o -obj-$(CONFIG_PARAVIRT) += paravirt.o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index ca2bb95726d..56ab156c48a 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -22,6 +22,11 @@ * Patrick O'Rourke * 11/07/2000 */ +/* + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * pv_ops. + */ /* * Global (preserved) predicate usage on syscall entry/exit path: * @@ -45,6 +50,7 @@ #include "minstate.h" +#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE /* * execve() is special because in case of success, we need to * setup a null register window frame. @@ -173,6 +179,7 @@ GLOBAL_ENTRY(sys_clone) mov rp=loc0 br.ret.sptk.many rp END(sys_clone) +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ /* * prev_task <- ia64_switch_to(struct task_struct *next) @@ -180,7 +187,7 @@ END(sys_clone) * called. The code starting at .map relies on this. The rest of the code * doesn't care about the interrupt masking status. 
*/ -GLOBAL_ENTRY(ia64_switch_to) +GLOBAL_ENTRY(__paravirt_switch_to) .prologue alloc r16=ar.pfs,1,0,0,0 DO_SAVE_SWITCH_STACK @@ -204,7 +211,7 @@ GLOBAL_ENTRY(ia64_switch_to) ;; .done: ld8 sp=[r21] // load kernel stack pointer of new task - mov IA64_KR(CURRENT)=in0 // update "current" application register + MOV_TO_KR(CURRENT, in0, r8, r9) // update "current" application register mov r8=r13 // return pointer to previously running task mov r13=in0 // set "current" pointer ;; @@ -216,26 +223,25 @@ GLOBAL_ENTRY(ia64_switch_to) br.ret.sptk.many rp // boogie on out in new context .map: - rsm psr.ic // interrupts (psr.i) are already disabled here + RSM_PSR_IC(r25) // interrupts (psr.i) are already disabled here movl r25=PAGE_KERNEL ;; srlz.d or r23=r25,r20 // construct PA | page properties mov r25=IA64_GRANULE_SHIFT<<2 ;; - mov cr.itir=r25 - mov cr.ifa=in0 // VA of next task... + MOV_TO_ITIR(p0, r25, r8) + MOV_TO_IFA(in0, r8) // VA of next task... ;; mov r25=IA64_TR_CURRENT_STACK - mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped... + MOV_TO_KR(CURRENT_STACK, r26, r8, r9) // remember last page we mapped... ;; itr.d dtr[r25]=r23 // wire in new mapping... - ssm psr.ic // reenable the psr.ic bit - ;; - srlz.d + SSM_PSR_IC_AND_SRLZ_D(r8, r9) // reenable the psr.ic bit br.cond.sptk .done -END(ia64_switch_to) +END(__paravirt_switch_to) +#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE /* * Note that interrupts are enabled during save_switch_stack and load_switch_stack. 
This * means that we may get an interrupt with "sp" pointing to the new kernel stack while @@ -375,7 +381,7 @@ END(save_switch_stack) * - b7 holds address to return to * - must not touch r8-r11 */ -ENTRY(load_switch_stack) +GLOBAL_ENTRY(load_switch_stack) .prologue .altrp b7 @@ -571,7 +577,7 @@ GLOBAL_ENTRY(ia64_trace_syscall) .ret3: (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk (pUStk) rsm psr.i // disable interrupts - br.cond.sptk .work_pending_syscall_end + br.cond.sptk ia64_work_pending_syscall_end strace_error: ld8 r3=[r2] // load pt_regs.r8 @@ -636,8 +642,17 @@ GLOBAL_ENTRY(ia64_ret_from_syscall) adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 mov r10=r0 // clear error indication in r10 (p7) br.cond.spnt handle_syscall_error // handle potential syscall failure +#ifdef CONFIG_PARAVIRT + ;; + br.cond.sptk.few ia64_leave_syscall + ;; +#endif /* CONFIG_PARAVIRT */ END(ia64_ret_from_syscall) +#ifndef CONFIG_PARAVIRT // fall through +#endif +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ + /* * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't * need to switch to bank 0 and doesn't restore the scratch registers. @@ -682,7 +697,7 @@ END(ia64_ret_from_syscall) * ar.csd: cleared * ar.ssd: cleared */ -ENTRY(ia64_leave_syscall) +GLOBAL_ENTRY(__paravirt_leave_syscall) PT_REGS_UNWIND_INFO(0) /* * work.need_resched etc. mustn't get changed by this CPU before it returns to @@ -692,11 +707,11 @@ ENTRY(ia64_leave_syscall) * extra work. We always check for extra work when returning to user-level. * With CONFIG_PREEMPT, we also check for extra work when the preempt_count * is 0. After extra work processing has been completed, execution - * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check + * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check * needs to be redone. 
*/ #ifdef CONFIG_PREEMPT - rsm psr.i // disable interrupts + RSM_PSR_I(p0, r2, r18) // disable interrupts cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 ;; @@ -706,11 +721,12 @@ ENTRY(ia64_leave_syscall) ;; cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) #else /* !CONFIG_PREEMPT */ -(pUStk) rsm psr.i + RSM_PSR_I(pUStk, r2, r18) cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk #endif -.work_processed_syscall: +.global __paravirt_work_processed_syscall; +__paravirt_work_processed_syscall: #ifdef CONFIG_VIRT_CPU_ACCOUNTING adds r2=PT(LOADRS)+16,r12 (pUStk) mov.m r22=ar.itc // fetch time at leave @@ -744,7 +760,7 @@ ENTRY(ia64_leave_syscall) (pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE! ;; invala // M0|1 invalidate ALAT - rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection + RSM_PSR_I_IC(r28, r29, r30) // M2 turn off interrupts and interruption collection cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs ld8 r29=[r2],16 // M0|1 load cr.ipsr @@ -765,7 +781,7 @@ ENTRY(ia64_leave_syscall) ;; #endif ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs -(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled + MOV_FROM_PSR(pKStk, r22, r21) // M2 read PSR now that interrupts are disabled nop 0 ;; ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0 @@ -798,7 +814,7 @@ ENTRY(ia64_leave_syscall) srlz.d // M0 ensure interruption collection is off (for cover) shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition - cover // B add current frame into dirty partition & set cr.ifs + COVER // B add current frame into dirty partition & set cr.ifs ;; #ifdef CONFIG_VIRT_CPU_ACCOUNTING mov r19=ar.bsp // M2 get new backing store pointer @@ -823,8 +839,9 @@ ENTRY(ia64_leave_syscall) mov.m ar.ssd=r0 // M2 clear ar.ssd mov f11=f0 // F clear f11 br.cond.sptk.many rbs_switch // B 
-END(ia64_leave_syscall) +END(__paravirt_leave_syscall) +#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE #ifdef CONFIG_IA32_SUPPORT GLOBAL_ENTRY(ia64_ret_from_ia32_execve) PT_REGS_UNWIND_INFO(0) @@ -835,10 +852,20 @@ GLOBAL_ENTRY(ia64_ret_from_ia32_execve) st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit .mem.offset 8,0 st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit +#ifdef CONFIG_PARAVIRT + ;; + // don't fall through, ia64_leave_kernel may be #define'd + br.cond.sptk.few ia64_leave_kernel + ;; +#endif /* CONFIG_PARAVIRT */ END(ia64_ret_from_ia32_execve) +#ifndef CONFIG_PARAVIRT // fall through +#endif #endif /* CONFIG_IA32_SUPPORT */ -GLOBAL_ENTRY(ia64_leave_kernel) +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ + +GLOBAL_ENTRY(__paravirt_leave_kernel) PT_REGS_UNWIND_INFO(0) /* * work.need_resched etc. mustn't get changed by this CPU before it returns to @@ -852,7 +879,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) * needs to be redone. */ #ifdef CONFIG_PREEMPT - rsm psr.i // disable interrupts + RSM_PSR_I(p0, r17, r31) // disable interrupts cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 ;; @@ -862,7 +889,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) ;; cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) #else -(pUStk) rsm psr.i + RSM_PSR_I(pUStk, r17, r31) cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk #endif @@ -910,7 +937,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) mov ar.csd=r30 mov ar.ssd=r31 ;; - rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection + RSM_PSR_I_IC(r23, r22, r25) // initiate turning off of interrupt and interruption collection invala // invalidate ALAT ;; ld8.fill r22=[r2],24 @@ -942,7 +969,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) mov ar.ccv=r15 ;; ldf.fill f11=[r2] - bsw.0 // switch back to bank 0 (no stop bit required beforehand...) 
+ BSW_0(r2, r3, r15) // switch back to bank 0 (no stop bit required beforehand...) ;; (pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency) adds r16=PT(CR_IPSR)+16,r12 @@ -950,12 +977,12 @@ GLOBAL_ENTRY(ia64_leave_kernel) #ifdef CONFIG_VIRT_CPU_ACCOUNTING .pred.rel.mutex pUStk,pKStk -(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled + MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled (pUStk) mov.m r22=ar.itc // M fetch time at leave nop.i 0 ;; #else -(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled + MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled nop.i 0 nop.i 0 ;; @@ -1027,7 +1054,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) * NOTE: alloc, loadrs, and cover can't be predicated. */ (pNonSys) br.cond.dpnt dont_preserve_current_frame - cover // add current frame into dirty partition and set cr.ifs + COVER // add current frame into dirty partition and set cr.ifs ;; mov r19=ar.bsp // get new backing store pointer rbs_switch: @@ -1130,16 +1157,16 @@ skip_rbs_switch: (pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp (pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise ;; - mov cr.ipsr=r29 // M2 + MOV_TO_IPSR(p0, r29, r25) // M2 mov ar.pfs=r26 // I0 (pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise -(p9) mov cr.ifs=r30 // M2 + MOV_TO_IFS(p9, r30, r25)// M2 mov b0=r21 // I0 (pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise mov ar.fpsr=r20 // M2 - mov cr.iip=r28 // M2 + MOV_TO_IIP(r28, r25) // M2 nop 0 ;; (pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode @@ -1148,7 +1175,7 @@ skip_rbs_switch: mov ar.rsc=r27 // M2 mov pr=r31,-1 // I0 - rfi // B + RFI // B /* * On entry: @@ -1174,35 +1201,36 @@ skip_rbs_switch: ;; (pKStk) st4 [r20]=r21 #endif - ssm psr.i // enable interrupts + SSM_PSR_I(p0, p6, r2) // enable interrupts br.call.spnt.many rp=schedule .ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 (re-check) - rsm 
psr.i // disable interrupts + RSM_PSR_I(p0, r2, r20) // disable interrupts ;; #ifdef CONFIG_PREEMPT (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 ;; (pKStk) st4 [r20]=r0 // preempt_count() <- 0 #endif -(pLvSys)br.cond.sptk.few .work_pending_syscall_end +(pLvSys)br.cond.sptk.few __paravirt_pending_syscall_end br.cond.sptk.many .work_processed_kernel .notify: (pUStk) br.call.spnt.many rp=notify_resume_user .ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 (don't re-check) -(pLvSys)br.cond.sptk.few .work_pending_syscall_end +(pLvSys)br.cond.sptk.few __paravirt_pending_syscall_end br.cond.sptk.many .work_processed_kernel -.work_pending_syscall_end: +.global __paravirt_pending_syscall_end; +__paravirt_pending_syscall_end: adds r2=PT(R8)+16,r12 adds r3=PT(R10)+16,r12 ;; ld8 r8=[r2] ld8 r10=[r3] - br.cond.sptk.many .work_processed_syscall - -END(ia64_leave_kernel) + br.cond.sptk.many __paravirt_work_processed_syscall_target +END(__paravirt_leave_kernel) +#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE ENTRY(handle_syscall_error) /* * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could @@ -1244,7 +1272,7 @@ END(ia64_invoke_schedule_tail) * We declare 8 input registers so the system call args get preserved, * in case we need to restart a system call. */ -ENTRY(notify_resume_user) +GLOBAL_ENTRY(notify_resume_user) .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! 
mov r9=ar.unat @@ -1306,7 +1334,7 @@ ENTRY(sys_rt_sigreturn) adds sp=16,sp ;; ld8 r9=[sp] // load new ar.unat - mov.sptk b7=r8,ia64_leave_kernel + mov.sptk b7=r8,ia64_native_leave_kernel ;; mov ar.unat=r9 br.many b7 @@ -1665,3 +1693,4 @@ sys_call_table: data8 sys_timerfd_gettime .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls +#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index e5482bb6841..7126ea8f7ec 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -286,3 +286,22 @@ struct pv_cpu_ops pv_cpu_ops = { = ia64_native_intrin_local_irq_restore_func, }; EXPORT_SYMBOL(pv_cpu_ops); + +/****************************************************************************** + * replacement of hand written assembly codes. + */ + +void +paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch) +{ + extern unsigned long paravirt_switch_to_targ; + extern unsigned long paravirt_leave_syscall_targ; + extern unsigned long paravirt_work_processed_syscall_targ; + extern unsigned long paravirt_leave_kernel_targ; + + paravirt_switch_to_targ = cpu_asm_switch->switch_to; + paravirt_leave_syscall_targ = cpu_asm_switch->leave_syscall; + paravirt_work_processed_syscall_targ = + cpu_asm_switch->work_processed_syscall; + paravirt_leave_kernel_targ = cpu_asm_switch->leave_kernel; +} diff --git a/arch/ia64/kernel/paravirtentry.S b/arch/ia64/kernel/paravirtentry.S new file mode 100644 index 00000000000..2f42fcb9776 --- /dev/null +++ b/arch/ia64/kernel/paravirtentry.S @@ -0,0 +1,60 @@ +/****************************************************************************** + * linux/arch/ia64/xen/paravirtentry.S + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include "entry.h" + +#define DATA8(sym, init_value) \ + .pushsection .data.read_mostly ; \ + .align 8 ; \ + .global sym ; \ + sym: ; \ + data8 init_value ; \ + .popsection + +#define BRANCH(targ, reg, breg) \ + movl reg=targ ; \ + ;; \ + ld8 reg=[reg] ; \ + ;; \ + mov breg=reg ; \ + br.cond.sptk.many breg + +#define BRANCH_PROC(sym, reg, breg) \ + DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ + GLOBAL_ENTRY(paravirt_ ## sym) ; \ + BRANCH(paravirt_ ## sym ## _targ, reg, breg) ; \ + END(paravirt_ ## sym) + +#define BRANCH_PROC_UNWINFO(sym, reg, breg) \ + DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ + GLOBAL_ENTRY(paravirt_ ## sym) ; \ + PT_REGS_UNWIND_INFO(0) ; \ + BRANCH(paravirt_ ## sym ## _targ, reg, breg) ; \ + END(paravirt_ ## sym) + + +BRANCH_PROC(switch_to, r22, b7) +BRANCH_PROC_UNWINFO(leave_syscall, r22, b7) +BRANCH_PROC(work_processed_syscall, r2, b7) +BRANCH_PROC_UNWINFO(leave_kernel, r22, b7) -- cgit v1.2.3 From 213060a4d6991a95d0b9344406d195be3464accf Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:40 +0900 Subject: [IA64] pvops: paravirtualize NR_IRQS Make NR_IRQ overridable by each pv instances. 
Pv instance may need each own number of irqs so that NR_IRQS should be the maximum number of nr_irqs each pv instances need. Cc: Jes Sorensen Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/Makefile | 6 ++++++ arch/ia64/kernel/Makefile | 33 +++++++++++++++++++++++++++++++++ arch/ia64/kernel/nr-irqs.c | 24 ++++++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 arch/ia64/kernel/nr-irqs.c (limited to 'arch') diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 88f1a55c6c9..3b9c8cadfd3 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -99,3 +99,9 @@ define archhelp echo ' boot - Build vmlinux and bootloader for Ski simulator' echo '* unwcheck - Check vmlinux for invalid unwind info' endef + +archprepare: make_nr_irqs_h FORCE +PHONY += make_nr_irqs_h FORCE + +make_nr_irqs_h: FORCE + $(Q)$(MAKE) $(build)=arch/ia64/kernel include/asm-ia64/nr-irqs.h diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index cea91f17d44..87fea11aecb 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -73,6 +73,39 @@ $(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE # Note: kbuild does not track this dependency due to usage of .incbin $(obj)/gate-data.o: $(obj)/gate.so +# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) 
based on config +define sed-y + "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" +endef +quiet_cmd_nr_irqs = GEN $@ +define cmd_nr_irqs + (set -e; \ + echo "#ifndef __ASM_NR_IRQS_H__"; \ + echo "#define __ASM_NR_IRQS_H__"; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was generated by Kbuild"; \ + echo " *"; \ + echo " */"; \ + echo ""; \ + sed -ne $(sed-y) $<; \ + echo ""; \ + echo "#endif" ) > $@ +endef + +# We use internal kbuild rules to avoid the "is up to date" message from make +arch/$(SRCARCH)/kernel/nr-irqs.s: $(srctree)/arch/$(SRCARCH)/kernel/nr-irqs.c \ + $(wildcard $(srctree)/include/asm-ia64/*/irq.h) + $(Q)mkdir -p $(dir $@) + $(call if_changed_dep,cc_s_c) + +include/asm-ia64/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s + $(Q)mkdir -p $(dir $@) + $(call cmd,nr_irqs) + +clean-files += $(objtree)/include/asm-ia64/nr-irqs.h + # # native ivt.S and entry.S # diff --git a/arch/ia64/kernel/nr-irqs.c b/arch/ia64/kernel/nr-irqs.c new file mode 100644 index 00000000000..1ae049181e8 --- /dev/null +++ b/arch/ia64/kernel/nr-irqs.c @@ -0,0 +1,24 @@ +/* + * calculate + * NR_IRQS = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, FOO_NR_IRQS...) + * depending on config. + * This must be calculated before processing asm-offset.c. + */ + +#define ASM_OFFSETS_C 1 + +#include +#include +#include + +void foo(void) +{ + union paravirt_nr_irqs_max { + char ia64_native_nr_irqs[IA64_NATIVE_NR_IRQS]; +#ifdef CONFIG_XEN + char xen_nr_irqs[XEN_NR_IRQS]; +#endif + }; + + DEFINE(NR_IRQS, sizeof (union paravirt_nr_irqs_max)); +} -- cgit v1.2.3 From e51835d58a5abdf82211f36f500f666ca7ef9aee Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:41 +0900 Subject: [IA64] pvops: define initialization hooks, pv_init_ops, for paravirtualized environment. define pv_init_ops hooks which represents various initialization hooks for paravirtualized environment. and add hooks. 
Signed-off-by: Alex Williamson Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/paravirt.c | 7 +++++++ arch/ia64/kernel/setup.c | 10 ++++++++++ arch/ia64/kernel/smpboot.c | 2 ++ 3 files changed, 19 insertions(+) (limited to 'arch') diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 7126ea8f7ec..5daf659ff29 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -41,6 +41,13 @@ struct pv_info pv_info = { .name = "bare hardware" }; +/*************************************************************************** + * pv_init_ops + * initialization hooks. + */ + +struct pv_init_ops pv_init_ops; + /*************************************************************************** * pv_cpu_ops * intrinsics hooks. diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index f48a809c686..750749551e8 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -341,6 +342,8 @@ reserve_memory (void) rsvd_region[n].end = (unsigned long) ia64_imva(_end); n++; + n += paravirt_reserve_memory(&rsvd_region[n]); + #ifdef CONFIG_BLK_DEV_INITRD if (ia64_boot_param->initrd_start) { rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start); @@ -519,6 +522,8 @@ setup_arch (char **cmdline_p) { unw_init(); + paravirt_arch_setup_early(); + ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); *cmdline_p = __va(ia64_boot_param->command_line); @@ -584,6 +589,9 @@ setup_arch (char **cmdline_p) acpi_boot_init(); #endif + paravirt_banner(); + paravirt_arch_setup_console(cmdline_p); + #ifdef CONFIG_VT if (!conswitchp) { # if defined(CONFIG_DUMMY_CONSOLE) @@ -603,6 +611,8 @@ setup_arch (char **cmdline_p) #endif /* enable IA-64 Machine Check Abort Handling unless disabled */ + if (paravirt_arch_setup_nomca()) + nomca = 1; if (!nomca) ia64_mca_init(); diff --git a/arch/ia64/kernel/smpboot.c 
b/arch/ia64/kernel/smpboot.c index d7ad42b77d4..933f3881152 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -642,6 +643,7 @@ void __devinit smp_prepare_boot_cpu(void) cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), cpu_callin_map); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; + paravirt_post_smp_prepare_boot_cpu(); } #ifdef CONFIG_HOTPLUG_CPU -- cgit v1.2.3 From 33b39e84209b0308b572dce017df7ee9b63f086c Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:42 +0900 Subject: [IA64] pvops: add hooks, pv_iosapic_ops, to paravirtualize iosapic. add hooks to paravirtualize iosapic which is a real hardware resource. On virtualized environment it may be replaced something virtualized friendly. Define pv_iosapic_ops and add the hooks. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/iosapic.c | 45 +++++++++++++++++++++++++++++---------------- arch/ia64/kernel/paravirt.c | 25 +++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 082c31dcfd9..587196dd84f 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -587,6 +587,15 @@ static inline int irq_is_shared (int irq) return (iosapic_intr_info[irq].count > 1); } +struct irq_chip* +ia64_native_iosapic_get_irq_chip(unsigned long trigger) +{ + if (trigger == IOSAPIC_EDGE) + return &irq_type_iosapic_edge; + else + return &irq_type_iosapic_level; +} + static int register_intr (unsigned int gsi, int irq, unsigned char delivery, unsigned long polarity, unsigned long trigger) @@ -637,13 +646,10 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery, iosapic_intr_info[irq].dmode = delivery; iosapic_intr_info[irq].trigger = trigger; - if (trigger == IOSAPIC_EDGE) - irq_type = &irq_type_iosapic_edge; 
- else - irq_type = &irq_type_iosapic_level; + irq_type = iosapic_get_irq_chip(trigger); idesc = irq_desc + irq; - if (idesc->chip != irq_type) { + if (irq_type != NULL && idesc->chip != irq_type) { if (idesc->chip != &no_irq_type) printk(KERN_WARNING "%s: changing vector %d from %s to %s\n", @@ -975,6 +981,22 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, set_rte(gsi, irq, dest, 1); } +void __init +ia64_native_iosapic_pcat_compat_init(void) +{ + if (pcat_compat) { + /* + * Disable the compatibility mode interrupts (8259 style), + * needs IN/OUT support enabled. + */ + printk(KERN_INFO + "%s: Disabling PC-AT compatible 8259 interrupts\n", + __func__); + outb(0xff, 0xA1); + outb(0xff, 0x21); + } +} + void __init iosapic_system_init (int system_pcat_compat) { @@ -989,17 +1011,8 @@ iosapic_system_init (int system_pcat_compat) } pcat_compat = system_pcat_compat; - if (pcat_compat) { - /* - * Disable the compatibility mode interrupts (8259 style), - * needs IN/OUT support enabled. - */ - printk(KERN_INFO - "%s: Disabling PC-AT compatible 8259 interrupts\n", - __func__); - outb(0xff, 0xA1); - outb(0xff, 0x21); - } + if (pcat_compat) + iosapic_pcat_compat_init(); } static inline int diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 5daf659ff29..65c211b2f98 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -312,3 +312,28 @@ paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch) cpu_asm_switch->work_processed_syscall; paravirt_leave_kernel_targ = cpu_asm_switch->leave_kernel; } + +/*************************************************************************** + * pv_iosapic_ops + * iosapic read/write hooks. 
+ */ + +static unsigned int +ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg) +{ + return __ia64_native_iosapic_read(iosapic, reg); +} + +static void +ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) +{ + __ia64_native_iosapic_write(iosapic, reg, val); +} + +struct pv_iosapic_ops pv_iosapic_ops = { + .pcat_compat_init = ia64_native_iosapic_pcat_compat_init, + .get_irq_chip = ia64_native_iosapic_get_irq_chip, + + .__read = ia64_native_iosapic_read, + .__write = ia64_native_iosapic_write, +}; -- cgit v1.2.3 From 85cbc503787d577c215f9540c57294e1ec799144 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:43 +0900 Subject: [IA64] pvops: add hooks, pv_irq_ops, to paravirtualized irq related operations. introduce pv_irq_ops which adds hooks to paravirtualize irq related operations. On virtualized environment, interruption may be replaced by something virtualization friendly. So the irq related operation also may need paravirtualization. This patch adds necessary hooks to paravirtualize irq related operations. 
Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/irq_ia64.c | 18 +++++++++++++----- arch/ia64/kernel/paravirt.c | 15 +++++++++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index c48171bc796..28d3d483db9 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -196,7 +196,7 @@ static void clear_irq_vector(int irq) } int -assign_irq_vector (int irq) +ia64_native_assign_irq_vector (int irq) { unsigned long flags; int vector, cpu; @@ -222,7 +222,7 @@ assign_irq_vector (int irq) } void -free_irq_vector (int vector) +ia64_native_free_irq_vector (int vector) { if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR) @@ -622,7 +622,7 @@ static struct irqaction tlb_irqaction = { #endif void -register_percpu_irq (ia64_vector vec, struct irqaction *action) +ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action) { irq_desc_t *desc; unsigned int irq; @@ -637,13 +637,21 @@ register_percpu_irq (ia64_vector vec, struct irqaction *action) } void __init -init_IRQ (void) +ia64_native_register_ipi(void) { - register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL); #ifdef CONFIG_SMP register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction); register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction); +#endif +} + +void __init +init_IRQ (void) +{ + ia64_register_ipi(); + register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL); +#ifdef CONFIG_SMP #if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG) if (vector_domain_type != VECTOR_DOMAIN_NONE) { BUG_ON(IA64_FIRST_DEVICE_VECTOR != IA64_IRQ_MOVE_VECTOR); diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 65c211b2f98..ba5383be03c 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -337,3 +337,18 @@ struct 
pv_iosapic_ops pv_iosapic_ops = { .__read = ia64_native_iosapic_read, .__write = ia64_native_iosapic_write, }; + +/*************************************************************************** + * pv_irq_ops + * irq operations + */ + +struct pv_irq_ops pv_irq_ops = { + .register_ipi = ia64_native_register_ipi, + + .assign_irq_vector = ia64_native_assign_irq_vector, + .free_irq_vector = ia64_native_free_irq_vector, + .register_percpu_irq = ia64_native_register_percpu_irq, + + .resend_irq = ia64_native_resend_irq, +}; -- cgit v1.2.3 From 00d21d82b8a9e290286e09d8eedc20bfc33b0eee Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 19 May 2008 22:13:44 +0900 Subject: [IA64] pvops: add to hooks, pv_time_ops, for steal time accounting. Introduce pv_time_ops which adds hook to steal time accounting. On virtualized environment, cpus are shared by many guests and steal time is the time which is used for other guests. On virtualized environment, steal time should be accounted. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/paravirt.c | 15 +++++++++++++++ arch/ia64/kernel/time.c | 23 +++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'arch') diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index ba5383be03c..afaf5b9a2cf 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -352,3 +352,18 @@ struct pv_irq_ops pv_irq_ops = { .resend_irq = ia64_native_resend_irq, }; + +/*************************************************************************** + * pv_time_ops + * time operations + */ + +static int +ia64_native_do_steal_accounting(unsigned long *new_itm) +{ + return 0; +} + +struct pv_time_ops pv_time_ops = { + .do_steal_accounting = ia64_native_do_steal_accounting, +}; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 8c73643f2d6..046ca89efc0 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -24,6 +24,7 @@ #include #include #include +#include 
#include #include #include @@ -48,6 +49,15 @@ EXPORT_SYMBOL(last_cli_ip); #endif +#ifdef CONFIG_PARAVIRT +static void +paravirt_clocksource_resume(void) +{ + if (pv_time_ops.clocksource_resume) + pv_time_ops.clocksource_resume(); +} +#endif + static struct clocksource clocksource_itc = { .name = "itc", .rating = 350, @@ -56,6 +66,9 @@ static struct clocksource clocksource_itc = { .mult = 0, /*to be calculated*/ .shift = 16, .flags = CLOCK_SOURCE_IS_CONTINUOUS, +#ifdef CONFIG_PARAVIRT + .resume = paravirt_clocksource_resume, +#endif }; static struct clocksource *itc_clocksource; @@ -156,6 +169,9 @@ timer_interrupt (int irq, void *dev_id) profile_tick(CPU_PROFILING); + if (paravirt_do_steal_accounting(&new_itm)) + goto skip_process_time_accounting; + while (1) { update_process_times(user_mode(get_irq_regs())); @@ -185,6 +201,8 @@ timer_interrupt (int irq, void *dev_id) local_irq_disable(); } +skip_process_time_accounting: + do { /* * If we're too close to the next clock tick for @@ -334,6 +352,11 @@ ia64_init_itm (void) */ clocksource_itc.rating = 50; + paravirt_init_missing_ticks_accounting(smp_processor_id()); + + /* avoid softlock up message when cpu is unplug and plugged again. */ + touch_softlockup_watchdog(); + /* Setup the CPU local timer tick */ ia64_cpu_local_tick(); -- cgit v1.2.3 From 4d58bbcc89e267d52b4df572acbf209a60a8a497 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Wed, 28 May 2008 09:41:58 -0700 Subject: [IA64] pv_ops: move some functions in ivt.S to avoid lack of space. move interrupt, page_fault, non_syscall, dispatch_unaligned_handler and dispatch_to_fault_handler to avoid lack of instruction space. The change set 4dcc29e1574d88f4465ba865ed82800032f76418 bloated SAVE_MIN_WITH_COVER, SAVE_MIN_WITH_COVER_R19 so that it bloated the functions which use those macros. In the native case, only dispatch_illegal_op_fault had to be moved. In the paravirtualized case, all the functions which use the macros need to be moved to avoid the lack of space. 
Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/ivt.S | 261 +++++++++++++++++++++++++------------------------ 1 file changed, 133 insertions(+), 128 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 23749ed3cf0..c39627df3cd 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -515,27 +515,6 @@ ENTRY(ikey_miss) FAULT(6) END(ikey_miss) - //----------------------------------------------------------------------------------- - // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) -ENTRY(page_fault) - SSM_PSR_DT_AND_SRLZ_I - ;; - SAVE_MIN_WITH_COVER - alloc r15=ar.pfs,0,0,3,0 - MOV_FROM_IFA(out0) - MOV_FROM_ISR(out1) - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3) - adds r3=8,r2 // set up second base pointer - SSM_PSR_I(p15, p15, r14) // restore psr.i - movl r14=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 // out2 = pointer to pt_regs - br.call.sptk.many b6=ia64_do_page_fault // ignore return address -END(page_fault) - .org ia64_ivt+0x1c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) @@ -896,26 +875,8 @@ END(break_fault) ///////////////////////////////////////////////////////////////////////////////////////// // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) ENTRY(interrupt) - DBG_FAULT(12) - mov r31=pr // prepare to save predicates - ;; - SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14) - // ensure everybody knows psr.ic is back on - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - SAVE_REST - ;; - MCA_RECOVER_RANGE(interrupt) - alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group - MOV_FROM_IVR(out0, r8) // pass cr.ivr as first arg - add out1=16,sp // pass pointer to pt_regs as second arg - ;; - srlz.d // make sure we see the 
effect of cr.ivr - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_handle_irq + /* interrupt handler has become too big to fit this area. */ + br.sptk.many __interrupt END(interrupt) .org ia64_ivt+0x3400 @@ -1125,105 +1086,18 @@ END(account_sys_enter) DBG_FAULT(17) FAULT(17) -ENTRY(non_syscall) - mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER - ;; - SAVE_MIN_WITH_COVER - - // There is no particular reason for this code to be here, other than that - // there happens to be space here that would go unused otherwise. If this - // fault ever gets "unreserved", simply moved the following code to a more - // suitable spot... - - alloc r14=ar.pfs,0,0,2,0 - MOV_FROM_IIM(out0) - add out1=16,sp - adds r3=8,r2 // set up second base pointer for SAVE_REST - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24) - // guarantee that interruption collection is on - SSM_PSR_I(p15, p15, r15) // restore psr.i - movl r15=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r15 - ;; - br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr -END(non_syscall) - .org ia64_ivt+0x4800 ///////////////////////////////////////////////////////////////////////////////////////// // 0x4800 Entry 18 (size 64 bundles) Reserved DBG_FAULT(18) FAULT(18) - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_unaligned_handler) - SAVE_MIN_WITH_COVER - ;; - alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) 
- MOV_FROM_IFA(out0) - adds out1=16,sp - - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) - // guarantee that interruption collection is on - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.sptk.many ia64_prepare_handle_unaligned -END(dispatch_unaligned_handler) - .org ia64_ivt+0x4c00 ///////////////////////////////////////////////////////////////////////////////////////// // 0x4c00 Entry 19 (size 64 bundles) Reserved DBG_FAULT(19) FAULT(19) - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_to_fault_handler) - /* - * Input: - * psr.ic: off - * r19: fault vector number (e.g., 24 for General Exception) - * r31: contains saved predicates (pr) - */ - SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - MOV_FROM_ISR(out1) - MOV_FROM_IFA(out2) - MOV_FROM_IIM(out3) - MOV_FROM_ITIR(out4) - ;; - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0) - // guarantee that interruption collection is on - mov out0=r15 - ;; - SSM_PSR_I(p15, p15, r3) // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_fault -END(dispatch_to_fault_handler) - // // --- End of long entries, Beginning of short entries // @@ -1670,6 +1544,137 @@ END(ia32_interrupt) DBG_FAULT(67) FAULT(67) + //----------------------------------------------------------------------------------- + // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) +ENTRY(page_fault) + SSM_PSR_DT_AND_SRLZ_I + ;; + SAVE_MIN_WITH_COVER + alloc r15=ar.pfs,0,0,3,0 + MOV_FROM_IFA(out0) + MOV_FROM_ISR(out1) + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3) + adds r3=8,r2 
// set up second base pointer + SSM_PSR_I(p15, p15, r14) // restore psr.i + movl r14=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 // out2 = pointer to pt_regs + br.call.sptk.many b6=ia64_do_page_fault // ignore return address +END(page_fault) + +ENTRY(non_syscall) + mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER + ;; + SAVE_MIN_WITH_COVER + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... + + alloc r14=ar.pfs,0,0,2,0 + MOV_FROM_IIM(out0) + add out1=16,sp + adds r3=8,r2 // set up second base pointer for SAVE_REST + + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24) + // guarantee that interruption collection is on + SSM_PSR_I(p15, p15, r15) // restore psr.i + movl r15=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r15 + ;; + br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr +END(non_syscall) + +ENTRY(__interrupt) + DBG_FAULT(12) + mov r31=pr // prepare to save predicates + ;; + SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14) + // ensure everybody knows psr.ic is back on + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + SAVE_REST + ;; + MCA_RECOVER_RANGE(interrupt) + alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group + MOV_FROM_IVR(out0, r8) // pass cr.ivr as first arg + add out1=16,sp // pass pointer to pt_regs as second arg + ;; + srlz.d // make sure we see the effect of cr.ivr + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.many b6=ia64_handle_irq +END(__interrupt) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. 
If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + +ENTRY(dispatch_unaligned_handler) + SAVE_MIN_WITH_COVER + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + MOV_FROM_IFA(out0) + adds out1=16,sp + + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) + // guarantee that interruption collection is on + SSM_PSR_I(p15, p15, r3) // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.sptk.many ia64_prepare_handle_unaligned +END(dispatch_unaligned_handler) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + +ENTRY(dispatch_to_fault_handler) + /* + * Input: + * psr.ic: off + * r19: fault vector number (e.g., 24 for General Exception) + * r31: contains saved predicates (pr) + */ + SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + MOV_FROM_ISR(out1) + MOV_FROM_IFA(out2) + MOV_FROM_IIM(out3) + MOV_FROM_ITIR(out4) + ;; + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0) + // guarantee that interruption collection is on + mov out0=r15 + ;; + SSM_PSR_I(p15, p15, r3) // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.many b6=ia64_fault +END(dispatch_to_fault_handler) + /* * Squatting in this space ... * -- cgit v1.2.3 From da3854fc9f80c0240ba7cadd2aebf036683ff21b Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 24 Jun 2008 22:15:58 +0100 Subject: DM9000: Fixup blackfin after removing 2 resource usage The dm9000 driver accepts either 2 or 3 resources to describe the platform devices. 
The 2 resources case abuses the ioresource mechanism by passing ioremap()ed memory through the platform device resources. This patch converts boards that were using it to the 3 resources scheme. CC: Bryan Wu Signed-off-by: Ben Dooks Signed-off-by: Laurent Pinchart Signed-off-by: Jeff Garzik --- arch/blackfin/mach-bf527/boards/ezkit.c | 7 ++++++- arch/blackfin/mach-bf533/boards/H8606.c | 7 ++++++- arch/blackfin/mach-bf537/boards/generic_board.c | 7 ++++++- 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/blackfin/mach-bf527/boards/ezkit.c b/arch/blackfin/mach-bf527/boards/ezkit.c index 5958eecefcf..689b69c98ee 100644 --- a/arch/blackfin/mach-bf527/boards/ezkit.c +++ b/arch/blackfin/mach-bf527/boards/ezkit.c @@ -323,10 +323,15 @@ static struct platform_device smc91x_device = { static struct resource dm9000_resources[] = { [0] = { .start = 0x203FB800, - .end = 0x203FB800 + 8, + .end = 0x203FB800 + 1, .flags = IORESOURCE_MEM, }, [1] = { + .start = 0x203FB800 + 4, + .end = 0x203FB800 + 5, + .flags = IORESOURCE_MEM, + }, + [2] = { .start = IRQ_PF9, .end = IRQ_PF9, .flags = (IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE), diff --git a/arch/blackfin/mach-bf533/boards/H8606.c b/arch/blackfin/mach-bf533/boards/H8606.c index 7cc4864f6aa..4103a97c1a7 100644 --- a/arch/blackfin/mach-bf533/boards/H8606.c +++ b/arch/blackfin/mach-bf533/boards/H8606.c @@ -65,10 +65,15 @@ static struct platform_device rtc_device = { static struct resource dm9000_resources[] = { [0] = { .start = 0x20300000, - .end = 0x20300000 + 8, + .end = 0x20300000 + 1, .flags = IORESOURCE_MEM, }, [1] = { + .start = 0x20300000 + 4, + .end = 0x20300000 + 5, + .flags = IORESOURCE_MEM, + }, + [2] = { .start = IRQ_PF10, .end = IRQ_PF10, .flags = (IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE), diff --git a/arch/blackfin/mach-bf537/boards/generic_board.c b/arch/blackfin/mach-bf537/boards/generic_board.c index 7d250828dad..01b63e2ec18 100644 --- 
a/arch/blackfin/mach-bf537/boards/generic_board.c +++ b/arch/blackfin/mach-bf537/boards/generic_board.c @@ -166,10 +166,15 @@ static struct platform_device smc91x_device = { static struct resource dm9000_resources[] = { [0] = { .start = 0x203FB800, - .end = 0x203FB800 + 8, + .end = 0x203FB800 + 1, .flags = IORESOURCE_MEM, }, [1] = { + .start = 0x203FB800 + 4, + .end = 0x203FB800 + 5, + .flags = IORESOURCE_MEM, + }, + [2] = { .start = IRQ_PF9, .end = IRQ_PF9, .flags = (IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE), -- cgit v1.2.3 From 3aa30df3d0d78f568cff9d6a98ae01ae55494f10 Mon Sep 17 00:00:00 2001 From: Hinko Kocevar Date: Fri, 6 Jun 2008 14:12:26 +0200 Subject: cris: compile fixes for 2.6.26-rc5 Add dummy ops for serial debug port. Add setting of c_ispeed/c_ospeed as suggested by Alan Cox. Signed-off-by: Hinko Kocevar Acked-by: Alan Cox Signed-off-by: Jesper Nilsson --- arch/cris/arch-v10/kernel/debugport.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/cris/arch-v10/kernel/debugport.c b/arch/cris/arch-v10/kernel/debugport.c index 04d5eee2c90..162730eb887 100644 --- a/arch/cris/arch-v10/kernel/debugport.c +++ b/arch/cris/arch-v10/kernel/debugport.c @@ -426,12 +426,18 @@ static int dummy_write(struct tty_struct * tty, return count; } -static int -dummy_write_room(struct tty_struct *tty) +static int dummy_write_room(struct tty_struct *tty) { return 8192; } +static const struct tty_operations dummy_ops = { + .open = dummy_open, + .close = dummy_close, + .write = dummy_write, + .write_room = dummy_write_room, +}; + void __init init_dummy_console(void) { @@ -444,14 +450,14 @@ init_dummy_console(void) dummy_driver.type = TTY_DRIVER_TYPE_SERIAL; dummy_driver.subtype = SERIAL_TYPE_NORMAL; dummy_driver.init_termios = tty_std_termios; + /* Normally B9600 default... */ dummy_driver.init_termios.c_cflag = - B115200 | CS8 | CREAD | HUPCL | CLOCAL; /* is normally B9600 default... 
*/ + B115200 | CS8 | CREAD | HUPCL | CLOCAL; dummy_driver.flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; + dummy_driver.init_termios.c_ispeed = 115200; + dummy_driver.init_termios.c_ospeed = 115200; - dummy_driver.open = dummy_open; - dummy_driver.close = dummy_close; - dummy_driver.write = dummy_write; - dummy_driver.write_room = dummy_write_room; + dummy_driver.ops = &dummy_ops; if (tty_register_driver(&dummy_driver)) panic("Couldn't register dummy serial driver\n"); } -- cgit v1.2.3 From 9be48a94b8ae8c944dc918ad65f2f27e9df3ed00 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 29 Jun 2008 22:50:56 +0200 Subject: It looks at least odd to apply spin_unlock to a mutex. The semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @def@ declarer DEFINE_MUTEX; identifier m; @@ DEFINE_MUTEX(m); @@ identifier def.m; @@ ( - spin_lock(&m) + mutex_lock(&m) | - spin_unlock(&m) + mutex_unlock(&m) ) // Signed-off-by: Julia Lawall Signed-off-by: Jesper Nilsson --- arch/cris/arch-v10/drivers/pcf8563.c | 2 +- arch/cris/arch-v32/drivers/pcf8563.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/cris/arch-v10/drivers/pcf8563.c b/arch/cris/arch-v10/drivers/pcf8563.c index 52103d16dc6..8769dc91407 100644 --- a/arch/cris/arch-v10/drivers/pcf8563.c +++ b/arch/cris/arch-v10/drivers/pcf8563.c @@ -233,7 +233,7 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, if (copy_to_user((struct rtc_time *) arg, &tm, sizeof tm)) { - spin_unlock(&rtc_lock); + mutex_unlock(&rtc_lock); return -EFAULT; } diff --git a/arch/cris/arch-v32/drivers/pcf8563.c b/arch/cris/arch-v32/drivers/pcf8563.c index 53db3870ba0..f263ab57122 100644 --- a/arch/cris/arch-v32/drivers/pcf8563.c +++ b/arch/cris/arch-v32/drivers/pcf8563.c @@ -229,7 +229,7 @@ int pcf8563_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, if (copy_to_user((struct rtc_time *) arg, &tm, sizeof tm)) { - 
spin_unlock(&rtc_lock); + mutex_unlock(&rtc_lock); return -EFAULT; } -- cgit v1.2.3 From bdb144b67a7660ce5d044ae9a2fd1a8030f12523 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Sun, 29 Jun 2008 23:15:19 +0200 Subject: [CRIS] Build fixes for compressed and rescue images for v10 and v32: - Use the normal cross gcc instead of using an elf specific cris toolchain. This removes the dependency of this second toolchain. - Use the normal cross objcopy instead of overriding it to use elf-toolchain. This allows compiling using "CROSS_COMPILE=$CRIS_GCC/cris-axis-linux-gnu-" instead of just "CROSS_COMPILE=$CRIS_GCC/cris-axis-linux-gnu/bin/" - Remove redundant rules for compiling, the implicit rules are sufficient. - Convert the arch/cris/arch-v10/boot/compressed/head.S to format accepted by the cris-axis-linux-gnu-gcc (registers must be prefixed with '$', remove explicit underscore on exported symbols) - Remove a number of unused (and duplicated) prototypes from arch/cris/arch-v10/boot/compressed/misc.c. - Correct memcpy and memset return values (actually return them!) 
Signed-off-by: Jesper Nilsson --- arch/cris/arch-v10/boot/Makefile | 1 - arch/cris/arch-v10/boot/compressed/Makefile | 12 +-- arch/cris/arch-v10/boot/compressed/decompress.ld | 3 +- arch/cris/arch-v10/boot/compressed/head.S | 98 ++++++++++++------------ arch/cris/arch-v10/boot/compressed/misc.c | 23 +++--- arch/cris/arch-v10/boot/rescue/Makefile | 7 +- arch/cris/arch-v32/boot/Makefile | 1 - arch/cris/arch-v32/boot/compressed/Makefile | 4 - arch/cris/arch-v32/boot/rescue/Makefile | 1 - 9 files changed, 65 insertions(+), 85 deletions(-) (limited to 'arch') diff --git a/arch/cris/arch-v10/boot/Makefile b/arch/cris/arch-v10/boot/Makefile index 20c83a53caf..21720301443 100644 --- a/arch/cris/arch-v10/boot/Makefile +++ b/arch/cris/arch-v10/boot/Makefile @@ -2,7 +2,6 @@ # arch/cris/arch-v10/boot/Makefile # -OBJCOPY = objcopy-cris OBJCOPYFLAGS = -O binary --remove-section=.bss subdir- := compressed rescue diff --git a/arch/cris/arch-v10/boot/compressed/Makefile b/arch/cris/arch-v10/boot/compressed/Makefile index 4a031cb27eb..9ec5f87d515 100644 --- a/arch/cris/arch-v10/boot/compressed/Makefile +++ b/arch/cris/arch-v10/boot/compressed/Makefile @@ -2,12 +2,10 @@ # arch/cris/arch-v10/boot/compressed/Makefile # -CC = gcc-cris -melf $(LINUXINCLUDE) -ccflags-y += -O2 -LD = ld-cris +asflags-y += $(LINUXINCLUDE) +ccflags-y += -O2 $(LINUXINCLUDE) ldflags-y += -T $(obj)/decompress.ld OBJECTS = $(obj)/head.o $(obj)/misc.o -OBJCOPY = objcopy-cris OBJCOPYFLAGS = -O binary --remove-section=.bss quiet_cmd_image = BUILD $@ @@ -21,12 +19,6 @@ $(obj)/decompress.o: $(OBJECTS) FORCE $(obj)/decompress.bin: $(obj)/decompress.o FORCE $(call if_changed,objcopy) -$(obj)/head.o: $(obj)/head.S .config - @$(CC) -D__ASSEMBLY__ -traditional -c $< -o $@ - -$(obj)/misc.o: $(obj)/misc.c .config - @$(CC) -D__KERNEL__ -c $< -o $@ - $(obj)/vmlinux: $(obj)/piggy.gz $(obj)/decompress.bin FORCE $(call if_changed,image) diff --git a/arch/cris/arch-v10/boot/compressed/decompress.ld 
b/arch/cris/arch-v10/boot/compressed/decompress.ld index 0b0a14fe617..e80f4594d54 100644 --- a/arch/cris/arch-v10/boot/compressed/decompress.ld +++ b/arch/cris/arch-v10/boot/compressed/decompress.ld @@ -1,4 +1,5 @@ -OUTPUT_FORMAT(elf32-us-cris) +/* OUTPUT_FORMAT(elf32-us-cris) */ +OUTPUT_FORMAT(elf32-cris) MEMORY { diff --git a/arch/cris/arch-v10/boot/compressed/head.S b/arch/cris/arch-v10/boot/compressed/head.S index 610bdb23755..981fbae8495 100644 --- a/arch/cris/arch-v10/boot/compressed/head.S +++ b/arch/cris/arch-v10/boot/compressed/head.S @@ -15,77 +15,77 @@ #define COMMAND_LINE_MAGIC 0x87109563 ;; Exported symbols - - .globl _input_data - + .globl input_data + + .text nop di ;; We need to initialze DRAM registers before we start using the DRAM - - cmp.d RAM_INIT_MAGIC, r8 ; Already initialized? + + cmp.d RAM_INIT_MAGIC, $r8 ; Already initialized? beq dram_init_finished nop - + #include "../../lib/dram_init.S" - -dram_init_finished: - + +dram_init_finished: + ;; Initiate the PA and PB ports - move.b CONFIG_ETRAX_DEF_R_PORT_PA_DATA, r0 - move.b r0, [R_PORT_PA_DATA] + move.b CONFIG_ETRAX_DEF_R_PORT_PA_DATA, $r0 + move.b $r0, [R_PORT_PA_DATA] - move.b CONFIG_ETRAX_DEF_R_PORT_PA_DIR, r0 - move.b r0, [R_PORT_PA_DIR] + move.b CONFIG_ETRAX_DEF_R_PORT_PA_DIR, $r0 + move.b $r0, [R_PORT_PA_DIR] - move.b CONFIG_ETRAX_DEF_R_PORT_PB_DATA, r0 - move.b r0, [R_PORT_PB_DATA] + move.b CONFIG_ETRAX_DEF_R_PORT_PB_DATA, $r0 + move.b $r0, [R_PORT_PB_DATA] - move.b CONFIG_ETRAX_DEF_R_PORT_PB_DIR, r0 - move.b r0, [R_PORT_PB_DIR] + move.b CONFIG_ETRAX_DEF_R_PORT_PB_DIR, $r0 + move.b $r0, [R_PORT_PB_DIR] ;; Setup the stack to a suitably high address. ;; We assume 8 MB is the minimum DRAM in an eLinux ;; product and put the sp at the top for now. - move.d 0x40800000, sp + move.d 0x40800000, $sp ;; Figure out where the compressed piggyback image is ;; in the flash (since we wont try to copy it to DRAM ;; before unpacking). It is at _edata, but in flash. 
;; Use (_edata - basse) as offset to the current PC. - -basse: move.d pc, r5 - and.d 0x7fffffff, r5 ; strip any non-cache bit - subq 2, r5 ; compensate for the move.d pc instr - move.d r5, r0 ; save for later - flash address of 'basse' - add.d _edata, r5 - sub.d basse, r5 ; r5 = flash address of '_edata' - + +basse: move.d $pc, $r5 + and.d 0x7fffffff, $r5 ; strip any non-cache bit + subq 2, $r5 ; compensate for the move.d $pc instr + move.d $r5, $r0 ; save for later - flash address of 'basse' + add.d _edata, $r5 + sub.d basse, $r5 ; $r5 = flash address of '_edata' + ;; Copy text+data to DRAM - - move.d basse, r1 ; destination - move.d _edata, r2 ; end destination -1: move.w [r0+], r3 - move.w r3, [r1+] - cmp.d r2, r1 + + move.d basse, $r1 ; destination + move.d _edata, $r2 ; end destination +1: move.w [$r0+], $r3 + move.w $r3, [$r1+] + cmp.d $r2, $r1 bcs 1b nop - move.d r5, [_input_data] ; for the decompressor + move.d $r5, [input_data] ; for the decompressor ;; Clear the decompressors BSS (between _edata and _end) - - moveq 0, r0 - move.d _edata, r1 - move.d _end, r2 -1: move.w r0, [r1+] - cmp.d r2, r1 + + moveq 0, $r0 + move.d _edata, $r1 + move.d _end, $r2 +1: move.w $r0, [$r1+] + cmp.d $r2, $r1 bcs 1b nop @@ -94,16 +94,16 @@ basse: move.d pc, r5 move.d $r10, [$r12] move.d _cmd_line_addr, $r12 move.d $r11, [$r12] - - ;; Do the decompression and save compressed size in _inptr - jsr _decompress_kernel - - ;; Put start address of root partition in r9 so the kernel can use it + ;; Do the decompression and save compressed size in inptr + + jsr decompress_kernel + + ;; Put start address of root partition in $r9 so the kernel can use it ;; when mounting from flash - move.d [_input_data], r9 ; flash address of compressed kernel - add.d [_inptr], r9 ; size of compressed kernel + move.d [input_data], $r9 ; flash address of compressed kernel + add.d [inptr], $r9 ; size of compressed kernel ;; Restore command line magic and address. 
move.d _cmd_line_magic, $r10 @@ -112,12 +112,12 @@ basse: move.d pc, r5 move.d [$r11], $r11 ;; Enter the decompressed kernel - move.d RAM_INIT_MAGIC, r8 ; Tell kernel that DRAM is initialized + move.d RAM_INIT_MAGIC, $r8 ; Tell kernel that DRAM is initialized jump 0x40004000 ; kernel is linked to this address - + .data -_input_data: +input_data: .dword 0 ; used by the decompressor _cmd_line_magic: .dword 0 diff --git a/arch/cris/arch-v10/boot/compressed/misc.c b/arch/cris/arch-v10/boot/compressed/misc.c index 9a43ab19391..59961f20fab 100644 --- a/arch/cris/arch-v10/boot/compressed/misc.c +++ b/arch/cris/arch-v10/boot/compressed/misc.c @@ -30,8 +30,7 @@ #define STATIC static void* memset(void* s, int c, size_t n); -void* memcpy(void* __dest, __const void* __src, - size_t __n); +void* memcpy(void* __dest, __const void* __src, size_t __n); #define memzero(s, n) memset ((s), 0, (n)) @@ -81,11 +80,8 @@ static unsigned outcnt = 0; /* bytes in output buffer */ # define Tracecv(c,x) #endif -static int fill_inbuf(void); static void flush_window(void); static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); extern char *input_data; /* lives in head.S */ @@ -95,7 +91,6 @@ static unsigned long output_ptr = 0; static void *malloc(int size); static void free(void *where); -static void error(char *m); static void gzip_mark(void **); static void gzip_release(void **); @@ -103,8 +98,8 @@ static void puts(const char *); /* the "heap" is put directly after the BSS ends, at end */ -extern int end; -static long free_mem_ptr = (long)&end; +extern int _end; +static long free_mem_ptr = (long)&_end; #include "../../../../../lib/inflate.c" @@ -170,6 +165,8 @@ memset(void* s, int c, size_t n) char *ss = (char*)s; for (i=0;i Date: Sat, 28 Jun 2008 18:25:41 -0400 Subject: x86, 64-bit: patch paravirt inline replacements when loading modules small speedup. 
Paravirt replacements were added to the i386 module loader by commit 139ec7c416248b9ea227d21839235344edfee1e0. This adds the same code to the x86_64 module loader. Signed-off-by: Anders Kaseorg Acked-by: "H. Peter Anvin" Signed-off-by: Ingo Molnar --- arch/x86/kernel/module_64.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index a888e67f587..0e867676b5a 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c @@ -150,7 +150,8 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, + *para = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -160,6 +161,8 @@ int module_finalize(const Elf_Ehdr *hdr, alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) locks= s; + if (!strcmp(".parainstructions", secstrings + s->sh_name)) + para = s; } if (alt) { @@ -175,6 +178,11 @@ int module_finalize(const Elf_Ehdr *hdr, tseg, tseg + text->sh_size); } + if (para) { + void *pseg = (void *)para->sh_addr; + apply_paravirt(pseg, pseg + para->sh_size); + } + return module_bug_finalize(hdr, sechdrs, me); } -- cgit v1.2.3 From 7dd071058f70d517f2d24e80cbb9d9885d565f0a Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Mon, 30 Jun 2008 20:38:06 +0200 Subject: [CRIS] Correct image makefiles to allow using a separate OBJ-directory. Make compile succeed when building with O= (srctree != objtree). 
Signed-off-by: Hinko Kocevar Signed-off-by: Jesper Nilsson --- arch/cris/arch-v10/boot/compressed/Makefile | 2 +- arch/cris/arch-v10/boot/rescue/Makefile | 2 +- arch/cris/arch-v32/boot/compressed/Makefile | 2 +- arch/cris/arch-v32/boot/rescue/Makefile | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/cris/arch-v10/boot/compressed/Makefile b/arch/cris/arch-v10/boot/compressed/Makefile index 9ec5f87d515..08d943ce4be 100644 --- a/arch/cris/arch-v10/boot/compressed/Makefile +++ b/arch/cris/arch-v10/boot/compressed/Makefile @@ -4,7 +4,7 @@ asflags-y += $(LINUXINCLUDE) ccflags-y += -O2 $(LINUXINCLUDE) -ldflags-y += -T $(obj)/decompress.ld +ldflags-y += -T $(srctree)/$(obj)/decompress.ld OBJECTS = $(obj)/head.o $(obj)/misc.o OBJCOPYFLAGS = -O binary --remove-section=.bss diff --git a/arch/cris/arch-v10/boot/rescue/Makefile b/arch/cris/arch-v10/boot/rescue/Makefile index bea8b9c2a7c..07688da9270 100644 --- a/arch/cris/arch-v10/boot/rescue/Makefile +++ b/arch/cris/arch-v10/boot/rescue/Makefile @@ -4,7 +4,7 @@ ccflags-y += -O2 $(LINUXINCLUDE) asflags-y += $(LINUXINCLUDE) -ldflags-y += -T $(obj)/rescue.ld +ldflags-y += -T $(srctree)/$(obj)/rescue.ld OBJCOPYFLAGS = -O binary --remove-section=.bss obj-$(CONFIG_ETRAX_AXISFLASHMAP) = head.o OBJECT := $(obj)/head.o diff --git a/arch/cris/arch-v32/boot/compressed/Makefile b/arch/cris/arch-v32/boot/compressed/Makefile index 9138938eec3..d6335f26083 100644 --- a/arch/cris/arch-v32/boot/compressed/Makefile +++ b/arch/cris/arch-v32/boot/compressed/Makefile @@ -4,7 +4,7 @@ asflags-y += -I $(srctree)/include/asm/mach/ -I $(srctree)/include/asm/arch ccflags-y += -O2 -I $(srctree)/include/asm/mach/ -I $(srctree)/include/asm/arch -ldflags-y += -T $(obj)/decompress.ld +ldflags-y += -T $(srctree)/$(obj)/decompress.ld OBJECTS = $(obj)/head.o $(obj)/misc.o OBJCOPYFLAGS = -O binary --remove-section=.bss diff --git a/arch/cris/arch-v32/boot/rescue/Makefile b/arch/cris/arch-v32/boot/rescue/Makefile index 
b548bde185d..44ae0ad61f9 100644 --- a/arch/cris/arch-v32/boot/rescue/Makefile +++ b/arch/cris/arch-v32/boot/rescue/Makefile @@ -7,7 +7,7 @@ ccflags-y += -O2 -I $(srctree)/include/asm/arch/mach/ \ -I $(srctree)/include/asm/arch asflags-y += -I $(srctree)/include/asm/arch/mach/ -I $(srctree)/include/asm/arch LD = gcc-cris -mlinux -march=v32 -nostdlib -ldflags-y += -T $(obj)/rescue.ld +ldflags-y += -T $(srctree)/$(obj)/rescue.ld LDPOSTFLAGS = -lgcc OBJCOPYFLAGS = -O binary --remove-section=.bss obj-$(CONFIG_ETRAX_AXISFLASHMAP) = head.o -- cgit v1.2.3 From bd451d5ed206cda4ed0e03fac4e5dece2fd7767f Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Mon, 30 Jun 2008 23:22:51 +0200 Subject: [CRISv10] Correct whitespace damage. The previous patch was whitespace damaged, correct to indent using tabs. Signed-off-by: Jesper Nilsson --- arch/cris/arch-v10/kernel/debugport.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/cris/arch-v10/kernel/debugport.c b/arch/cris/arch-v10/kernel/debugport.c index 162730eb887..3dc6e91ba39 100644 --- a/arch/cris/arch-v10/kernel/debugport.c +++ b/arch/cris/arch-v10/kernel/debugport.c @@ -432,10 +432,10 @@ static int dummy_write_room(struct tty_struct *tty) } static const struct tty_operations dummy_ops = { - .open = dummy_open, - .close = dummy_close, - .write = dummy_write, - .write_room = dummy_write_room, + .open = dummy_open, + .close = dummy_close, + .write = dummy_write, + .write_room = dummy_write_room, }; void __init -- cgit v1.2.3 From f3c4b53d5ec6bd2ae0f284c1e6371bff545f0f80 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Mon, 30 Jun 2008 21:20:23 +0200 Subject: [CRISv10] Clean up compressed/misc.c Many minor fixes in whitespace and formatting. 
Signed-off-by: Jesper Nilsson --- arch/cris/arch-v10/boot/compressed/misc.c | 127 ++++++++++++++++-------------- 1 file changed, 70 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/cris/arch-v10/boot/compressed/misc.c b/arch/cris/arch-v10/boot/compressed/misc.c index 59961f20fab..18e13bce140 100644 --- a/arch/cris/arch-v10/boot/compressed/misc.c +++ b/arch/cris/arch-v10/boot/compressed/misc.c @@ -29,11 +29,10 @@ #define OF(args) args #define STATIC static -void* memset(void* s, int c, size_t n); -void* memcpy(void* __dest, __const void* __src, size_t __n); - -#define memzero(s, n) memset ((s), 0, (n)) +void *memset(void *s, int c, size_t n); +void *memcpy(void *__dest, __const void *__src, size_t __n); +#define memzero(s, n) memset((s), 0, (n)) typedef unsigned char uch; typedef unsigned short ush; @@ -61,23 +60,38 @@ static unsigned outcnt = 0; /* bytes in output buffer */ #define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ #define RESERVED 0xC0 /* bit 6,7: reserved */ -#define get_byte() inbuf[inptr++] - +#define get_byte() (inbuf[inptr++]) + /* Diagnostic functions */ #ifdef DEBUG -# define Assert(cond,msg) {if(!(cond)) error(msg);} +# define Assert(cond, msg) do { \ + if (!(cond)) \ + error(msg); \ + } while (0) # define Trace(x) fprintf x -# define Tracev(x) {if (verbose) fprintf x ;} -# define Tracevv(x) {if (verbose>1) fprintf x ;} -# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} -# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} +# define Tracev(x) do { \ + if (verbose) \ + fprintf x; \ + } while (0) +# define Tracevv(x) do { \ + if (verbose > 1) \ + fprintf x; \ + } while (0) +# define Tracec(c, x) do { \ + if (verbose && (c)) \ + fprintf x; \ + } while (0) +# define Tracecv(c, x) do { \ + if (verbose > 1 && (c)) \ + fprintf x; \ + } while (0) #else -# define Assert(cond,msg) +# define Assert(cond, msg) # define Trace(x) # define Tracev(x) # define Tracevv(x) -# define Tracec(c,x) -# define Tracecv(c,x) +# define 
Tracec(c, x) +# define Tracecv(c, x) #endif static void flush_window(void); @@ -88,26 +102,27 @@ extern char *input_data; /* lives in head.S */ static long bytes_out = 0; static uch *output_data; static unsigned long output_ptr = 0; - + static void *malloc(int size); static void free(void *where); static void gzip_mark(void **); static void gzip_release(void **); - + static void puts(const char *); /* the "heap" is put directly after the BSS ends, at end */ - + extern int _end; static long free_mem_ptr = (long)&_end; - + #include "../../../../../lib/inflate.c" static void *malloc(int size) { void *p; - if (size <0) error("Malloc error"); + if (size < 0) + error("Malloc error"); free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ @@ -137,48 +152,47 @@ static void puts(const char *s) { #ifndef CONFIG_ETRAX_DEBUG_PORT_NULL - while(*s) { + while (*s) { #ifdef CONFIG_ETRAX_DEBUG_PORT0 - while(!(*R_SERIAL0_STATUS & (1 << 5))) ; + while (!(*R_SERIAL0_STATUS & (1 << 5))) ; *R_SERIAL0_TR_DATA = *s++; #endif #ifdef CONFIG_ETRAX_DEBUG_PORT1 - while(!(*R_SERIAL1_STATUS & (1 << 5))) ; + while (!(*R_SERIAL1_STATUS & (1 << 5))) ; *R_SERIAL1_TR_DATA = *s++; #endif #ifdef CONFIG_ETRAX_DEBUG_PORT2 - while(!(*R_SERIAL2_STATUS & (1 << 5))) ; + while (!(*R_SERIAL2_STATUS & (1 << 5))) ; *R_SERIAL2_TR_DATA = *s++; #endif #ifdef CONFIG_ETRAX_DEBUG_PORT3 - while(!(*R_SERIAL3_STATUS & (1 << 5))) ; + while (!(*R_SERIAL3_STATUS & (1 << 5))) ; *R_SERIAL3_TR_DATA = *s++; #endif } #endif } -void* -memset(void* s, int c, size_t n) +void *memset(void *s, int c, size_t n) { int i; - char *ss = (char*)s; + char *ss = (char *)s; - for (i=0;i> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - output_ptr += (ulg)outcnt; - outcnt = 0; + ulg c = crc; /* temporary variable */ + unsigned n; + uch *in, *out, ch; + + in = window; + out = &output_data[output_ptr]; + for (n = 0; n < outcnt; n++) { + ch = *out = *in; + out++; + in++; + c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); + } + crc = c; + bytes_out 
+= (ulg)outcnt; + output_ptr += (ulg)outcnt; + outcnt = 0; } -static void -error(char *x) +static void error(char *x) { puts("\n\n"); puts(x); puts("\n\n -- System halted\n"); - while(1); /* Halt */ + while (1); /* Halt */ } void setup_normal_output_buffer(void) @@ -223,7 +237,7 @@ void setup_normal_output_buffer(void) void decompress_kernel(void) { char revision; - + /* input_data is set in head.S */ inbuf = input_data; @@ -255,10 +269,9 @@ void decompress_kernel(void) makecrc(); __asm__ volatile ("move $vr,%0" : "=rm" (revision)); - if (revision < 10) - { + if (revision < 10) { puts("You need an ETRAX 100LX to run linux 2.6\n"); - while(1); + while (1); } puts("Uncompressing Linux...\n"); -- cgit v1.2.3 From 299a140dacaa514be5e567b5851c187c42ec38c4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 8 Jul 2008 14:47:16 +0200 Subject: x86, AMD IOMMU: ignore detection of GART IOMMU One of the last IOMMU updates covered a bug in the AMD IOMMU code. The early detection code does not succeed if the GART is already detected. This patch fixes this. 
Cc: Robert Richter Cc: Bhavna Sarathy Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: Joerg Roedel Cc: Robert Richter Cc: Bhavna Sarathy Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 2a13e430437..bb0280077a3 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -828,7 +828,7 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) void __init amd_iommu_detect(void) { - if (swiotlb || no_iommu || iommu_detected) + if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) return; if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { -- cgit v1.2.3 From ab6bc3e343fbe3be4a0f67225e849d0db6b4b7ac Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 5 Jul 2008 15:53:36 +0400 Subject: x86: idle process - add checking for NULL early param Signed-off-by: Cyrill Gorcunov Cc: akpm@linux-foundation.org Cc: andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ba370dc8685..58325a6604a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -164,6 +164,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) static int __init idle_setup(char *str) { + if (!str) + return -EINVAL; + if (!strcmp(str, "poll")) { printk("using polling idle threads.\n"); pm_idle = poll_idle; -- cgit v1.2.3 From d6cd7effcc5e0047faf15ab0a54c980f1a616a07 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 5 Jul 2008 15:53:37 +0400 Subject: x86: io delay - add checking for NULL early param Signed-off-by: Cyrill Gorcunov Cc: akpm@linux-foundation.org Cc: andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_delay.c | 3 +++ 1 file 
changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 5921e5f0a64..1c3a66a67f8 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c @@ -103,6 +103,9 @@ void __init io_delay_init(void) static int __init io_delay_param(char *s) { + if (!s) + return -EINVAL; + if (!strcmp(s, "0x80")) io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; else if (!strcmp(s, "0xed")) -- cgit v1.2.3 From 4d8cc874d7ed43eda72765e9c0e141e170fee4f3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 5 Jul 2008 15:53:38 +0400 Subject: x86: smpboot maxcpus - add checking for NULL early param Signed-off-by: Cyrill Gorcunov Cc: akpm@linux-foundation.org Cc: andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fd933b5465b..e47bfac70c3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1452,7 +1452,8 @@ static int __init parse_maxcpus(char *arg) { extern unsigned int maxcpus; - maxcpus = simple_strtoul(arg, NULL, 0); + if (arg) + maxcpus = simple_strtoul(arg, NULL, 0); return 0; } early_param("maxcpus", parse_maxcpus); -- cgit v1.2.3 From 46a7fa270afbe5fddc6042a598cfe22977b0e989 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 11 Jul 2008 10:23:42 +0900 Subject: x86: make only GART code include gart.h gart.h has only GART-specific stuff. Only GART code needs it. Other IOMMU stuff should include iommu.h instead of gart.h. 
Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 +- arch/x86/kernel/amd_iommu_init.c | 2 +- arch/x86/kernel/aperture_64.c | 1 + arch/x86/kernel/early-quirks.c | 5 +---- arch/x86/kernel/pci-calgary_64.c | 2 +- arch/x86/kernel/pci-dma.c | 2 +- arch/x86/kernel/pci-gart_64.c | 1 + arch/x86/kernel/pci-nommu.c | 2 +- arch/x86/kernel/pci-swiotlb_64.c | 2 +- arch/x86/kernel/setup.c | 2 +- 10 files changed, 10 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f2766d84c7a..cf2f74bcde5 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 2a13e430437..66438284c69 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include /* * definitions for the ACPI scanning code diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 9f907806c1a..44e21826db1 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index a4665f37cfc..510b8e36773 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -16,10 +16,7 @@ #include #include #include - -#ifdef CONFIG_GART_IOMMU -#include -#endif +#include static void __init fix_hypertransport_config(int num, int slot, int func) { diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 6959b5c45df..151f2d171f7 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include 
#include #include diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8467ec2320f..f581a4b63b4 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index d0d18db5d2a..949ca985deb 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index aec43d56f49..792b9179eff 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 82299cd1d04..20df839b9c2 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 86fc2d62427..e5d208934bf 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -96,7 +96,7 @@ #include #include #include -#include +#include #include #include -- cgit v1.2.3 From ac7ded2adb2e43152fe7385ddd53bf45f5c92285 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 11 Jul 2008 10:23:43 +0900 Subject: x86: remove ifdef CONFIG_GART_IOMMU in pci-dma.c Our way to handle gart_* functions for CONFIG_GART_IOMMU and !CONFIG_GART_IOMMU cases is inconsistent. We have some dummy gart_* functions in !CONFIG_GART_IOMMU case and also use ifdef CONFIG_GART_IOMMU tricks in pci-dma.c to call some gart_* functions in only CONFIG_GART_IOMMU case. This patch removes ifdef CONFIG_GART_IOMMU in pci-dma.c and always use dummy gart_* functions in iommu.h. 
Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index f581a4b63b4..dd57c5bbe2d 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -114,9 +114,7 @@ void __init pci_iommu_alloc(void) * The order of these functions is important for * fall-back/fail-over reasons */ -#ifdef CONFIG_GART_IOMMU gart_iommu_hole_init(); -#endif #ifdef CONFIG_CALGARY_IOMMU detect_calgary(); @@ -184,9 +182,7 @@ static __init int iommu_setup(char *p) swiotlb = 1; #endif -#ifdef CONFIG_GART_IOMMU gart_parse_options(p); -#endif #ifdef CONFIG_CALGARY_IOMMU if (!strncmp(p, "calgary", 7)) @@ -508,9 +504,7 @@ static int __init pci_iommu_init(void) amd_iommu_init(); -#ifdef CONFIG_GART_IOMMU gart_iommu_init(); -#endif no_iommu_init(); return 0; -- cgit v1.2.3 From b8b48326f312026af12799917383c54c25d05482 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 11 Jul 2008 10:23:44 +0900 Subject: x86: remove ifdef CONFIG_CALGARY_IOMMU in pci-dma.c asm-x86/calgary.h has dummy calgary_iommu_init() and detect_calgary() in !CONFIG_CALGARY_IOMMU case. So we don't need ifdef CONFIG_CALGARY_IOMMU in pci-dma.c. 
Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Cc: Alexis Bruemmer Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index dd57c5bbe2d..f16cbbe424a 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -116,9 +116,7 @@ void __init pci_iommu_alloc(void) */ gart_iommu_hole_init(); -#ifdef CONFIG_CALGARY_IOMMU detect_calgary(); -#endif detect_intel_iommu(); @@ -496,9 +494,7 @@ EXPORT_SYMBOL(dma_free_coherent); static int __init pci_iommu_init(void) { -#ifdef CONFIG_CALGARY_IOMMU calgary_iommu_init(); -#endif intel_iommu_init(); -- cgit v1.2.3 From be54f9d1c8df93c4998e134a306652caaa58f67f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 11 Jul 2008 10:23:45 +0900 Subject: x86: remove ifdef CONFIG_SWIOTLB in pci-dma.c As other IOMMUs do, this puts dummy pci_swiotlb_init() in swiotlb.h and remove ifdef CONFIG_SWIOTLB in pci-dma.c. Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index f16cbbe424a..d12945de056 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -122,9 +122,7 @@ void __init pci_iommu_alloc(void) amd_iommu_detect(); -#ifdef CONFIG_SWIOTLB pci_swiotlb_init(); -#endif } #endif -- cgit v1.2.3 From b65233a9c1da587bf19ee161982f4f0ec59941c0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:21 +0200 Subject: x86, AMD IOMMU: add comments to the initialization code This patch adds some comments to the AMD IOMMU initialization code to increase its readability. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 214 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 206 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index bb0280077a3..9ddb46d7c52 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -71,6 +71,17 @@ #define ACPI_DEVFLAG_LINT1 0x80 #define ACPI_DEVFLAG_ATSDIS 0x10000000 +/* + * ACPI table definitions + * + * These data structures are laid over the table to parse the important values + * out of it. + */ + +/* + * structure describing one IOMMU in the ACPI table. Typically followed by one + * or more ivhd_entrys. + */ struct ivhd_header { u8 type; u8 flags; @@ -83,6 +94,10 @@ struct ivhd_header { u32 reserved; } __attribute__((packed)); +/* + * A device entry describing which devices a specific IOMMU translates and + * which requestor ids they use. + */ struct ivhd_entry { u8 type; u16 devid; @@ -90,6 +105,10 @@ struct ivhd_entry { u32 ext; } __attribute__((packed)); +/* + * An AMD IOMMU memory definition structure. It defines things like exclusion + * ranges for devices and regions that should be unity mapped. 
+ */ struct ivmd_header { u8 type; u8 flags; @@ -103,22 +122,66 @@ struct ivmd_header { static int __initdata amd_iommu_detected; -u16 amd_iommu_last_bdf; -struct list_head amd_iommu_unity_map; -unsigned amd_iommu_aperture_order = 26; -int amd_iommu_isolate; +u16 amd_iommu_last_bdf; /* largest PCI device id we have + to handle */ +struct list_head amd_iommu_unity_map; /* a list of required unity mappings + we find in ACPI */ +unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ +int amd_iommu_isolate; /* if 1, device isolation is enabled */ -struct list_head amd_iommu_list; +struct list_head amd_iommu_list; /* list of all AMD IOMMUs in the + system */ + +/* + * Pointer to the device table which is shared by all AMD IOMMUs + * it is indexed by the PCI device id or the HT unit id and contains + * information about the domain the device belongs to as well as the + * page table root pointer. + */ struct dev_table_entry *amd_iommu_dev_table; + +/* + * The alias table is a driver specific data structure which contains the + * mappings of the PCI device ids to the actual requestor ids on the IOMMU. + * More than one device can share the same requestor id. + */ u16 *amd_iommu_alias_table; + +/* + * The rlookup table is used to find the IOMMU which is responsible + * for a specific device. It is also indexed by the PCI device id. + */ struct amd_iommu **amd_iommu_rlookup_table; + +/* + * The pd table (protection domain table) is used to find the protection domain + * data structure a device belongs to. Indexed with the PCI device id too. + */ struct protection_domain **amd_iommu_pd_table; + +/* + * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap + * to know which ones are already in use.
+ */ unsigned long *amd_iommu_pd_alloc_bitmap; -static u32 dev_table_size; -static u32 alias_table_size; -static u32 rlookup_table_size; +static u32 dev_table_size; /* size of the device table */ +static u32 alias_table_size; /* size of the alias table */ +static u32 rlookup_table_size; /* size of the rlookup table */ + +/**************************************************************************** + * + * AMD IOMMU MMIO register space handling functions + * + * These functions are used to program the IOMMU device registers in + * MMIO space required for that driver. + * + ****************************************************************************/ +/* + * This function sets the exclusion range in the IOMMU. DMA accesses to the + * exclusion range are passed through untranslated + */ static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) { u64 start = iommu->exclusion_start & PAGE_MASK; @@ -137,6 +200,7 @@ static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) &entry, sizeof(entry)); } +/* Programs the physical address of the device table into the IOMMU hardware */ static void __init iommu_set_device_table(struct amd_iommu *iommu) { u32 entry; @@ -149,6 +213,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu) &entry, sizeof(entry)); } +/* Generic functions to enable/disable certain features of the IOMMU. */ static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) { u32 ctrl; @@ -167,6 +232,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } +/* Function to enable the hardware */ void __init iommu_enable(struct amd_iommu *iommu) { printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); @@ -176,6 +242,10 @@ void __init iommu_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } +/* + * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in + * the system has one.
+ */ static u8 * __init iommu_map_mmio_space(u64 address) { u8 *ret; @@ -199,6 +269,19 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); } +/**************************************************************************** + * + * The functions below belong to the first pass of AMD IOMMU ACPI table + * parsing. In this pass we try to find out the highest device id this + * code has to handle. Upon this information the size of the shared data + * structures is determined later. + * + ****************************************************************************/ + +/* + * This function reads the last device id the IOMMU has to handle from the PCI + * capability header for this IOMMU + */ static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) { u32 cap; @@ -209,6 +292,10 @@ static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) return 0; } +/* + * After reading the highest device id from the IOMMU PCI capability header + * this function looks if there is a higher device id defined in the ACPI table + */ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) { u8 *p = (void *)h, *end = (void *)h; @@ -229,6 +316,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) case IVHD_DEV_RANGE_END: case IVHD_DEV_ALIAS: case IVHD_DEV_EXT_SELECT: + /* all the above subfield types refer to device ids */ UPDATE_LAST_BDF(dev->devid); break; default: @@ -242,6 +330,11 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) return 0; } +/* + * Iterate over all IVHD entries in the ACPI table and find the highest device + * id which we need to handle. This is the first of three functions which parse + * the ACPI table. So we check the checksum here. 
+ */ static int __init find_last_devid_acpi(struct acpi_table_header *table) { int i; @@ -277,6 +370,20 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) return 0; } +/**************************************************************************** + * + * The following functions belong to the code path which parses the ACPI table + * the second time. In this ACPI parsing iteration we allocate IOMMU specific + * data structures, initialize the device/alias/rlookup table and also + * basically initialize the hardware. + * + ****************************************************************************/ + +/* + * Allocates the command buffer. This buffer is per AMD IOMMU. We can + * write commands to that buffer later and the IOMMU will execute them + * asynchronously + */ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) { u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL, @@ -307,6 +414,7 @@ static void __init free_command_buffer(struct amd_iommu *iommu) get_order(CMD_BUFFER_SIZE)); } +/* sets a specific bit in the device table entry.
*/ static void set_dev_entry_bit(u16 devid, u8 bit) { int i = (bit >> 5) & 0x07; @@ -315,6 +423,10 @@ static void set_dev_entry_bit(u16 devid, u8 bit) amd_iommu_dev_table[devid].data[i] |= (1 << _bit); } +/* + * This function takes the device specific flags read from the ACPI + * table and sets up the device table entry with that information + */ static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) { if (flags & ACPI_DEVFLAG_INITPASS) @@ -333,11 +445,16 @@ static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); } +/* Writes the specific IOMMU for a device into the rlookup table */ static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) { amd_iommu_rlookup_table[devid] = iommu; } +/* + * Reads the device exclusion range from ACPI and initialize IOMMU with + * it + */ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) { struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; @@ -346,12 +463,22 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) return; if (iommu) { + /* + * We only can configure exclusion ranges per IOMMU, not + * per device. But we can enable the exclusion range per + * device. This is done here + */ set_dev_entry_bit(m->devid, DEV_ENTRY_EX); iommu->exclusion_start = m->range_start; iommu->exclusion_length = m->range_length; } } +/* + * This function reads some important data from the IOMMU PCI space and + * initializes the driver data structure with it. 
It reads the hardware + * capabilities and the first/last device entries + */ static void __init init_iommu_from_pci(struct amd_iommu *iommu) { int bus = PCI_BUS(iommu->devid); @@ -367,6 +494,10 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range)); } +/* + * Takes a pointer to an AMD IOMMU entry in the ACPI table and + * initializes the hardware and our data structures with it. + */ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, struct ivhd_header *h) { @@ -467,6 +598,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, } } +/* Initializes the device->iommu mapping for the driver */ static int __init init_iommu_devices(struct amd_iommu *iommu) { u16 i; @@ -494,6 +626,11 @@ static void __init free_iommu_all(void) } } +/* + * This function glues the initialization functions for one IOMMU + * together and also allocates the command buffer and programs the + * hardware. It does NOT enable the IOMMU. This is done afterwards. + */ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) { spin_lock_init(&iommu->lock); @@ -521,6 +658,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) return 0; } +/* + * Iterates over all IOMMU entries in the ACPI table, allocates the + * IOMMU structure and initializes it with init_iommu_one() + */ static int __init init_iommu_all(struct acpi_table_header *table) { u8 *p = (u8 *)table, *end = (u8 *)table; @@ -555,6 +696,14 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } +/**************************************************************************** + * + * The next functions belong to the third pass of parsing the ACPI + * table. In this last pass the memory mapping requirements are + * gathered (like exclusion and unity mapping ranges).
+ * + ****************************************************************************/ + static void __init free_unity_maps(void) { struct unity_map_entry *entry, *next; @@ -565,6 +714,7 @@ static void __init free_unity_maps(void) } } +/* called when we find an exclusion range definition in ACPI */ static int __init init_exclusion_range(struct ivmd_header *m) { int i; @@ -588,6 +738,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) return 0; } +/* called for unity map ACPI definition */ static int __init init_unity_map_range(struct ivmd_header *m) { struct unity_map_entry *e = 0; @@ -619,6 +770,7 @@ static int __init init_unity_map_range(struct ivmd_header *m) return 0; } +/* iterates over all memory definitions we find in the ACPI table */ static int __init init_memory_definitions(struct acpi_table_header *table) { u8 *p = (u8 *)table, *end = (u8 *)table; @@ -642,6 +794,10 @@ static int __init init_memory_definitions(struct acpi_table_header *table) return 0; } +/* + * This function finally enables all IOMMUs found in the system after + * they have been initialized + */ static void __init enable_iommus(void) { struct amd_iommu *iommu; @@ -678,6 +834,34 @@ static struct sys_device device_amd_iommu = { .cls = &amd_iommu_sysdev_class, }; +/* + * This is the core init function for AMD IOMMU hardware in the system. + * This function is called from the generic x86 DMA layer initialization + * code. + * + * This function basically parses the ACPI table for AMD IOMMU (IVRS) + * three times: + * + * 1 pass) Find the highest PCI device id the driver has to handle. + * Upon this information the size of the data structures is + * determined that needs to be allocated. + * + * 2 pass) Initialize the data structures just allocated with the + * information in the ACPI table about available AMD IOMMUs + * in the system. 
It also maps the PCI devices in the + * system to specific IOMMUs + * + * 3 pass) After the basic data structures are allocated and + * initialized we update them with information about memory + * remapping requirements parsed out of the ACPI table in + * this last pass. + * + * After that the hardware is initialized and ready to go. In the last + * step we do some Linux specific things like registering the driver in + * the dma_ops interface and initializing the suspend/resume support + * functions. Finally it prints some information about AMD IOMMUs and + * the driver state and enables the hardware. + */ int __init amd_iommu_init(void) { int i, ret = 0; @@ -821,6 +1005,13 @@ free: goto out; } +/**************************************************************************** + * + * Early detect code. This code runs at IOMMU detection time in the DMA + * layer. It just looks if there is an IVRS ACPI table to detect AMD + * IOMMUs + * + ****************************************************************************/ static int __init early_amd_iommu_detect(struct acpi_table_header *table) { return 0; @@ -841,6 +1032,13 @@ void __init amd_iommu_detect(void) } } +/**************************************************************************** + * + * Parsing functions for the AMD IOMMU specific kernel command line + * options. + * + ****************************************************************************/ + static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { -- cgit v1.2.3 From 431b2a2015337533f1a9e39a840266a8a2c93144 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:22 +0200 Subject: x86, AMD IOMMU: add comments to core code This patch adds comments about how the AMD IOMMU core code works for the DMA remapping functionality. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 201 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 199 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f2766d84c7a..4bae96ca7c1 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -34,6 +34,9 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); +/* + * general struct to manage commands sent to an IOMMU + */ struct command { u32 data[4]; }; @@ -41,11 +44,22 @@ struct command { static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); +/* returns !0 if the IOMMU is caching non-present entries in its TLB */ static int iommu_has_npcache(struct amd_iommu *iommu) { return iommu->cap & IOMMU_CAP_NPCACHE; } +/**************************************************************************** + * + * IOMMU command queuing functions + * + ****************************************************************************/ + +/* + * Writes the command to the IOMMU's command buffer and informs the + * hardware about the new command. Must be called with iommu->lock held. + */ static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) { u32 tail, head; @@ -63,6 +77,10 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) return 0; } +/* + * General queuing function for commands. Takes iommu->lock and calls + * __iommu_queue_command(). + */ static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) { unsigned long flags; @@ -75,6 +93,13 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) return ret; } +/* + * This function is called whenever we need to ensure that the IOMMU has + * completed execution of all commands we sent.
It sends a + * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs + * us about that by writing a value to a physical address we pass with + * the command. + */ static int iommu_completion_wait(struct amd_iommu *iommu) { int ret; @@ -101,6 +126,9 @@ static int iommu_completion_wait(struct amd_iommu *iommu) return 0; } +/* + * Command send function for invalidating a device table entry + */ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) { struct command cmd; @@ -116,6 +144,9 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) return iommu_queue_command(iommu, &cmd); } +/* + * Generic command send function for invalidating TLB entries + */ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, u64 address, u16 domid, int pde, int s) { @@ -127,9 +158,9 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, cmd.data[1] |= domid; cmd.data[2] = LOW_U32(address); cmd.data[3] = HIGH_U32(address); - if (s) + if (s) /* size bit - we flush more than one 4kb page */ cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - if (pde) + if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; iommu->need_sync = 1; @@ -137,6 +168,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, return iommu_queue_command(iommu, &cmd); } +/* + * TLB invalidation function which is called from the mapping functions. + * It invalidates a single PTE if the range to flush is within a single + * page. Otherwise it flushes the whole TLB of the IOMMU. + */ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, u64 address, size_t size) { @@ -159,6 +195,20 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, return 0; } +/**************************************************************************** + * + * The functions below are used to create the page table mappings for + * unity mapped regions.
+ * + ****************************************************************************/ + +/* + * Generic mapping functions. It maps a physical address into a DMA + * address space. It allocates the page table pages if necessary. + * In the future it can be extended to a generic mapping function + * supporting all features of AMD IOMMU page tables like level skipping + * and full 64 bit address spaces. + */ static int iommu_map(struct protection_domain *dom, unsigned long bus_addr, unsigned long phys_addr, @@ -209,6 +259,10 @@ static int iommu_map(struct protection_domain *dom, return 0; } +/* + * This function checks if a specific unity mapping entry is needed for + * this specific IOMMU. + */ static int iommu_for_unity_map(struct amd_iommu *iommu, struct unity_map_entry *entry) { @@ -223,6 +277,12 @@ static int iommu_for_unity_map(struct amd_iommu *iommu, return 0; } +/* + * Init the unity mappings for a specific IOMMU in the system + * + * Basically iterates over all unity mapping entries and applies them to + * the default domain DMA of that IOMMU if necessary. + */ static int iommu_init_unity_mappings(struct amd_iommu *iommu) { struct unity_map_entry *entry; @@ -239,6 +299,10 @@ static int iommu_init_unity_mappings(struct amd_iommu *iommu) return 0; } +/* + * This function actually applies the mapping to the page table of the + * dma_ops domain. + */ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e) { @@ -261,6 +325,9 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, return 0; } +/* + * Inits the unity mappings required for a specific device + */ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, u16 devid) { @@ -278,12 +345,26 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, return 0; } +/**************************************************************************** + * + * The next functions belong to the address allocator for the dma_ops + * interface functions. 
They work like the allocators in the other IOMMU + * drivers. Its basically a bitmap which marks the allocated pages in + * the aperture. Maybe it could be enhanced in the future to a more + * efficient allocator. + * + ****************************************************************************/ static unsigned long dma_mask_to_pages(unsigned long mask) { return (mask >> PAGE_SHIFT) + (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); } +/* + * The address allocator core function. + * + * called with domain->lock held + */ static unsigned long dma_ops_alloc_addresses(struct device *dev, struct dma_ops_domain *dom, unsigned int pages) @@ -317,6 +398,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, return address; } +/* + * The address free function. + * + * called with domain->lock held + */ static void dma_ops_free_addresses(struct dma_ops_domain *dom, unsigned long address, unsigned int pages) @@ -325,6 +411,16 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, iommu_area_free(dom->bitmap, address, pages); } +/**************************************************************************** + * + * The next functions belong to the domain allocation. A domain is + * allocated for every IOMMU as the default domain. If device isolation + * is enabled, every device get its own domain. The most important thing + * about domains is the page table mapping the DMA address space they + * contain. + * + ****************************************************************************/ + static u16 domain_id_alloc(void) { unsigned long flags; @@ -342,6 +438,10 @@ static u16 domain_id_alloc(void) return id; } +/* + * Used to reserve address ranges in the aperture (e.g. for exclusion + * ranges. 
+ */ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned long start_page, unsigned int pages) @@ -382,6 +482,10 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) free_page((unsigned long)p1); } +/* + * Free a domain, only used if something went wrong in the + * allocation path and we need to free an already allocated page table + */ static void dma_ops_domain_free(struct dma_ops_domain *dom) { if (!dom) @@ -396,6 +500,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) kfree(dom); } +/* + * Allocates a new protection domain usable for the dma_ops functions. + * It also intializes the page table and the address allocator data + * structures required for the dma_ops interface + */ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, unsigned order) { @@ -436,6 +545,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_dom->bitmap[0] = 1; dma_dom->next_bit = 0; + /* Intialize the exclusion range if necessary */ if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; @@ -444,6 +554,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, dma_ops_reserve_addresses(dma_dom, startpage, pages); } + /* + * At the last step, build the page tables so we don't need to + * allocate page table pages in the dma_ops mapping/unmapping + * path. + */ num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), GFP_KERNEL); @@ -472,6 +587,10 @@ free_dma_dom: return NULL; } +/* + * Find out the protection domain structure for a given PCI device. This + * will give us the pointer to the page table root for example. 
+ */ static struct protection_domain *domain_for_device(u16 devid) { struct protection_domain *dom; @@ -484,6 +603,10 @@ static struct protection_domain *domain_for_device(u16 devid) return dom; } +/* + * If a device is not yet associated with a domain, this function does + * assigns it visible for the hardware + */ static void set_device_domain(struct amd_iommu *iommu, struct protection_domain *domain, u16 devid) @@ -508,6 +631,19 @@ static void set_device_domain(struct amd_iommu *iommu, iommu->need_sync = 1; } +/***************************************************************************** + * + * The next functions belong to the dma_ops mapping/unmapping code. + * + *****************************************************************************/ + +/* + * In the dma_ops path we only have the struct device. This function + * finds the corresponding IOMMU, the protection domain and the + * requestor id for a given device. + * If the device is not yet associated with a domain this is also done + * in this function. + */ static int get_device_resources(struct device *dev, struct amd_iommu **iommu, struct protection_domain **domain, @@ -522,6 +658,7 @@ static int get_device_resources(struct device *dev, pcidev = to_pci_dev(dev); _bdf = (pcidev->bus->number << 8) | pcidev->devfn; + /* device not translated by any IOMMU in the system? */ if (_bdf >= amd_iommu_last_bdf) { *iommu = NULL; *domain = NULL; @@ -547,6 +684,10 @@ static int get_device_resources(struct device *dev, return 1; } +/* + * This is the generic map function. It maps one 4kb page at paddr to + * the given address in the DMA address space for the domain. + */ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, struct dma_ops_domain *dom, unsigned long address, @@ -578,6 +719,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, return (dma_addr_t)address; } +/* + * The generic unmapping function for on page in the DMA address space. 
+ */ static void dma_ops_domain_unmap(struct amd_iommu *iommu, struct dma_ops_domain *dom, unsigned long address) @@ -597,6 +741,12 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, *pte = 0ULL; } +/* + * This function contains common code for mapping of a physically + * contiguous memory region into DMA address space. It is uses by all + * mapping functions provided by this IOMMU driver. + * Must be called with the domain lock held. + */ static dma_addr_t __map_single(struct device *dev, struct amd_iommu *iommu, struct dma_ops_domain *dma_dom, @@ -628,6 +778,10 @@ out: return address; } +/* + * Does the reverse of the __map_single function. Must be called with + * the domain lock held too + */ static void __unmap_single(struct amd_iommu *iommu, struct dma_ops_domain *dma_dom, dma_addr_t dma_addr, @@ -652,6 +806,9 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); } +/* + * The exported map_single function for dma_ops. + */ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) { @@ -664,6 +821,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, get_device_resources(dev, &iommu, &domain, &devid); if (iommu == NULL || domain == NULL) + /* device not handled by any AMD IOMMU */ return (dma_addr_t)paddr; spin_lock_irqsave(&domain->lock, flags); @@ -683,6 +841,9 @@ out: return addr; } +/* + * The exported unmap_single function for dma_ops. 
+ */ static void unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, int dir) { @@ -692,6 +853,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, u16 devid; if (!get_device_resources(dev, &iommu, &domain, &devid)) + /* device not handled by any AMD IOMMU */ return; spin_lock_irqsave(&domain->lock, flags); @@ -706,6 +868,10 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, spin_unlock_irqrestore(&domain->lock, flags); } +/* + * This is a special map_sg function which is used if we should map a + * device which is not handled by an AMD IOMMU in the system. + */ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, int nelems, int dir) { @@ -720,6 +886,10 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, return nelems; } +/* + * The exported map_sg function for dma_ops (handles scatter-gather + * lists). + */ static int map_sg(struct device *dev, struct scatterlist *sglist, int nelems, int dir) { @@ -775,6 +945,10 @@ unmap: goto out; } +/* + * The exported map_sg function for dma_ops (handles scatter-gather + * lists). + */ static void unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, int dir) { @@ -804,6 +978,9 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, spin_unlock_irqrestore(&domain->lock, flags); } +/* + * The exported alloc_coherent function for dma_ops. + */ static void *alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag) { @@ -851,6 +1028,11 @@ out: return virt_addr; } +/* + * The exported free_coherent function for dma_ops. + * FIXME: fix the generic x86 DMA layer so that it actually calls that + * function. + */ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr) { @@ -879,6 +1061,8 @@ free_mem: } /* + * The function for pre-allocating protection domains. 
+ * * If the driver core informs the DMA layer if a driver grabs a device * we don't need to preallocate the protection domains anymore. * For now we have to. @@ -921,12 +1105,20 @@ static struct dma_mapping_ops amd_iommu_dma_ops = { .unmap_sg = unmap_sg, }; +/* + * The function which clues the AMD IOMMU driver into dma_ops. + */ int __init amd_iommu_init_dma_ops(void) { struct amd_iommu *iommu; int order = amd_iommu_aperture_order; int ret; + /* + * first allocate a default protection domain for every IOMMU we + * found in the system. Devices not assigned to any other + * protection domain will be assigned to the default one. + */ list_for_each_entry(iommu, &amd_iommu_list, list) { iommu->default_dom = dma_ops_domain_alloc(iommu, order); if (iommu->default_dom == NULL) @@ -936,6 +1128,10 @@ int __init amd_iommu_init_dma_ops(void) goto free_domains; } + /* + * If device isolation is enabled, pre-allocate the protection + * domains for each device. + */ if (amd_iommu_isolate) prealloc_protection_domains(); @@ -947,6 +1143,7 @@ int __init amd_iommu_init_dma_ops(void) gart_iommu_aperture = 0; #endif + /* Make the driver finally visible to the drivers */ dma_ops = &amd_iommu_dma_ops; return 0; -- cgit v1.2.3 From 8ea80d783efd0c50577ec8d69757ae54c408eacd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:23 +0200 Subject: x86, AMD IOMMU: replace HIGH_U32 macro with upper_32_bits function Removes a driver specific macro and replaces it with a generic function already available in Linux. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 4bae96ca7c1..9098f047c1a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -109,7 +109,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) memset(&cmd, 0, sizeof(cmd)); cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; - cmd.data[1] = HIGH_U32(ready_phys); + cmd.data[1] = upper_32_bits(ready_phys); cmd.data[2] = 1; /* value written to 'ready' */ CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); @@ -157,7 +157,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); cmd.data[1] |= domid; cmd.data[2] = LOW_U32(address); - cmd.data[3] = HIGH_U32(address); + cmd.data[3] = upper_32_bits(address); if (s) /* size bit - we flush more than one 4kb page */ cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ -- cgit v1.2.3 From 208ec8c94d818a3def0b424958493728871716d1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:24 +0200 Subject: x86, AMD IOMMU: replace UPDATE_LAST_BDF macro with a function This patch replaces the UPDATE_LAST_BDF macro in the init code with the update_last_devid function. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 9ddb46d7c52..6e1c8ffc0c5 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -30,11 +30,6 @@ /* * definitions for the ACPI scanning code */ -#define UPDATE_LAST_BDF(x) do {\ - if ((x) > amd_iommu_last_bdf) \ - amd_iommu_last_bdf = (x); \ - } while (0); - #define DEVID(bus, devfn) (((bus) << 8) | (devfn)) #define PCI_BUS(x) (((x) >> 8) & 0xff) #define IVRS_HEADER_LENGTH 48 @@ -169,6 +164,12 @@ static u32 dev_table_size; /* size of the device table */ static u32 alias_table_size; /* size of the alias table */ static u32 rlookup_table_size; /* size if the rlookup table */ +static inline void update_last_devid(u16 devid) +{ + if (devid > amd_iommu_last_bdf) + amd_iommu_last_bdf = devid; +} + /**************************************************************************** * * AMD IOMMU MMIO register space handling functions @@ -287,7 +288,7 @@ static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) u32 cap; cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); + update_last_devid(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); return 0; } @@ -317,7 +318,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) case IVHD_DEV_ALIAS: case IVHD_DEV_EXT_SELECT: /* all the above subfield types refer to device ids */ - UPDATE_LAST_BDF(dev->devid); + update_last_devid(dev->devid); break; default: break; -- cgit v1.2.3 From c571484e53f3e1d90bc5374528580c7419d28d4c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:25 +0200 Subject: x86, AMD IOMMU: replace TBL_SIZE 
macro with a function This patch converts the TBL_SIZE macro in the init code to a function. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 6e1c8ffc0c5..1f148393cf7 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -33,7 +33,6 @@ #define DEVID(bus, devfn) (((bus) << 8) | (devfn)) #define PCI_BUS(x) (((x) >> 8) & 0xff) #define IVRS_HEADER_LENGTH 48 -#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x)))) #define ACPI_IVHD_TYPE 0x10 #define ACPI_IVMD_TYPE_ALL 0x20 @@ -170,6 +169,14 @@ static inline void update_last_devid(u16 devid) amd_iommu_last_bdf = devid; } +static inline unsigned long tbl_size(int entry_size) +{ + unsigned shift = PAGE_SHIFT + + get_order(amd_iommu_last_bdf * entry_size); + + return 1UL << shift; +} + /**************************************************************************** * * AMD IOMMU MMIO register space handling functions @@ -884,9 +891,9 @@ int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) return -ENODEV; - dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE); - alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE); - rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE); + dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); + alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); + rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); ret = -ENOMEM; -- cgit v1.2.3 From 9a836de0c9944c42d006ec241712c72e74737c73 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:26 +0200 Subject: x86, AMD IOMMU: remove unnecessary free checks from init code This patch removes unnecessary checks before memory is released. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 1f148393cf7..0f5a9115a69 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -417,9 +417,7 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) static void __init free_command_buffer(struct amd_iommu *iommu) { - if (iommu->cmd_buf) - free_pages((unsigned long)iommu->cmd_buf, - get_order(CMD_BUFFER_SIZE)); + free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } /* sets a specific bit in the device table entry. */ @@ -987,24 +985,19 @@ out: return ret; free: - if (amd_iommu_pd_alloc_bitmap) - free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); - if (amd_iommu_pd_table) - free_pages((unsigned long)amd_iommu_pd_table, - get_order(rlookup_table_size)); + free_pages((unsigned long)amd_iommu_pd_table, + get_order(rlookup_table_size)); - if (amd_iommu_rlookup_table) - free_pages((unsigned long)amd_iommu_rlookup_table, - get_order(rlookup_table_size)); + free_pages((unsigned long)amd_iommu_rlookup_table, + get_order(rlookup_table_size)); - if (amd_iommu_alias_table) - free_pages((unsigned long)amd_iommu_alias_table, - get_order(alias_table_size)); + free_pages((unsigned long)amd_iommu_alias_table, + get_order(alias_table_size)); - if (amd_iommu_dev_table) - free_pages((unsigned long)amd_iommu_dev_table, - get_order(dev_table_size)); + free_pages((unsigned long)amd_iommu_dev_table, + get_order(dev_table_size)); free_iommu_all(); -- cgit v1.2.3 From 136f78a19cf94d469f31a4009c7c0ac2301fbbf0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:27 +0200 Subject: 
x86, AMD IOMMU: add an emergency exit to the completion wait loop To make the loop waiting for the completion wait command not wait forever this patch adds a limit of cycles that loop. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 9098f047c1a..7fa2d5d57dd 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -32,6 +32,8 @@ #define to_pages(addr, size) \ (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) +#define EXIT_LOOP_COUNT 10000000 + static DEFINE_RWLOCK(amd_iommu_devtable_lock); /* @@ -106,6 +108,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) struct command cmd; volatile u64 ready = 0; unsigned long ready_phys = virt_to_phys(&ready); + unsigned long i = 0; memset(&cmd, 0, sizeof(cmd)); cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; @@ -120,8 +123,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu) if (ret) return ret; - while (!ready) + while (!ready && (i < EXIT_LOOP_COUNT)) { + ++i; cpu_relax(); + } + + if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) + printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); return 0; } -- cgit v1.2.3 From d64495366ff78fdbd5bd3176a7ada2f0c2cbfba6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:28 +0200 Subject: x86, AMD IOMMU: rename struct command to iommu_cmd This patch gives the struct command a more descriptive and not so generic name. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 7fa2d5d57dd..dec10e1a397 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -39,7 +39,7 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); /* * general struct to manage commands send to an IOMMU */ -struct command { +struct iommu_cmd { u32 data[4]; }; @@ -62,7 +62,7 @@ static int iommu_has_npcache(struct amd_iommu *iommu) * Writes the command to the IOMMUs command buffer and informs the * hardware about the new command. Must be called with iommu->lock held. */ -static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) +static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) { u32 tail, head; u8 *target; @@ -83,7 +83,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) * General queuing function for commands. Takes iommu->lock and calls * __iommu_queue_command(). 
*/ -static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) +static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) { unsigned long flags; int ret; @@ -105,7 +105,7 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) static int iommu_completion_wait(struct amd_iommu *iommu) { int ret; - struct command cmd; + struct iommu_cmd cmd; volatile u64 ready = 0; unsigned long ready_phys = virt_to_phys(&ready); unsigned long i = 0; @@ -139,7 +139,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) */ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) { - struct command cmd; + struct iommu_cmd cmd; BUG_ON(iommu == NULL); @@ -158,7 +158,7 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, u64 address, u16 domid, int pde, int s) { - struct command cmd; + struct iommu_cmd cmd; memset(&cmd, 0, sizeof(cmd)); address &= PAGE_MASK; -- cgit v1.2.3 From d0312b2142ac7665031755c1cc3dba827d4eb711 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:29 +0200 Subject: x86, AMD IOMMU: remove unneeded initializations from command buffer allocation This patch removes an unneeded initialization from the alloc_command_buffer function and replaces a memset with __GFP_ZERO. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 0f5a9115a69..0124995c7b5 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -394,17 +394,15 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) */ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) { - u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL, + u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(CMD_BUFFER_SIZE)); - u64 entry = 0; + u64 entry; if (cmd_buf == NULL) return NULL; iommu->cmd_buf_size = CMD_BUFFER_SIZE; - memset(cmd_buf, 0, CMD_BUFFER_SIZE); - entry = (u64)virt_to_phys(cmd_buf); entry |= MMIO_CMD_SIZE_512; memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, -- cgit v1.2.3 From 58a3bee567b588a84cdde05fecc45439b396362c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:30 +0200 Subject: x86, AMD IOMMU: use true/false instead of 0/1 for bool value This patch replaces the integer values used for the bool variable in ACPI scanning code with true and false. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 0124995c7b5..316fe2eaeef 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -509,7 +509,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, u8 *end = p, flags = 0; u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; u32 ext_flags = 0; - bool alias = 0; + bool alias = false; struct ivhd_entry *e; /* @@ -559,7 +559,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, devid_start = e->devid; flags = e->flags; ext_flags = 0; - alias = 0; + alias = false; break; case IVHD_DEV_ALIAS: devid = e->devid; @@ -572,7 +572,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, flags = e->flags; devid_to = e->ext >> 8; ext_flags = 0; - alias = 1; + alias = true; break; case IVHD_DEV_EXT_SELECT: devid = e->devid; @@ -582,7 +582,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, devid_start = e->devid; flags = e->flags; ext_flags = e->ext; - alias = 0; + alias = false; break; case IVHD_DEV_RANGE_END: devid = e->devid; -- cgit v1.2.3 From 2e22847fbe05f2543ccebd0c2df94d9cf3c52aa5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:31 +0200 Subject: x86, AMD IOMMU: do runtime list initialization at compile time This patch changes the list initialization for the iommu list and the unity map list from runtime to compile time. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 316fe2eaeef..0c247032308 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -118,12 +118,12 @@ static int __initdata amd_iommu_detected; u16 amd_iommu_last_bdf; /* largest PCI device id we have to handle */ -struct list_head amd_iommu_unity_map; /* a list of required unity mappings +LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ int amd_iommu_isolate; /* if 1, device isolation is enabled */ -struct list_head amd_iommu_list; /* list of all AMD IOMMUs in the +LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ /* @@ -673,8 +673,6 @@ static int __init init_iommu_all(struct acpi_table_header *table) struct amd_iommu *iommu; int ret; - INIT_LIST_HEAD(&amd_iommu_list); - end += table->length; p += IVRS_HEADER_LENGTH; @@ -780,8 +778,6 @@ static int __init init_memory_definitions(struct acpi_table_header *table) u8 *p = (u8 *)table, *end = (u8 *)table; struct ivmd_header *m; - INIT_LIST_HEAD(&amd_iommu_unity_map); - end += table->length; p += IVRS_HEADER_LENGTH; -- cgit v1.2.3 From 5dc8bff0f6d0dfeb1f1c6e694294ba7c33d099f1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:32 +0200 Subject: x86, AMD IOMMU: replace memset with __GFP_ZERO for table allocation This patch removes the memset from the data structure initialization code and allocate the structures with the __GFP_ZERO flag. 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 0c247032308..2efc3d59b7e 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -890,7 +890,7 @@ int __init amd_iommu_init(void) ret = -ENOMEM; /* Device table - directly used by all IOMMUs */ - amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL, + amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(dev_table_size)); if (amd_iommu_dev_table == NULL) goto out; @@ -914,27 +914,23 @@ int __init amd_iommu_init(void) * Protection Domain table - maps devices to protection domains * This table has the same size as the rlookup_table */ - amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL, + amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(rlookup_table_size)); if (amd_iommu_pd_table == NULL) goto free; - amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL, + amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, get_order(MAX_DOMAIN_ID/8)); if (amd_iommu_pd_alloc_bitmap == NULL) goto free; /* - * memory is allocated now; initialize the device table with all zeroes - * and let all alias entries point to itself + * let all alias entries point to itself */ - memset(amd_iommu_dev_table, 0, dev_table_size); for (i = 0; i < amd_iommu_last_bdf; ++i) amd_iommu_alias_table[i] = i; - memset(amd_iommu_pd_table, 0, rlookup_table_size); - memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8); - /* * never allocate domain 0 because its used as the non-allocated and * error value placeholder -- cgit v1.2.3 From 0906372e6cf372f3162481f24a0b8ccae0eff4d7 Mon Sep 17 00:00:00 2001 From: Joerg 
Roedel Date: Fri, 11 Jul 2008 17:14:33 +0200 Subject: x86, AMD IOMMU: replace self made size parsing with memparse call This patch replaces the self-made parsing of the amd_iommu_size option with the generic memparse function call. Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 2efc3d59b7e..e0ff9404e6c 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1042,20 +1042,10 @@ static int __init parse_amd_iommu_options(char *str) static int __init parse_amd_iommu_size_options(char *str) { - for (; *str; ++str) { - if (strcmp(str, "32M") == 0) - amd_iommu_aperture_order = 25; - if (strcmp(str, "64M") == 0) - amd_iommu_aperture_order = 26; - if (strcmp(str, "128M") == 0) - amd_iommu_aperture_order = 27; - if (strcmp(str, "256M") == 0) - amd_iommu_aperture_order = 28; - if (strcmp(str, "512M") == 0) - amd_iommu_aperture_order = 29; - if (strcmp(str, "1G") == 0) - amd_iommu_aperture_order = 30; - } + unsigned order = PAGE_SHIFT + get_order(memparse(str, &str)); + + if ((order > 24) && (order < 31)) + amd_iommu_aperture_order = order; return 1; } -- cgit v1.2.3 From d591b0a3ae25f587d0c4da1e1d1a425143590790 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 11 Jul 2008 17:14:35 +0200 Subject: x86, AMD IOMMU: replace DEVID macro with a function This patch replaces the DEVID macro with a function and uses them where appropriate (also in the core code). 
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu.c | 2 +- arch/x86/kernel/amd_iommu_init.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index dec10e1a397..8c3deb027d3 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -664,7 +664,7 @@ static int get_device_resources(struct device *dev, BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); pcidev = to_pci_dev(dev); - _bdf = (pcidev->bus->number << 8) | pcidev->devfn; + _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); /* device not translated by any IOMMU in the system? */ if (_bdf >= amd_iommu_last_bdf) { diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index e0ff9404e6c..9bf1b8111b0 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -30,7 +30,6 @@ /* * definitions for the ACPI scanning code */ -#define DEVID(bus, devfn) (((bus) << 8) | (devfn)) #define PCI_BUS(x) (((x) >> 8) & 0xff) #define IVRS_HEADER_LENGTH 48 @@ -295,7 +294,7 @@ static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) u32 cap; cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - update_last_devid(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); + update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); return 0; } @@ -494,8 +493,10 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); - iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range)); - iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range)); + iommu->first_device = calc_devid(MMIO_GET_BUS(range), + MMIO_GET_FD(range)); + 
iommu->last_device = calc_devid(MMIO_GET_BUS(range), + MMIO_GET_LD(range)); } /* -- cgit v1.2.3 From 2510495e208e7a69b64fcf5cdf8966d873536d9e Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 11 Jul 2008 12:13:59 +0200 Subject: x86/pci: Removing pci-y in Makefile Cc: Sam Ravnborg Signed-off-by: Robert Richter Cc: Robert Richter Cc: Sam Ravnborg Signed-off-by: Ingo Molnar --- arch/x86/pci/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index e515e8db842..28451f41e0e 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -5,13 +5,13 @@ obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o obj-$(CONFIG_PCI_DIRECT) += direct.o obj-$(CONFIG_PCI_OLPC) += olpc.o -pci-y := fixup.o -pci-$(CONFIG_ACPI) += acpi.o -pci-y += legacy.o irq.o +obj-y += fixup.o +obj-$(CONFIG_ACPI) += acpi.o +obj-y += legacy.o irq.o -pci-$(CONFIG_X86_VISWS) += visws.o +obj-$(CONFIG_X86_VISWS) += visws.o -pci-$(CONFIG_X86_NUMAQ) += numa.o +obj-$(CONFIG_X86_NUMAQ) += numa.o -obj-y += $(pci-y) common.o early.o +obj-y += common.o early.o obj-y += amd_bus.o -- cgit v1.2.3 From 060b9708a0c04cf9af69c128ef7954b6f0a84180 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 11 Jul 2008 12:14:27 +0200 Subject: x86/pci: Changing subsystem initialization order for NUMA Cc: Yinghai Lu Signed-off-by: Robert Richter Cc: Robert Richter Cc: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/pci/legacy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 132876cc6fc..60e8caa1356 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -57,14 +57,14 @@ static int __init pci_legacy_init(void) int __init pci_subsys_init(void) { +#ifdef CONFIG_X86_NUMAQ + pci_numa_init(); +#endif #ifdef CONFIG_ACPI pci_acpi_init(); #endif pci_legacy_init(); pcibios_irq_init(); -#ifdef 
CONFIG_X86_NUMAQ - pci_numa_init(); -#endif pcibios_init(); return 0; -- cgit v1.2.3 From 9314d301390ad0d96986da3d893a21e81a287982 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 11 Jul 2008 12:18:40 +0200 Subject: x86/pci: renamed: numa.c -> numaq_32.c Cc: Yinghai Lu Signed-off-by: Robert Richter Cc: Robert Richter Cc: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/pci/Makefile | 2 +- arch/x86/pci/numa.c | 178 ------------------------------------------------ arch/x86/pci/numaq_32.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 179 insertions(+), 179 deletions(-) delete mode 100644 arch/x86/pci/numa.c create mode 100644 arch/x86/pci/numaq_32.c (limited to 'arch') diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 28451f41e0e..d49202e740e 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -11,7 +11,7 @@ obj-y += legacy.o irq.o obj-$(CONFIG_X86_VISWS) += visws.o -obj-$(CONFIG_X86_NUMAQ) += numa.o +obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-y += common.o early.o obj-y += amd_bus.o diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c deleted file mode 100644 index 8b5ca196673..00000000000 --- a/arch/x86/pci/numa.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * numa.c - Low-level PCI access for NUMA-Q machines - */ - -#include -#include -#include -#include -#include -#include "pci.h" - -#define XQUAD_PORTIO_BASE 0xfe400000 -#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. 
*/ - -#define BUS2QUAD(global) (mp_bus_id_to_node[global]) - -#define BUS2LOCAL(global) (mp_bus_id_to_local[global]) - -#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) - -/* Where the IO area was mapped on multiquad, always 0 otherwise */ -void *xquad_portio; -EXPORT_SYMBOL(xquad_portio); - -#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) - -#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ - (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3)) - -static void write_cf8(unsigned bus, unsigned devfn, unsigned reg) -{ - unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg); - if (xquad_portio) - writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus))); - else - outl(val, 0xCF8); -} - -static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 *value) -{ - unsigned long flags; - void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus)); - - if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - write_cf8(bus, devfn, reg); - - switch (len) { - case 1: - if (xquad_portio) - *value = readb(adr + (reg & 3)); - else - *value = inb(0xCFC + (reg & 3)); - break; - case 2: - if (xquad_portio) - *value = readw(adr + (reg & 2)); - else - *value = inw(0xCFC + (reg & 2)); - break; - case 4: - if (xquad_portio) - *value = readl(adr); - else - *value = inl(0xCFC); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 value) -{ - unsigned long flags; - void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus)); - - if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - write_cf8(bus, devfn, reg); - - switch (len) { - case 1: - if (xquad_portio) - writeb(value, adr + (reg & 
3)); - else - outb((u8)value, 0xCFC + (reg & 3)); - break; - case 2: - if (xquad_portio) - writew(value, adr + (reg & 2)); - else - outw((u16)value, 0xCFC + (reg & 2)); - break; - case 4: - if (xquad_portio) - writel(value, adr + reg); - else - outl((u32)value, 0xCFC); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -#undef PCI_CONF1_MQ_ADDRESS - -static struct pci_raw_ops pci_direct_conf1_mq = { - .read = pci_conf1_mq_read, - .write = pci_conf1_mq_write -}; - - -static void __devinit pci_fixup_i450nx(struct pci_dev *d) -{ - /* - * i450NX -- Find and scan all secondary buses on all PXB's. - */ - int pxb, reg; - u8 busno, suba, subb; - int quad = BUS2QUAD(d->bus->number); - - printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); - reg = 0xd0; - for(pxb=0; pxb<2; pxb++) { - pci_read_config_byte(d, reg++, &busno); - pci_read_config_byte(d, reg++, &suba); - pci_read_config_byte(d, reg++, &subb); - DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); - if (busno) { - /* Bus A */ - pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); - } - if (suba < subb) { - /* Bus B */ - pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1)); - } - } - pcibios_last_bus = -1; -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); - -int __init pci_numa_init(void) -{ - int quad; - - if (!found_numaq) - return 0; - - raw_pci_ops = &pci_direct_conf1_mq; - - if (pcibios_scanned++) - return 0; - - pci_root_bus = pcibios_scan_root(0); - if (pci_root_bus) - pci_bus_add_devices(pci_root_bus); - if (num_online_nodes() > 1) - for_each_online_node(quad) { - if (quad == 0) - continue; - printk("Scanning PCI bus %d for quad %d\n", - QUADLOCAL2BUS(quad,0), quad); - pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0)); - } - return 0; -} diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c new file mode 100644 index 00000000000..8b5ca196673 --- /dev/null +++ b/arch/x86/pci/numaq_32.c 
@@ -0,0 +1,178 @@ +/* + * numa.c - Low-level PCI access for NUMA-Q machines + */ + +#include +#include +#include +#include +#include +#include "pci.h" + +#define XQUAD_PORTIO_BASE 0xfe400000 +#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ + +#define BUS2QUAD(global) (mp_bus_id_to_node[global]) + +#define BUS2LOCAL(global) (mp_bus_id_to_local[global]) + +#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) + +/* Where the IO area was mapped on multiquad, always 0 otherwise */ +void *xquad_portio; +EXPORT_SYMBOL(xquad_portio); + +#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) + +#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ + (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3)) + +static void write_cf8(unsigned bus, unsigned devfn, unsigned reg) +{ + unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg); + if (xquad_portio) + writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus))); + else + outl(val, 0xCF8); +} + +static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + unsigned long flags; + void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus)); + + if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + write_cf8(bus, devfn, reg); + + switch (len) { + case 1: + if (xquad_portio) + *value = readb(adr + (reg & 3)); + else + *value = inb(0xCFC + (reg & 3)); + break; + case 2: + if (xquad_portio) + *value = readw(adr + (reg & 2)); + else + *value = inw(0xCFC + (reg & 2)); + break; + case 4: + if (xquad_portio) + *value = readl(adr); + else + *value = inl(0xCFC); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + unsigned long flags; + void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, 
BUS2QUAD(bus)); + + if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + write_cf8(bus, devfn, reg); + + switch (len) { + case 1: + if (xquad_portio) + writeb(value, adr + (reg & 3)); + else + outb((u8)value, 0xCFC + (reg & 3)); + break; + case 2: + if (xquad_portio) + writew(value, adr + (reg & 2)); + else + outw((u16)value, 0xCFC + (reg & 2)); + break; + case 4: + if (xquad_portio) + writel(value, adr + reg); + else + outl((u32)value, 0xCFC); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +#undef PCI_CONF1_MQ_ADDRESS + +static struct pci_raw_ops pci_direct_conf1_mq = { + .read = pci_conf1_mq_read, + .write = pci_conf1_mq_write +}; + + +static void __devinit pci_fixup_i450nx(struct pci_dev *d) +{ + /* + * i450NX -- Find and scan all secondary buses on all PXB's. + */ + int pxb, reg; + u8 busno, suba, subb; + int quad = BUS2QUAD(d->bus->number); + + printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); + reg = 0xd0; + for(pxb=0; pxb<2; pxb++) { + pci_read_config_byte(d, reg++, &busno); + pci_read_config_byte(d, reg++, &suba); + pci_read_config_byte(d, reg++, &subb); + DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); + if (busno) { + /* Bus A */ + pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); + } + if (suba < subb) { + /* Bus B */ + pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1)); + } + } + pcibios_last_bus = -1; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); + +int __init pci_numa_init(void) +{ + int quad; + + if (!found_numaq) + return 0; + + raw_pci_ops = &pci_direct_conf1_mq; + + if (pcibios_scanned++) + return 0; + + pci_root_bus = pcibios_scan_root(0); + if (pci_root_bus) + pci_bus_add_devices(pci_root_bus); + if (num_online_nodes() > 1) + for_each_online_node(quad) { + if (quad == 0) + continue; + printk("Scanning PCI bus %d for quad %d\n", + 
QUADLOCAL2BUS(quad,0), quad); + pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0)); + } + return 0; +} -- cgit v1.2.3 From e27cf3a2e151b79375efadf71a5d383ad416fb44 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 11 Jul 2008 12:18:41 +0200 Subject: x86/pci: renaming numa into numaq Cc: Yinghai Lu Signed-off-by: Robert Richter Cc: Robert Richter Cc: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/pci/legacy.c | 2 +- arch/x86/pci/numaq_32.c | 4 ++-- arch/x86/pci/pci.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 60e8caa1356..f405eb0b891 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -58,7 +58,7 @@ static int __init pci_legacy_init(void) int __init pci_subsys_init(void) { #ifdef CONFIG_X86_NUMAQ - pci_numa_init(); + pci_numaq_init(); #endif #ifdef CONFIG_ACPI pci_acpi_init(); diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 8b5ca196673..f4b16dc11da 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -1,5 +1,5 @@ /* - * numa.c - Low-level PCI access for NUMA-Q machines + * numaq_32.c - Low-level PCI access for NUMA-Q machines */ #include @@ -151,7 +151,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); -int __init pci_numa_init(void) +int __init pci_numaq_init(void) { int quad; diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index b2270a55b0c..36b8dd019fa 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -107,7 +107,7 @@ extern void __init dmi_check_skip_isa_align(void); /* some common used subsys_initcalls */ extern int __init pci_acpi_init(void); extern int __init pcibios_irq_init(void); -extern int __init pci_numa_init(void); +extern int __init pci_numaq_init(void); extern int __init pcibios_init(void); /* pci-mmconfig.c */ -- cgit v1.2.3 From 3cabf37f6167125cb5185db05f5061650f685ab7 
Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 11 Jul 2008 12:26:59 +0200 Subject: x86/pci: Changing subsystem init for visws I don't know, if this new code boots, but at least it compiles. Someone should really test it. Signed-off-by: Robert Richter Cc: Robert Richter Signed-off-by: Ingo Molnar --- arch/x86/pci/legacy.c | 3 +++ arch/x86/pci/pci.h | 1 + arch/x86/pci/visws.c | 23 +++++++---------------- 3 files changed, 11 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index f405eb0b891..ec9ce35e44d 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -62,6 +62,9 @@ int __init pci_subsys_init(void) #endif #ifdef CONFIG_ACPI pci_acpi_init(); +#endif +#ifdef CONFIG_X86_VISWS + pci_visws_init(); #endif pci_legacy_init(); pcibios_irq_init(); diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index 36b8dd019fa..a2c55ee98af 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -107,6 +107,7 @@ extern void __init dmi_check_skip_isa_align(void); /* some common used subsys_initcalls */ extern int __init pci_acpi_init(void); extern int __init pcibios_irq_init(void); +extern int __init pci_visws_init(void); extern int __init pci_numaq_init(void); extern int __init pcibios_init(void); diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 1a7bed492bb..42f4cb19fac 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c @@ -86,8 +86,14 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq) pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } -static int __init pci_visws_init(void) +int __init pci_visws_init(void) { + if (!is_visws_box()) + return -1; + + pcibios_enable_irq = &pci_visws_enable_irq; + pcibios_disable_irq = &pci_visws_disable_irq; + /* The VISWS supports configuration access type 1 only */ pci_probe = (pci_probe | PCI_PROBE_CONF1) & ~(PCI_PROBE_BIOS | PCI_PROBE_CONF2); @@ -105,18 +111,3 @@ static int __init pci_visws_init(void) 
pcibios_resource_survey(); return 0; } - -static __init int pci_subsys_init(void) -{ - if (!is_visws_box()) - return -1; - - pcibios_enable_irq = &pci_visws_enable_irq; - pcibios_disable_irq = &pci_visws_disable_irq; - - pci_visws_init(); - pcibios_init(); - - return 0; -} -subsys_initcall(pci_subsys_init); -- cgit v1.2.3 From d54191b85e294c46f05a2249b1f55ae54930bcc7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 22 Apr 2008 15:09:30 +0200 Subject: Kprobe smoke test lockdep warning On Mon, 2008-04-21 at 18:54 -0400, Masami Hiramatsu wrote: > Thank you for reporting. > > Actually, kprobes tries to fixup thread's flags in post_kprobe_handler > (which is called from kprobe_exceptions_notify) by > trace_hardirqs_fixup_flags(pt_regs->flags). However, even the irq flag > is set in pt_regs->flags, true hardirq is still off until returning > from do_debug. Thus, lockdep assumes that hardirq is off without annotation. > > IMHO, one possible solution is that fixing hardirq flags right after > notify_die in do_debug instead of in post_kprobe_handler. My reply to BZ 10489: > [ 2.707509] Kprobe smoke test started > [ 2.709300] ------------[ cut here ]------------ > [ 2.709420] WARNING: at kernel/lockdep.c:2658 check_flags+0x4d/0x12c() > [ 2.709541] Modules linked in: > [ 2.709588] Pid: 1, comm: swapper Not tainted 2.6.25.jml.057 #1 > [ 2.709588] [] warn_on_slowpath+0x41/0x51 > [ 2.709588] [] ? save_stack_trace+0x1d/0x3b > [ 2.709588] [] ? save_trace+0x37/0x89 > [ 2.709588] [] ? kernel_map_pages+0x103/0x11c > [ 2.709588] [] ? native_sched_clock+0xca/0xea > [ 2.709588] [] ? mark_held_locks+0x41/0x5c > [ 2.709588] [] ? kprobe_exceptions_notify+0x322/0x3af > [ 2.709588] [] ? trace_hardirqs_on+0xf1/0x119 > [ 2.709588] [] ? kprobe_exceptions_notify+0x355/0x3af > [ 2.709588] [] check_flags+0x4d/0x12c > [ 2.709588] [] lock_release+0x58/0x195 > [ 2.709588] [] ? 
__atomic_notifier_call_chain+0x0/0x80 > [ 2.709588] [] __atomic_notifier_call_chain+0x5a/0x80 > [ 2.709588] [] atomic_notifier_call_chain+0xc/0xe > [ 2.709588] [] notify_die+0x2d/0x2f > [ 2.709588] [] do_debug+0x67/0xfe > [ 2.709588] [] debug_stack_correct+0x27/0x30 > [ 2.709588] [] ? kprobe_target+0x1/0x34 > [ 2.709588] [] ? init_test_probes+0x50/0x186 > [ 2.709588] [] init_kprobes+0x85/0x8c > [ 2.709588] [] kernel_init+0x13d/0x298 > [ 2.709588] [] ? kernel_init+0x0/0x298 > [ 2.709588] [] ? kernel_init+0x0/0x298 > [ 2.709588] [] kernel_thread_helper+0x7/0x10 > [ 2.709588] ======================= > [ 2.709588] ---[ end trace 778e504de7e3b1e3 ]--- > [ 2.709588] possible reason: unannotated irqs-off. > [ 2.709588] irq event stamp: 370065 > [ 2.709588] hardirqs last enabled at (370065): [] kprobe_exceptions_notify+0x322/0x3af > [ 2.709588] hardirqs last disabled at (370064): [] do_int3+0x1d/0x7d > [ 2.709588] softirqs last enabled at (370050): [] __do_softirq+0xfa/0x100 > [ 2.709588] softirqs last disabled at (370045): [] do_softirq+0x74/0xd9 > [ 2.714751] Kprobe smoke test passed successfully how I love this stuff... Ok, do_debug() is a trap, this can happen at any time regardless of the machine's IRQ state. So the first thing we do is fix up the IRQ state. Then we call this die notifier stuff; and return with messed up IRQ state... YAY. So, kprobes fudges it.. notify_die(DIE_DEBUG) kprobe_exceptions_notify() post_kprobe_handler() modify regs->flags trace_hardirqs_fixup_flags(regs->flags); <--- must be it So what's the use of modifying flags if they're not meant to take effect at some point. /me tries to reproduce issue; enable kprobes test thingy && boot OK, that reproduces.. So the below makes it work - but I'm not getting this code; at the time I wrote that stuff I CC'ed each and every kprobe maintainer listed in the usual places but got no response - can someone please explain this stuff to me?
Are the saved flags only for the TF bit or are they made in full effect later (and if so, where) ? Signed-off-by: Peter Zijlstra Acked-by: Masami Hiramatsu Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index b8c6743a13d..43c019f85f0 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -860,7 +860,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) resume_execution(cur, regs, kcb); regs->flags |= kcb->kprobe_saved_flags; - trace_hardirqs_fixup_flags(regs->flags); if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { kcb->kprobe_status = KPROBE_HIT_SSDONE; -- cgit v1.2.3 From a312b37b2a212fd2e227d1d6321f903b91b65ec7 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Tue, 8 Jul 2008 15:06:23 -0700 Subject: x86/paravirt: call paravirt_pagetable_setup_{start, done} Call paravirt_pagetable_setup_{start,done} These paravirt_ops functions were not being called on x86_64. 
Signed-off-by: Eduardo Habkost Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 4 ++++ arch/x86/kernel/setup.c | 2 ++ arch/x86/xen/enlighten.c | 4 ++++ 3 files changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e0f571d58c1..2963ab5d91e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -373,6 +374,9 @@ struct pv_mmu_ops pv_mmu_ops = { #ifndef CONFIG_X86_64 .pagetable_setup_start = native_pagetable_setup_start, .pagetable_setup_done = native_pagetable_setup_done, +#else + .pagetable_setup_start = paravirt_nop, + .pagetable_setup_done = paravirt_nop, #endif .read_cr2 = native_read_cr2, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 36c540d4ac4..8ce6a91ce10 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -819,7 +819,9 @@ void __init setup_arch(char **cmdline_p) vmi_init(); #endif + paravirt_pagetable_setup_start(swapper_pg_dir); paging_init(); + paravirt_pagetable_setup_done(swapper_pg_dir); #ifdef CONFIG_X86_64 map_vsyscall(); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bb508456ef5..eaab6c9b4a8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -841,6 +841,7 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) static __init void xen_pagetable_setup_start(pgd_t *base) { +#ifdef CONFIG_X86_32 pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; int i; @@ -886,6 +887,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) /* Unpin initial Xen pagetable */ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(xen_start_info->pt_base))); +#endif /* CONFIG_X86_32 */ } void xen_setup_shared_info(void) @@ -927,9 +929,11 @@ static __init void xen_pagetable_setup_done(pgd_t *base) xen_setup_shared_info(); +#ifdef 
CONFIG_X86_32 /* Actually pin the pagetable down, but we can't set PG_pinned yet because the page structures don't exist yet. */ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); +#endif } static __init void xen_post_allocator_init(void) -- cgit v1.2.3 From c1f2f09ef66d5dadd5fe42ea909e708470c9636d Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Tue, 8 Jul 2008 15:06:24 -0700 Subject: pvops-64: call paravirt_post_allocator_init() on setup_arch() Signed-off-by: Eduardo Habkost Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 1 + arch/x86/mm/init_32.c | 2 -- arch/x86/xen/mmu.c | 8 +++++--- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8ce6a91ce10..2ed504b97d4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -822,6 +822,7 @@ void __init setup_arch(char **cmdline_p) paravirt_pagetable_setup_start(swapper_pg_dir); paging_init(); paravirt_pagetable_setup_done(swapper_pg_dir); + paravirt_post_allocator_init(); #ifdef CONFIG_X86_64 map_vsyscall(); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9689a5138e6..7113acd8ac4 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -868,8 +868,6 @@ void __init paging_init(void) */ sparse_init(); zone_sizes_init(); - - paravirt_post_allocator_init(); } /* diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ff0aa74afaa..ebd6900e331 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -656,9 +656,11 @@ void xen_mm_pin_all(void) spin_unlock_irqrestore(&pgd_lock, flags); } -/* The init_mm pagetable is really pinned as soon as its created, but - that's before we have page structures to store the bits. So do all - the book-keeping now. */ +/* + * The init_mm pagetable is really pinned as soon as its created, but + * that's before we have page structures to store the bits. 
So do all + * the book-keeping now. + */ static __init int mark_pinned(struct page *page, enum pt_level level) { SetPagePinned(page); -- cgit v1.2.3 From 8840c0ccd763936a8e730ece118197a51be8dc8e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:25 -0700 Subject: x86_64: there's no need to preallocate level1_fixmap_pgt Early fixmap will allocate its own L1 pagetable page for fixmap mappings, so there's no need to preallocate one. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b07ac7b217c..4b6bda21837 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -362,12 +362,6 @@ NEXT_PAGE(level3_kernel_pgt) .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE NEXT_PAGE(level2_fixmap_pgt) - .fill 506,8,0 - .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE - /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ - .fill 5,8,0 - -NEXT_PAGE(level1_fixmap_pgt) .fill 512,8,0 NEXT_PAGE(level2_ident_pgt) -- cgit v1.2.3 From 87b935a0ef9a1ddf62f2f0c0fc17b10654ff41cd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:26 -0700 Subject: x86: clean up formatting of __switch_to process_64.c:__switch_to has some very old strange formatting, some of it dating back to pre-git. Fix it up. No functional changes. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 56 ++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a8e53626ac9..e8a8e1b9981 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -537,8 +537,8 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { - struct thread_struct *prev = &prev_p->thread, - *next = &next_p->thread; + struct thread_struct *prev = &prev_p->thread; + struct thread_struct *next = &next_p->thread; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); unsigned fsindex, gsindex; @@ -586,35 +586,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch FS and GS. + * + * Segment register != 0 always requires a reload. Also + * reload when it has changed. When prev process used 64bit + * base always reload to avoid an information leak. */ - { - /* segment register != 0 always requires a reload. - also reload when it has changed. - when prev process used 64bit base always reload - to avoid an information leak. 
*/ - if (unlikely(fsindex | next->fsindex | prev->fs)) { - loadsegment(fs, next->fsindex); - /* check if the user used a selector != 0 - * if yes clear 64bit base, since overloaded base - * is always mapped to the Null selector - */ - if (fsindex) + if (unlikely(fsindex | next->fsindex | prev->fs)) { + loadsegment(fs, next->fsindex); + /* + * Check if the user used a selector != 0; if yes + * clear 64bit base, since overloaded base is always + * mapped to the Null selector + */ + if (fsindex) prev->fs = 0; - } - /* when next process has a 64bit base use it */ - if (next->fs) - wrmsrl(MSR_FS_BASE, next->fs); - prev->fsindex = fsindex; - - if (unlikely(gsindex | next->gsindex | prev->gs)) { - load_gs_index(next->gsindex); - if (gsindex) + } + /* when next process has a 64bit base use it */ + if (next->fs) + wrmsrl(MSR_FS_BASE, next->fs); + prev->fsindex = fsindex; + + if (unlikely(gsindex | next->gsindex | prev->gs)) { + load_gs_index(next->gsindex); + if (gsindex) prev->gs = 0; - } - if (next->gs) - wrmsrl(MSR_KERNEL_GS_BASE, next->gs); - prev->gsindex = gsindex; } + if (next->gs) + wrmsrl(MSR_KERNEL_GS_BASE, next->gs); + prev->gsindex = gsindex; /* Must be after DS reload */ unlazy_fpu(prev_p); @@ -627,7 +626,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) write_pda(pcurrent, next_p); write_pda(kernelstack, - (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); + (unsigned long)task_stack_page(next_p) + + THREAD_SIZE - PDA_STACKOFFSET); #ifdef CONFIG_CC_STACKPROTECTOR write_pda(stack_canary, next_p->stack_canary); /* -- cgit v1.2.3 From cbcd79c2e5b496b84845618cef734b4c40736576 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:27 -0700 Subject: x86: use __page_aligned_data/bss Update arch/x86's use of page-aligned variables. The change to arch/x86/xen/mmu.c fixes an actual bug, but the rest are cleanups and to set a precedent. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 4 ++-- arch/x86/kernel/irq_32.c | 7 ++----- arch/x86/xen/mmu.c | 15 ++++++--------- 3 files changed, 10 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 7b8cc72feb4..15419cd3c5a 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -517,8 +518,7 @@ void pda_init(int cpu) } char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] -__attribute__((section(".bss.page_aligned"))); + DEBUG_STKSZ] __page_aligned_bss; extern asmlinkage void ignore_sysret(void); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 47a6f6f1247..1cf8c1fcc08 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -83,11 +83,8 @@ union irq_ctx { static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; -static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); - -static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); +static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; +static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; static void call_on_stack(void *func, void *stack) { diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ebd6900e331..4fca9d88bef 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -60,22 +61,18 @@ #define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) /* Placeholder for holes in the address space */ -static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] - 
__attribute__((section(".data.page_aligned"))) = +static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data = { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; /* Array of pointers to pages containing p2m entries */ -static unsigned long *p2m_top[TOP_ENTRIES] - __attribute__((section(".data.page_aligned"))) = +static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data = { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; /* Arrays of p2m arrays expressed in mfns used for save/restore */ -static unsigned long p2m_top_mfn[TOP_ENTRIES] - __attribute__((section(".bss.page_aligned"))); +static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss; -static unsigned long p2m_top_mfn_list[ - PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)] - __attribute__((section(".bss.page_aligned"))); +static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE] + __page_aligned_bss; static inline unsigned p2m_top_index(unsigned long pfn) { -- cgit v1.2.3 From 360c044eb1b985a9ef29d952276a3e14973bed93 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:28 -0700 Subject: x86_64: adjust exception frame in ia32entry The 32-bit compat int $0x80 entrypoint needs exception frame adjustment. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32entry.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 20371d0635e..0ae1e77eae5 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -321,6 +321,7 @@ ENTRY(ia32_syscall) /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ /*CFI_REL_OFFSET cs,CS-RIP*/ CFI_REL_OFFSET rip,RIP-RIP + PARAVIRT_ADJUST_EXCEPTION_FRAME SWAPGS /* * No need to follow this irqs on/off section: the syscall -- cgit v1.2.3 From 7c33b1e6ee26d67551109aca04d46544d0ce55b1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:29 -0700 Subject: x86_64: unstatic get_local_pda This allows Xen's xen_cpu_up() to allocate a pda for the new CPU. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 687376ab07e..1deb3b624a7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -768,7 +768,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work) * * Must be called after the _cpu_pda pointer table is initialized. */ -static int __cpuinit get_local_pda(int cpu) +int __cpuinit get_local_pda(int cpu) { struct x8664_pda *oldpda, *newpda; unsigned long size = sizeof(struct x8664_pda); -- cgit v1.2.3 From 8ba6c2b0958c332d2f3336f4ca9c116ed81f38e9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:30 -0700 Subject: xen: print backtrace on multicall failure Print a backtrace if a multicall fails, to help with debugging. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/multicalls.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 3c63c4da7ed..9efd1c6c977 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -76,6 +76,7 @@ void xen_mc_flush(void) if (ret) { printk(KERN_ERR "%d multicall(s) failed: cpu %d\n", ret, smp_processor_id()); + dump_stack(); for (i = 0; i < b->mcidx; i++) { printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n", i+1, b->mcidx, -- cgit v1.2.3 From ad55db9fed6d6cd09333045945cb03ba2c070085 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Tue, 8 Jul 2008 15:06:32 -0700 Subject: xen: add xen_arch_resume()/xen_timer_resume hook for ia64 support Add xen_timer_resume() hook. Timer resume should be done after the event channel is resumed. Add xen_arch_resume() hook, called when ipi becomes usable after resume. After resume, some cpu-specific resources must be reinitialized on ia64 that can't be set by another cpu. However, the available hooks are run only once, on a single cpu, so ipi has to be used. During stop_machine_run() ipi can't be used because interrupts are masked. So add another hook after stop_machine_run(). Another approach might be to use the resume hook which is run by device_resume(). However, device_resume() may be executed on the suspend error recovery path. So it would be necessary to determine whether it is executed on the real resume path or the error recovery path.
Signed-off-by: Isaku Yamahata Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/suspend.c | 5 ++++- arch/x86/xen/xen-ops.h | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 251669a932d..2a234db5949 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -38,8 +38,11 @@ void xen_post_suspend(int suspend_cancelled) xen_cpu_initialized_map = cpu_online_map; #endif xen_vcpu_restore(); - xen_timer_resume(); } } +void xen_arch_resume(void) +{ + /* nothing */ +} diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 6f4b1045c1c..77354d20425 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -37,7 +37,6 @@ void __init xen_time_init(void); unsigned long xen_get_wallclock(void); int xen_set_wallclock(unsigned long time); unsigned long long xen_sched_clock(void); -void xen_timer_resume(void); irqreturn_t xen_debug_interrupt(int irq, void *dev_id); -- cgit v1.2.3 From 851fa3c4e7b50d6a946d8b4c0a68683b5e56b2f1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:33 -0700 Subject: xen: define set_pte from the outset We need set_pte to work from a relatively early point, so enable it from the start. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index eaab6c9b4a8..c5f0b40aa39 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -845,9 +845,6 @@ static __init void xen_pagetable_setup_start(pgd_t *base) pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; int i; - /* special set_pte for pagetable initialization */ - pv_mmu_ops.set_pte = xen_set_pte_init; - init_mm.pgd = base; /* * copy top-level of Xen-supplied pagetable into place. This @@ -1174,7 +1171,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .kmap_atomic_pte = xen_kmap_atomic_pte, #endif - .set_pte = NULL, /* see xen_pagetable_setup_* */ + .set_pte = xen_set_pte_init, .set_pte_at = xen_set_pte_at, .set_pmd = xen_set_pmd_hyper, -- cgit v1.2.3 From 48b5db20621388582ca11ac3c61d3403966dbe51 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:34 -0700 Subject: xen64: define asm/xen/interface for 64-bit Copy 64-bit definitions of various interface structures into place. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 297bf9f5b8b..7856e37f604 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -10,18 +10,6 @@ enum pt_level { PT_PTE }; -/* - * Page-directory addresses above 4GB do not fit into architectural %cr3. - * When accessing %cr3, or equivalent field in vcpu_guest_context, guests - * must use the following accessor macros to pack/unpack valid MFNs. 
- * - * Note that Xen is using the fact that the pagetable base is always - * page-aligned, and putting the 12 MSB of the address into the 12 LSB - * of cr3. - */ -#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) -#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) - void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); -- cgit v1.2.3 From 7077c33d81a8d790135ae87cd19e6efcb075c23a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:35 -0700 Subject: xen: make ELF notes work for 32 and 64 bit Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/xen-head.S | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7c0cf6320a0..a9cac9dc04b 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -5,7 +5,10 @@ #include #include + #include +#include + #include #include @@ -21,21 +24,21 @@ ENTRY(startup_xen) .pushsection .text .align PAGE_SIZE_asm ENTRY(hypercall_page) - .skip 0x1000 + .skip PAGE_SIZE_asm .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") - ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long __PAGE_OFFSET) - ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __PAGE_OFFSET) + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad _PAGE_PRESENT; 
.quad _PAGE_PRESENT) ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) - ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START) + ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START) #endif /*CONFIG_XEN */ -- cgit v1.2.3 From f6e587325b3bc7e5c829a407ddc25b52c1e73851 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:38 -0700 Subject: xen64: add extra pv_mmu_ops We need extra pv_mmu_ops for 64-bit, to deal with the extra level of pagetable. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 33 ++++++++++++++++++++++++++++++- arch/x86/xen/mmu.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++- arch/x86/xen/mmu.h | 15 ++++++++++++-- 3 files changed, 95 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c5f0b40aa39..afb047e30bd 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -803,6 +803,18 @@ static void xen_release_pmd(u32 pfn) xen_release_ptpage(pfn, PT_PMD); } +#if PAGETABLE_LEVELS == 4 +static void xen_alloc_pud(struct mm_struct *mm, u32 pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PUD); +} + +static void xen_release_pud(u32 pfn) +{ + xen_release_ptpage(pfn, PT_PUD); +} +#endif + #ifdef CONFIG_HIGHPTE static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) { @@ -922,6 +934,11 @@ static __init void xen_pagetable_setup_done(pgd_t *base) pv_mmu_ops.alloc_pmd = xen_alloc_pmd; pv_mmu_ops.release_pte = xen_release_pte; pv_mmu_ops.release_pmd = xen_release_pmd; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.alloc_pud = xen_alloc_pud; + pv_mmu_ops.release_pud = xen_release_pud; +#endif + pv_mmu_ops.set_pte = xen_set_pte; xen_setup_shared_info(); @@ -937,6 +954,9 @@ static __init void xen_post_allocator_init(void) { pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; +#if PAGETABLE_LEVELS == 4 + 
pv_mmu_ops.set_pgd = xen_set_pgd; +#endif xen_mark_init_mm_pinned(); } @@ -1185,15 +1205,26 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pte = xen_make_pte, .make_pgd = xen_make_pgd, +#ifdef CONFIG_X86_PAE .set_pte_atomic = xen_set_pte_atomic, .set_pte_present = xen_set_pte_at, - .set_pud = xen_set_pud_hyper, .pte_clear = xen_pte_clear, .pmd_clear = xen_pmd_clear, +#endif /* CONFIG_X86_PAE */ + .set_pud = xen_set_pud_hyper, .make_pmd = xen_make_pmd, .pmd_val = xen_pmd_val, +#if PAGETABLE_LEVELS == 4 + .pud_val = xen_pud_val, + .make_pud = xen_make_pud, + .set_pgd = xen_set_pgd_hyper, + + .alloc_pud = xen_alloc_pte_init, + .release_pud = xen_release_pte_init, +#endif /* PAGETABLE_LEVELS == 4 */ + .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, .exit_mmap = xen_exit_mmap, diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4fca9d88bef..d0976b87cd2 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -438,14 +438,19 @@ void xen_set_pud(pud_t *ptr, pud_t val) void xen_set_pte(pte_t *ptep, pte_t pte) { +#ifdef CONFIG_X86_PAE ptep->pte_high = pte.pte_high; smp_wmb(); ptep->pte_low = pte.pte_low; +#else + *ptep = pte; +#endif } +#ifdef CONFIG_X86_PAE void xen_set_pte_atomic(pte_t *ptep, pte_t pte) { - set_64bit((u64 *)ptep, pte_val_ma(pte)); + set_64bit((u64 *)ptep, native_pte_val(pte)); } void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -459,6 +464,7 @@ void xen_pmd_clear(pmd_t *pmdp) { set_pmd(pmdp, __pmd(0)); } +#endif /* CONFIG_X86_PAE */ pmd_t xen_make_pmd(pmdval_t pmd) { @@ -466,6 +472,49 @@ pmd_t xen_make_pmd(pmdval_t pmd) return native_make_pmd(pmd); } +#if PAGETABLE_LEVELS == 4 +pudval_t xen_pud_val(pud_t pud) +{ + return pte_mfn_to_pfn(pud.pud); +} + +pud_t xen_make_pud(pudval_t pud) +{ + pud = pte_pfn_to_mfn(pud); + + return native_make_pud(pud); +} + +void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) +{ + struct mmu_update u; + + preempt_disable(); + + xen_mc_batch(); + + u.ptr = 
virt_to_machine(ptr).maddr; + u.val = pgd_val_ma(val); + extend_mmu_update(&u); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +void xen_set_pgd(pgd_t *ptr, pgd_t val) +{ + /* If page is not pinned, we can just update the entry + directly */ + if (!page_pinned(ptr)) { + *ptr = val; + return; + } + + xen_set_pgd_hyper(ptr, val); +} +#endif /* PAGETABLE_LEVELS == 4 */ + /* (Yet another) pagetable walker. This one is intended for pinning a pagetable. This means that it walks a pagetable and calls the diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 7856e37f604..19d544b0b6c 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -32,13 +32,24 @@ pgd_t xen_make_pgd(pgdval_t); void xen_set_pte(pte_t *ptep, pte_t pteval); void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); + +#ifdef CONFIG_X86_PAE void xen_set_pte_atomic(pte_t *ptep, pte_t pte); +void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void xen_pmd_clear(pmd_t *pmdp); +#endif /* CONFIG_X86_PAE */ + void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); void xen_set_pud(pud_t *ptr, pud_t val); void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval); void xen_set_pud_hyper(pud_t *ptr, pud_t val); -void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); -void xen_pmd_clear(pmd_t *pmdp); + +#if PAGETABLE_LEVELS == 4 +pudval_t xen_pud_val(pud_t pud); +pud_t xen_make_pud(pudval_t pudval); +void xen_set_pgd(pgd_t *pgdp, pgd_t pgd); +void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd); +#endif pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, -- cgit v1.2.3 From f5d36de069f4b343f64e858e7377cfc9c772c4fb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:39 -0700 Subject: xen64: random ifdefs to mask out 32-bit only code Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo 
Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index afb047e30bd..ada2e1a141d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1296,6 +1296,7 @@ static const struct machine_ops __initdata xen_machine_ops = { static void __init xen_reserve_top(void) { +#ifdef CONFIG_X86_32 unsigned long top = HYPERVISOR_VIRT_START; struct xen_platform_parameters pp; @@ -1303,6 +1304,7 @@ static void __init xen_reserve_top(void) top = pp.virt_start; reserve_top_address(-top + 2 * PAGE_SIZE); +#endif /* CONFIG_X86_32 */ } /* First C function to be called on Xen boot */ @@ -1333,6 +1335,11 @@ asmlinkage void __init xen_start_kernel(void) machine_ops = xen_machine_ops; +#ifdef CONFIG_X86_64 + /* Disable until direct per-cpu data access. */ + have_vcpu_info_placement = 0; +#endif + #ifdef CONFIG_SMP smp_ops = xen_smp_ops; #endif @@ -1343,9 +1350,11 @@ asmlinkage void __init xen_start_kernel(void) pgd = (pgd_t *)xen_start_info->pt_base; +#ifdef CONFIG_X86_32 init_pg_tables_start = __pa(pgd); init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT; +#endif init_mm.pgd = pgd; /* use the Xen pagetables to start */ @@ -1372,7 +1381,9 @@ asmlinkage void __init xen_start_kernel(void) /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); +#ifdef CONFIG_X86_32 new_cpu_data.hard_math = 1; +#endif new_cpu_data.x86_capability[0] = cpuid_edx(1); /* Poke various useful things into boot_params */ @@ -1388,5 +1399,9 @@ asmlinkage void __init xen_start_kernel(void) } /* Start the world */ +#ifdef CONFIG_X86_32 i386_start_kernel(); +#else + x86_64_start_kernel((char *)&boot_params); +#endif } -- cgit v1.2.3 From ce87b3d326de733c72b47662f106ee6cd699a20f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 
15:06:40 -0700 Subject: xen64: get active_mm from the pda x86_64 stores the active_mm in the pda, so fetch it from there. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d0976b87cd2..2579e70cdd0 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -805,8 +805,15 @@ void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) static void drop_other_mm_ref(void *info) { struct mm_struct *mm = info; + struct mm_struct *active_mm; - if (__get_cpu_var(cpu_tlbstate).active_mm == mm) +#ifdef CONFIG_X86_64 + active_mm = read_pda(active_mm); +#else + active_mm = __get_cpu_var(cpu_tlbstate).active_mm; +#endif + + if (active_mm == mm) leave_mm(smp_processor_id()); /* If this cpu still has a stale cr3 reference, then make sure -- cgit v1.2.3 From a9e7062d7339f1a1df2b6d7e5d595c7d55b56bfb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:41 -0700 Subject: xen: move smp setup into smp.c Move all the smp_ops setup into smp.c, allowing a lot of things to become static. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 19 +------------------ arch/x86/xen/smp.c | 34 ++++++++++++++++++++++++++-------- arch/x86/xen/xen-ops.h | 13 +++++-------- 3 files changed, 32 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ada2e1a141d..a85f447b8d0 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1237,21 +1237,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .set_fixmap = xen_set_fixmap, }; -#ifdef CONFIG_SMP -static const struct smp_ops xen_smp_ops __initdata = { - .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, - .smp_prepare_cpus = xen_smp_prepare_cpus, - .cpu_up = xen_cpu_up, - .smp_cpus_done = xen_smp_cpus_done, - - .smp_send_stop = xen_smp_send_stop, - .smp_send_reschedule = xen_smp_send_reschedule, - - .send_call_func_ipi = xen_smp_send_call_function_ipi, - .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi, -}; -#endif /* CONFIG_SMP */ - static void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; @@ -1340,9 +1325,7 @@ asmlinkage void __init xen_start_kernel(void) have_vcpu_info_placement = 0; #endif -#ifdef CONFIG_SMP - smp_ops = xen_smp_ops; -#endif + xen_smp_init(); /* Get mfn list */ if (!xen_feature(XENFEAT_auto_translated_physmap)) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 233156f39b7..91fae8ff756 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -152,7 +152,7 @@ void __init xen_fill_possible_map(void) } } -void __init xen_smp_prepare_boot_cpu(void) +static void __init xen_smp_prepare_boot_cpu(void) { int cpu; @@ -176,7 +176,7 @@ void __init xen_smp_prepare_boot_cpu(void) xen_setup_vcpu_info_placement(); } -void __init xen_smp_prepare_cpus(unsigned int max_cpus) +static void __init xen_smp_prepare_cpus(unsigned int max_cpus) { unsigned cpu; @@ -276,7 
+276,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) return 0; } -int __cpuinit xen_cpu_up(unsigned int cpu) +static int __cpuinit xen_cpu_up(unsigned int cpu) { struct task_struct *idle = idle_task(cpu); int rc; @@ -319,7 +319,7 @@ int __cpuinit xen_cpu_up(unsigned int cpu) return 0; } -void xen_smp_cpus_done(unsigned int max_cpus) +static void xen_smp_cpus_done(unsigned int max_cpus) { } @@ -335,12 +335,12 @@ static void stop_self(void *v) BUG(); } -void xen_smp_send_stop(void) +static void xen_smp_send_stop(void) { smp_call_function(stop_self, NULL, 0); } -void xen_smp_send_reschedule(int cpu) +static void xen_smp_send_reschedule(int cpu) { xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); } @@ -355,7 +355,7 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) xen_send_IPI_one(cpu, vector); } -void xen_smp_send_call_function_ipi(cpumask_t mask) +static void xen_smp_send_call_function_ipi(cpumask_t mask) { int cpu; @@ -370,7 +370,7 @@ void xen_smp_send_call_function_ipi(cpumask_t mask) } } -void xen_smp_send_call_function_single_ipi(int cpu) +static void xen_smp_send_call_function_single_ipi(int cpu) { xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); } @@ -394,3 +394,21 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } + +static const struct smp_ops xen_smp_ops __initdata = { + .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, + .smp_prepare_cpus = xen_smp_prepare_cpus, + .cpu_up = xen_cpu_up, + .smp_cpus_done = xen_smp_cpus_done, + + .smp_send_stop = xen_smp_send_stop, + .smp_send_reschedule = xen_smp_send_reschedule, + + .send_call_func_ipi = xen_smp_send_call_function_ipi, + .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi, +}; + +void __init xen_smp_init(void) +{ + smp_ops = xen_smp_ops; +} diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 77354d20425..81a779fc9b2 100644 --- a/arch/x86/xen/xen-ops.h +++ 
b/arch/x86/xen/xen-ops.h @@ -47,17 +47,14 @@ void xen_mark_init_mm_pinned(void); void __init xen_fill_possible_map(void); void __init xen_setup_vcpu_info_placement(void); -void xen_smp_prepare_boot_cpu(void); -void xen_smp_prepare_cpus(unsigned int max_cpus); -int xen_cpu_up(unsigned int cpu); -void xen_smp_cpus_done(unsigned int max_cpus); -void xen_smp_send_stop(void); -void xen_smp_send_reschedule(int cpu); -void xen_smp_send_call_function_ipi(cpumask_t mask); -void xen_smp_send_call_function_single_ipi(int cpu); +#ifdef CONFIG_SMP +void xen_smp_init(void); extern cpumask_t xen_cpu_initialized_map; +#else +static inline void xen_smp_init(void) {} +#endif /* Declare an asm function, along with symbols needed to make it -- cgit v1.2.3 From 5b09b2876ed1a8e34a0da8f069575fc6174e2077 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:42 -0700 Subject: x86_64: add workaround for no %gs-based percpu As a stopgap until Mike Travis's x86-64 gs-based percpu patches are ready, provide workaround functions for x86_read/write_percpu for Xen's use. Specifically, this means that we can't really make use of vcpu placement, because we can't use a single gs-based memory access to get to vcpu fields. So disable all that for now. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 11 ++++++++--- arch/x86/xen/enlighten.c | 5 +++++ 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index c9781982914..1b318e903bf 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -39,6 +39,13 @@ static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; #endif +void __init x86_64_init_pda(void) +{ + _cpu_pda = __cpu_pda; + cpu_pda(0) = &_boot_cpu_pda; + pda_init(0); +} + static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); @@ -102,9 +109,7 @@ void __init x86_64_start_kernel(char * real_mode_data) early_printk("Kernel alive\n"); - _cpu_pda = __cpu_pda; - cpu_pda(0) = &_boot_cpu_pda; - pda_init(0); + x86_64_init_pda(); early_printk("Kernel really alive\n"); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a85f447b8d0..f3f11acf785 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -971,6 +971,7 @@ void xen_setup_vcpu_info_placement(void) /* xen_vcpu_setup managed to place the vcpu_info within the percpu area for all cpus, so make use of it */ +#ifdef CONFIG_X86_32 if (have_vcpu_info_placement) { printk(KERN_INFO "Xen: using vcpu_info placement\n"); @@ -980,6 +981,7 @@ void xen_setup_vcpu_info_placement(void) pv_irq_ops.irq_enable = xen_irq_enable_direct; pv_mmu_ops.read_cr2 = xen_read_cr2_direct; } +#endif } static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, @@ -1000,10 +1002,12 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, goto patch_site switch (type) { +#ifdef CONFIG_X86_32 SITE(pv_irq_ops, irq_enable); SITE(pv_irq_ops, irq_disable); SITE(pv_irq_ops, save_fl); SITE(pv_irq_ops, restore_fl); +#endif /* CONFIG_X86_32 */ #undef SITE patch_site: @@ 
-1323,6 +1327,7 @@ asmlinkage void __init xen_start_kernel(void) #ifdef CONFIG_X86_64 /* Disable until direct per-cpu data access. */ have_vcpu_info_placement = 0; + x86_64_init_pda(); #endif xen_smp_init(); -- cgit v1.2.3 From c7b75947f89d45493562ede6d9ee7311dfa5c4ce Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:43 -0700 Subject: xen64: smp.c compile hacking A number of random changes to make xen/smp.c compile in 64-bit mode. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/setup.c | 7 +--- arch/x86/xen/smp.c | 98 +++++++++++++++++++++++++++++--------------------- arch/x86/xen/xen-ops.h | 2 -- 3 files changed, 58 insertions(+), 49 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e0a39595bde..f52f3855fb6 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -98,7 +98,7 @@ void xen_enable_sysenter(void) /* Mask events on entry, even though they get enabled immediately */ static struct callback_register sysenter = { .type = CALLBACKTYPE_sysenter, - .address = { __KERNEL_CS, (unsigned long)xen_sysenter_target }, + .address = XEN_CALLBACK(__KERNEL_CS, xen_sysenter_target), .flags = CALLBACKF_mask_events, }; @@ -143,11 +143,6 @@ void __init xen_arch_setup(void) pm_idle = xen_idle; -#ifdef CONFIG_SMP - /* fill cpus_possible with all available cpus */ - xen_fill_possible_map(); -#endif - paravirt_disable_iospace(); fiddle_vdso(); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 91fae8ff756..800bb2191e2 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -66,13 +66,21 @@ static __cpuinit void cpu_bringup_and_idle(void) int cpu = smp_processor_id(); cpu_init(); + preempt_disable(); + xen_enable_sysenter(); - preempt_disable(); - per_cpu(cpu_state, cpu) = CPU_ONLINE; + cpu = smp_processor_id(); + smp_store_cpu_info(cpu); + cpu_data(cpu).x86_max_cores = 1; +
set_cpu_sibling_map(cpu); xen_setup_cpu_clockevents(); + cpu_set(cpu, cpu_online_map); + x86_write_percpu(cpu_state, CPU_ONLINE); + wmb(); + /* We can take interrupts now: we're officially "up". */ local_irq_enable(); @@ -141,7 +149,7 @@ static int xen_smp_intr_init(unsigned int cpu) return rc; } -void __init xen_fill_possible_map(void) +static void __init xen_fill_possible_map(void) { int i, rc; @@ -154,24 +162,12 @@ void __init xen_fill_possible_map(void) static void __init xen_smp_prepare_boot_cpu(void) { - int cpu; - BUG_ON(smp_processor_id() != 0); native_smp_prepare_boot_cpu(); /* We've switched to the "real" per-cpu gdt, so make sure the old memory can be recycled */ - make_lowmem_page_readwrite(&per_cpu__gdt_page); - - for_each_possible_cpu(cpu) { - cpus_clear(per_cpu(cpu_sibling_map, cpu)); - /* - * cpu_core_map lives in a per cpu area that is cleared - * when the per cpu array is allocated. - * - * cpus_clear(per_cpu(cpu_core_map, cpu)); - */ - } + make_lowmem_page_readwrite(&per_cpu_var(gdt_page)); xen_setup_vcpu_info_placement(); } @@ -180,17 +176,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) { unsigned cpu; - for_each_possible_cpu(cpu) { - cpus_clear(per_cpu(cpu_sibling_map, cpu)); - /* - * cpu_core_ map will be zeroed when the per - * cpu area is allocated. 
- * - * cpus_clear(per_cpu(cpu_core_map, cpu)); - */ - } - smp_store_cpu_info(0); + cpu_data(0).x86_max_cores = 1; set_cpu_sibling_map(0); if (xen_smp_intr_init(0)) @@ -225,7 +212,7 @@ static __cpuinit int cpu_initialize_context(unsigned int cpu, struct task_struct *idle) { struct vcpu_guest_context *ctxt; - struct gdt_page *gdt = &per_cpu(gdt_page, cpu); + struct desc_struct *gdt; if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) return 0; @@ -234,12 +221,15 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) if (ctxt == NULL) return -ENOMEM; + gdt = get_cpu_gdt_table(cpu); + ctxt->flags = VGCF_IN_KERNEL; ctxt->user_regs.ds = __USER_DS; ctxt->user_regs.es = __USER_DS; - ctxt->user_regs.fs = __KERNEL_PERCPU; - ctxt->user_regs.gs = 0; ctxt->user_regs.ss = __KERNEL_DS; +#ifdef CONFIG_X86_32 + ctxt->user_regs.fs = __KERNEL_PERCPU; +#endif ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ @@ -249,11 +239,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->ldt_ents = 0; - BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK); - make_lowmem_page_readonly(gdt->gdt); + BUG_ON((unsigned long)gdt & ~PAGE_MASK); + make_lowmem_page_readonly(gdt); - ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt); - ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt); + ctxt->gdt_frames[0] = virt_to_mfn(gdt); + ctxt->gdt_ents = GDT_ENTRIES; ctxt->user_regs.cs = __KERNEL_CS; ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); @@ -261,9 +251,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->kernel_ss = __KERNEL_DS; ctxt->kernel_sp = idle->thread.sp0; +#ifdef CONFIG_X86_32 ctxt->event_callback_cs = __KERNEL_CS; - ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; ctxt->failsafe_callback_cs = __KERNEL_CS; +#endif + ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; per_cpu(xen_cr3, 
cpu) = __pa(swapper_pg_dir); @@ -287,11 +279,28 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) return rc; #endif +#ifdef CONFIG_X86_64 + /* Allocate node local memory for AP pdas */ + WARN_ON(cpu == 0); + if (cpu > 0) { + rc = get_local_pda(cpu); + if (rc) + return rc; + } +#endif + +#ifdef CONFIG_X86_32 init_gdt(cpu); per_cpu(current_task, cpu) = idle; irq_ctx_init(cpu); +#else + cpu_pda(cpu)->pcurrent = idle; + clear_tsk_thread_flag(idle, TIF_FORK); +#endif xen_setup_timer(cpu); + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* make sure interrupts start blocked */ per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; @@ -306,16 +315,14 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) if (rc) return rc; - smp_store_cpu_info(cpu); - set_cpu_sibling_map(cpu); - /* This must be done before setting cpu_online_map */ - wmb(); - - cpu_set(cpu, cpu_online_map); - rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); BUG_ON(rc); + while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { + HYPERVISOR_sched_op(SCHEDOP_yield, 0); + barrier(); + } + return 0; } @@ -379,7 +386,11 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_interrupt(); +#ifdef CONFIG_X86_32 __get_cpu_var(irq_stat).irq_call_count++; +#else + add_pda(irq_call_count, 1); +#endif irq_exit(); return IRQ_HANDLED; @@ -389,7 +400,11 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_single_interrupt(); +#ifdef CONFIG_X86_32 __get_cpu_var(irq_stat).irq_call_count++; +#else + add_pda(irq_call_count, 1); +#endif irq_exit(); return IRQ_HANDLED; @@ -411,4 +426,5 @@ static const struct smp_ops xen_smp_ops __initdata = { void __init xen_smp_init(void) { smp_ops = xen_smp_ops; + xen_fill_possible_map(); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 81a779fc9b2..aca4a7803e2 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -44,8 +44,6 @@ bool xen_vcpu_stolen(int 
vcpu); void xen_mark_init_mm_pinned(void); -void __init xen_fill_possible_map(void); - void __init xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP -- cgit v1.2.3 From 8c5e5ac32fe08793246709fbb94c055ec76a7c0e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:44 -0700 Subject: xen64: add xen-head code to head_64.S Add the Xen entrypoint and ELF notes to head_64.S. Adapts xen-head.S to compile either 32-bit or 64-bit. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets_64.c | 3 +++ arch/x86/kernel/head_64.S | 1 + arch/x86/xen/xen-head.S | 15 +++++++++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index bacf5deeec2..0f7e1f09aa0 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -131,5 +131,8 @@ int main(void) OFFSET(BP_loadflags, boot_params, hdr.loadflags); OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_version, boot_params, hdr.version); + + BLANK(); + DEFINE(PAGE_SIZE_asm, PAGE_SIZE); return 0; } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 4b6bda21837..2240f823676 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -401,6 +401,7 @@ ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 +#include "../../x86/xen/xen-head.S" .section .bss, "aw", @nobits .align L1_CACHE_BYTES diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index a9cac9dc04b..63d49a523ed 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -8,15 +8,21 @@ #include #include +#include #include #include __INIT ENTRY(startup_xen) - movl %esi,xen_start_info cld - movl $(init_thread_union+THREAD_SIZE),%esp +#ifdef CONFIG_X86_32 + mov %esi,xen_start_info + mov 
$init_thread_union+THREAD_SIZE,%esp +#else + mov %rsi,xen_start_info + mov $init_thread_union+THREAD_SIZE,%rsp +#endif jmp xen_start_kernel __FINIT @@ -30,7 +36,11 @@ ENTRY(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") +#ifdef CONFIG_X86_32 ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __PAGE_OFFSET) +#else + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map) +#endif ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") @@ -40,5 +50,6 @@ ENTRY(hypercall_page) .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START) + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0) #endif /*CONFIG_XEN */ -- cgit v1.2.3 From 555cf2b5805a213ba262a2830c4d22ad635a249e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:45 -0700 Subject: xen64: add asm-offsets Add Xen vcpu_info offsets to asm-offsets_64. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/kernel/asm-offsets_64.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 0f7e1f09aa0..aa89387006f 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -18,6 +18,8 @@ #include #include +#include + #define __NO_STUBS 1 #undef __SYSCALL #undef _ASM_X86_64_UNISTD_H_ @@ -134,5 +136,11 @@ int main(void) BLANK(); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); +#ifdef CONFIG_XEN + BLANK(); + OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); + OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); +#undef ENTRY +#endif return 0; } -- cgit v1.2.3 From cdacc1278b12d929f9a053c245ff3d16eb7af9f8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:46 -0700 Subject: xen64: add 64-bit assembler Split xen-asm into 32- and 64-bit files, and implement the 64-bit variants. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/Makefile | 2 +- arch/x86/xen/xen-asm.S | 305 ---------------------------------------------- arch/x86/xen/xen-asm_32.S | 305 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/xen-asm_64.S | 141 +++++++++++++++++++++ 4 files changed, 447 insertions(+), 306 deletions(-) delete mode 100644 arch/x86/xen/xen-asm.S create mode 100644 arch/x86/xen/xen-asm_32.S create mode 100644 arch/x86/xen/xen-asm_64.S (limited to 'arch') diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 2ba2d164913..59c1e539aed 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o \ - time.o xen-asm.o grant-table.o suspend.o + time.o xen-asm_$(BITS).o grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S deleted file mode 100644 index 2497a30f41d..00000000000 --- a/arch/x86/xen/xen-asm.S +++ /dev/null @@ -1,305 +0,0 @@ -/* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in pda) of the operations - here; the indirect forms are better handled in C, since they're - generally too large to inline anyway. - */ - -#include - -#include -#include -#include -#include -#include - -#include - -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -/* - Enable events. This clears the event mask and tests the pending - event status with one and operation. 
If there are pending - events, then enter the hypervisor to get them handled. - */ -ENTRY(xen_irq_enable_direct) - /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f - -2: call check_events -1: -ENDPATCH(xen_irq_enable_direct) - ret - ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) - - -/* - Disabling events is simply a matter of making the event mask - non-zero. - */ -ENTRY(xen_irq_disable_direct) - movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask -ENDPATCH(xen_irq_disable_direct) - ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) - -/* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). - */ -ENTRY(xen_save_fl_direct) - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - setz %ah - addb %ah,%ah -ENDPATCH(xen_save_fl_direct) - ret - ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) - - -/* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. 
- */ -ENTRY(xen_restore_fl_direct) - testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f -2: call check_events -1: -ENDPATCH(xen_restore_fl_direct) - ret - ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) - -/* - We can't use sysexit directly, because we're not running in ring0. - But we can easily fake it up using iret. Assuming xen_sysexit - is jumped to with a standard stack frame, we can just strip it - back to a standard iret frame and use iret. - */ -ENTRY(xen_sysexit) - movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ - orl $X86_EFLAGS_IF, PT_EFLAGS(%esp) - lea PT_EIP(%esp), %esp - - jmp xen_iret -ENDPROC(xen_sysexit) - -/* - This is run where a normal iret would be run, with the same stack setup: - 8: eflags - 4: cs - esp-> 0: eip - - This attempts to make sure that any pending events are dealt - with on return to usermode, but there is a small window in - which an event can happen just before entering usermode. If - the nested interrupt ends up setting one of the TIF_WORK_MASK - pending work flags, they will not be tested again before - returning to usermode. This means that a process can end up - with pending work, which will be unprocessed until the process - enters and leaves the kernel again, which could be an - unbounded amount of time. This means that a pending signal or - reschedule event could be indefinitely delayed. - - The fix is to notice a nested interrupt in the critical - window, and if one occurs, then fold the nested interrupt into - the current interrupt stack frame, and re-process it - iteratively rather than recursively. 
This means that it will - exit via the normal path, and all pending work will be dealt - with appropriately. - - Because the nested interrupt handler needs to deal with the - current stack state in whatever form its in, we keep things - simple by only using a single register which is pushed/popped - on the stack. - */ -ENTRY(xen_iret) - /* test eflags for special cases */ - testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp) - jnz hyper_iret - - push %eax - ESP_OFFSET=4 # bytes pushed onto stack - - /* Store vcpu_info pointer for easy access. Do it this - way to avoid having to reload %fs */ -#ifdef CONFIG_SMP - GET_THREAD_INFO(%eax) - movl TI_cpu(%eax),%eax - movl __per_cpu_offset(,%eax,4),%eax - mov per_cpu__xen_vcpu(%eax),%eax -#else - movl per_cpu__xen_vcpu, %eax -#endif - - /* check IF state we're restoring */ - testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) - - /* Maybe enable events. Once this happens we could get a - recursive event, so the critical region starts immediately - afterwards. However, if that happens we don't end up - resuming the code, so we don't have to be worried about - being preempted to another CPU. */ - setz XEN_vcpu_info_mask(%eax) -xen_iret_start_crit: - - /* check for unmasked and pending */ - cmpw $0x0001, XEN_vcpu_info_pending(%eax) - - /* If there's something pending, mask events again so we - can jump back into xen_hypervisor_callback */ - sete XEN_vcpu_info_mask(%eax) - - popl %eax - - /* From this point on the registers are restored and the stack - updated, so we don't need to worry about it if we're preempted */ -iret_restore_end: - - /* Jump to hypervisor_callback after fixing up the stack. - Events are masked, so jumping out of the critical - region is OK. 
*/ - je xen_hypervisor_callback - -1: iret -xen_iret_end_crit: -.section __ex_table,"a" - .align 4 - .long 1b,iret_exc -.previous - -hyper_iret: - /* put this out of line since its very rarely used */ - jmp hypercall_page + __HYPERVISOR_iret * 32 - - .globl xen_iret_start_crit, xen_iret_end_crit - -/* - This is called by xen_hypervisor_callback in entry.S when it sees - that the EIP at the time of interrupt was between xen_iret_start_crit - and xen_iret_end_crit. We're passed the EIP in %eax so we can do - a more refined determination of what to do. - - The stack format at this point is: - ---------------- - ss : (ss/esp may be present if we came from usermode) - esp : - eflags } outer exception info - cs } - eip } - ---------------- <- edi (copy dest) - eax : outer eax if it hasn't been restored - ---------------- - eflags } nested exception info - cs } (no ss/esp because we're nested - eip } from the same ring) - orig_eax }<- esi (copy src) - - - - - - - - - - fs } - es } - ds } SAVE_ALL state - eax } - : : - ebx }<- esp - ---------------- - - In order to deliver the nested exception properly, we need to shift - everything from the return addr up to the error code so it - sits just under the outer exception info. This means that when we - handle the exception, we do it in the context of the outer exception - rather than starting a new one. - - The only caveat is that if the outer eax hasn't been - restored yet (ie, it's still on stack), we need to insert - its value into the SAVE_ALL state before going on, since - it's usermode state which we eventually need to restore. - */ -ENTRY(xen_iret_crit_fixup) - /* - Paranoia: Make sure we're really coming from kernel space. - One could imagine a case where userspace jumps into the - critical range address, but just before the CPU delivers a GP, - it decides to deliver an interrupt instead. Unlikely? - Definitely. Easy to avoid? Yes. 
The Intel documents - explicitly say that the reported EIP for a bad jump is the - jump instruction itself, not the destination, but some virtual - environments get this wrong. - */ - movl PT_CS(%esp), %ecx - andl $SEGMENT_RPL_MASK, %ecx - cmpl $USER_RPL, %ecx - je 2f - - lea PT_ORIG_EAX(%esp), %esi - lea PT_EFLAGS(%esp), %edi - - /* If eip is before iret_restore_end then stack - hasn't been restored yet. */ - cmp $iret_restore_end, %eax - jae 1f - - movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ - movl %eax, PT_EAX(%esp) - - lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ - - /* set up the copy */ -1: std - mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */ - rep movsl - cld - - lea 4(%edi),%esp /* point esp to new frame */ -2: jmp xen_do_upcall - - -/* - Force an event check by making a hypercall, - but preserve regs before making the call. - */ -check_events: - push %eax - push %ecx - push %edx - call force_evtchn_callback - pop %edx - pop %ecx - pop %eax - ret diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S new file mode 100644 index 00000000000..2497a30f41d --- /dev/null +++ b/arch/x86/xen/xen-asm_32.S @@ -0,0 +1,305 @@ +/* + Asm versions of Xen pv-ops, suitable for either direct use or inlining. + The inline versions are the same as the direct-use versions, with the + pre- and post-amble chopped off. + + This code is encoded for size rather than absolute efficiency, + with a view to being able to inline as much as possible. + + We only bother with direct forms (ie, vcpu in pda) of the operations + here; the indirect forms are better handled in C, since they're + generally too large to inline anyway. + */ + +#include + +#include +#include +#include +#include +#include + +#include + +#define RELOC(x, v) .globl x##_reloc; x##_reloc=v +#define ENDPATCH(x) .globl x##_end; x##_end=. + +/* Pseudo-flag used for virtual NMI, which we don't implement yet */ +#define XEN_EFLAGS_NMI 0x80000000 + +/* + Enable events. 
This clears the event mask and tests the pending + event status with one and operation. If there are pending + events, then enter the hypervisor to get them handled. + */ +ENTRY(xen_irq_enable_direct) + /* Unmask events */ + movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask + + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* Test for pending */ + testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending + jz 1f + +2: call check_events +1: +ENDPATCH(xen_irq_enable_direct) + ret + ENDPROC(xen_irq_enable_direct) + RELOC(xen_irq_enable_direct, 2b+1) + + +/* + Disabling events is simply a matter of making the event mask + non-zero. + */ +ENTRY(xen_irq_disable_direct) + movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask +ENDPATCH(xen_irq_disable_direct) + ret + ENDPROC(xen_irq_disable_direct) + RELOC(xen_irq_disable_direct, 0) + +/* + (xen_)save_fl is used to get the current interrupt enable status. + Callers expect the status to be in X86_EFLAGS_IF, and other bits + may be set in the return value. We take advantage of this by + making sure that X86_EFLAGS_IF has the right value (and other bits + in that byte are 0), but other bits in the return value are + undefined. We need to toggle the state of the bit, because + Xen and x86 use opposite senses (mask vs enable). + */ +ENTRY(xen_save_fl_direct) + testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask + setz %ah + addb %ah,%ah +ENDPATCH(xen_save_fl_direct) + ret + ENDPROC(xen_save_fl_direct) + RELOC(xen_save_fl_direct, 0) + + +/* + In principle the caller should be passing us a value return + from xen_save_fl_direct, but for robustness sake we test only + the X86_EFLAGS_IF flag rather than the whole byte. After + setting the interrupt mask state, it checks for unmasked + pending events and enters the hypervisor to get them delivered + if so. 
+ */ +ENTRY(xen_restore_fl_direct) + testb $X86_EFLAGS_IF>>8, %ah + setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* check for unmasked and pending */ + cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending + jz 1f +2: call check_events +1: +ENDPATCH(xen_restore_fl_direct) + ret + ENDPROC(xen_restore_fl_direct) + RELOC(xen_restore_fl_direct, 2b+1) + +/* + We can't use sysexit directly, because we're not running in ring0. + But we can easily fake it up using iret. Assuming xen_sysexit + is jumped to with a standard stack frame, we can just strip it + back to a standard iret frame and use iret. + */ +ENTRY(xen_sysexit) + movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ + orl $X86_EFLAGS_IF, PT_EFLAGS(%esp) + lea PT_EIP(%esp), %esp + + jmp xen_iret +ENDPROC(xen_sysexit) + +/* + This is run where a normal iret would be run, with the same stack setup: + 8: eflags + 4: cs + esp-> 0: eip + + This attempts to make sure that any pending events are dealt + with on return to usermode, but there is a small window in + which an event can happen just before entering usermode. If + the nested interrupt ends up setting one of the TIF_WORK_MASK + pending work flags, they will not be tested again before + returning to usermode. This means that a process can end up + with pending work, which will be unprocessed until the process + enters and leaves the kernel again, which could be an + unbounded amount of time. This means that a pending signal or + reschedule event could be indefinitely delayed. + + The fix is to notice a nested interrupt in the critical + window, and if one occurs, then fold the nested interrupt into + the current interrupt stack frame, and re-process it + iteratively rather than recursively. 
This means that it will + exit via the normal path, and all pending work will be dealt + with appropriately. + + Because the nested interrupt handler needs to deal with the + current stack state in whatever form its in, we keep things + simple by only using a single register which is pushed/popped + on the stack. + */ +ENTRY(xen_iret) + /* test eflags for special cases */ + testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp) + jnz hyper_iret + + push %eax + ESP_OFFSET=4 # bytes pushed onto stack + + /* Store vcpu_info pointer for easy access. Do it this + way to avoid having to reload %fs */ +#ifdef CONFIG_SMP + GET_THREAD_INFO(%eax) + movl TI_cpu(%eax),%eax + movl __per_cpu_offset(,%eax,4),%eax + mov per_cpu__xen_vcpu(%eax),%eax +#else + movl per_cpu__xen_vcpu, %eax +#endif + + /* check IF state we're restoring */ + testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) + + /* Maybe enable events. Once this happens we could get a + recursive event, so the critical region starts immediately + afterwards. However, if that happens we don't end up + resuming the code, so we don't have to be worried about + being preempted to another CPU. */ + setz XEN_vcpu_info_mask(%eax) +xen_iret_start_crit: + + /* check for unmasked and pending */ + cmpw $0x0001, XEN_vcpu_info_pending(%eax) + + /* If there's something pending, mask events again so we + can jump back into xen_hypervisor_callback */ + sete XEN_vcpu_info_mask(%eax) + + popl %eax + + /* From this point on the registers are restored and the stack + updated, so we don't need to worry about it if we're preempted */ +iret_restore_end: + + /* Jump to hypervisor_callback after fixing up the stack. + Events are masked, so jumping out of the critical + region is OK. 
*/ + je xen_hypervisor_callback + +1: iret +xen_iret_end_crit: +.section __ex_table,"a" + .align 4 + .long 1b,iret_exc +.previous + +hyper_iret: + /* put this out of line since its very rarely used */ + jmp hypercall_page + __HYPERVISOR_iret * 32 + + .globl xen_iret_start_crit, xen_iret_end_crit + +/* + This is called by xen_hypervisor_callback in entry.S when it sees + that the EIP at the time of interrupt was between xen_iret_start_crit + and xen_iret_end_crit. We're passed the EIP in %eax so we can do + a more refined determination of what to do. + + The stack format at this point is: + ---------------- + ss : (ss/esp may be present if we came from usermode) + esp : + eflags } outer exception info + cs } + eip } + ---------------- <- edi (copy dest) + eax : outer eax if it hasn't been restored + ---------------- + eflags } nested exception info + cs } (no ss/esp because we're nested + eip } from the same ring) + orig_eax }<- esi (copy src) + - - - - - - - - + fs } + es } + ds } SAVE_ALL state + eax } + : : + ebx }<- esp + ---------------- + + In order to deliver the nested exception properly, we need to shift + everything from the return addr up to the error code so it + sits just under the outer exception info. This means that when we + handle the exception, we do it in the context of the outer exception + rather than starting a new one. + + The only caveat is that if the outer eax hasn't been + restored yet (ie, it's still on stack), we need to insert + its value into the SAVE_ALL state before going on, since + it's usermode state which we eventually need to restore. + */ +ENTRY(xen_iret_crit_fixup) + /* + Paranoia: Make sure we're really coming from kernel space. + One could imagine a case where userspace jumps into the + critical range address, but just before the CPU delivers a GP, + it decides to deliver an interrupt instead. Unlikely? + Definitely. Easy to avoid? Yes. 
The Intel documents + explicitly say that the reported EIP for a bad jump is the + jump instruction itself, not the destination, but some virtual + environments get this wrong. + */ + movl PT_CS(%esp), %ecx + andl $SEGMENT_RPL_MASK, %ecx + cmpl $USER_RPL, %ecx + je 2f + + lea PT_ORIG_EAX(%esp), %esi + lea PT_EFLAGS(%esp), %edi + + /* If eip is before iret_restore_end then stack + hasn't been restored yet. */ + cmp $iret_restore_end, %eax + jae 1f + + movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ + movl %eax, PT_EAX(%esp) + + lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ + + /* set up the copy */ +1: std + mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */ + rep movsl + cld + + lea 4(%edi),%esp /* point esp to new frame */ +2: jmp xen_do_upcall + + +/* + Force an event check by making a hypercall, + but preserve regs before making the call. + */ +check_events: + push %eax + push %ecx + push %edx + call force_evtchn_callback + pop %edx + pop %ecx + pop %eax + ret diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S new file mode 100644 index 00000000000..4ec10827370 --- /dev/null +++ b/arch/x86/xen/xen-asm_64.S @@ -0,0 +1,141 @@ +/* + Asm versions of Xen pv-ops, suitable for either direct use or inlining. + The inline versions are the same as the direct-use versions, with the + pre- and post-amble chopped off. + + This code is encoded for size rather than absolute efficiency, + with a view to being able to inline as much as possible. + + We only bother with direct forms (ie, vcpu in pda) of the operations + here; the indirect forms are better handled in C, since they're + generally too large to inline anyway. + */ + +#include + +#include +#include + +#include + +#define RELOC(x, v) .globl x##_reloc; x##_reloc=v +#define ENDPATCH(x) .globl x##_end; x##_end=. + +/* Pseudo-flag used for virtual NMI, which we don't implement yet */ +#define XEN_EFLAGS_NMI 0x80000000 + +#if 0 +#include + +/* + Enable events. 
This clears the event mask and tests the pending + event status with one and operation. If there are pending + events, then enter the hypervisor to get them handled. + */ +ENTRY(xen_irq_enable_direct) + /* Unmask events */ + movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* Test for pending */ + testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) + jz 1f + +2: call check_events +1: +ENDPATCH(xen_irq_enable_direct) + ret + ENDPROC(xen_irq_enable_direct) + RELOC(xen_irq_enable_direct, 2b+1) + +/* + Disabling events is simply a matter of making the event mask + non-zero. + */ +ENTRY(xen_irq_disable_direct) + movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) +ENDPATCH(xen_irq_disable_direct) + ret + ENDPROC(xen_irq_disable_direct) + RELOC(xen_irq_disable_direct, 0) + +/* + (xen_)save_fl is used to get the current interrupt enable status. + Callers expect the status to be in X86_EFLAGS_IF, and other bits + may be set in the return value. We take advantage of this by + making sure that X86_EFLAGS_IF has the right value (and other bits + in that byte are 0), but other bits in the return value are + undefined. We need to toggle the state of the bit, because + Xen and x86 use opposite senses (mask vs enable). + */ +ENTRY(xen_save_fl_direct) + testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + setz %ah + addb %ah,%ah +ENDPATCH(xen_save_fl_direct) + ret + ENDPROC(xen_save_fl_direct) + RELOC(xen_save_fl_direct, 0) + +/* + In principle the caller should be passing us a value return + from xen_save_fl_direct, but for robustness sake we test only + the X86_EFLAGS_IF flag rather than the whole byte. After + setting the interrupt mask state, it checks for unmasked + pending events and enters the hypervisor to get them delivered + if so. 
+ */ +ENTRY(xen_restore_fl_direct) + testb $X86_EFLAGS_IF>>8, %ah + setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* check for unmasked and pending */ + cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) + jz 1f +2: call check_events +1: +ENDPATCH(xen_restore_fl_direct) + ret + ENDPROC(xen_restore_fl_direct) + RELOC(xen_restore_fl_direct, 2b+1) + + +/* + Force an event check by making a hypercall, + but preserve regs before making the call. + */ +check_events: + push %rax + push %rcx + push %rdx + push %rsi + push %rdi + push %r8 + push %r9 + push %r10 + push %r11 + call force_evtchn_callback + pop %r11 + pop %r10 + pop %r9 + pop %r8 + pop %rdi + pop %rsi + pop %rdx + pop %rcx + pop %rax + ret +#endif + +ENTRY(xen_iret) + pushq $0 + jmp hypercall_page + __HYPERVISOR_iret * 32 + +ENTRY(xen_sysexit) + ud2a -- cgit v1.2.3 From 15664f968a95d8fbf4a0d7b462fcc20f88906bb3 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:47 -0700 Subject: xen64: use set_fixmap for shared_info structure Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f3f11acf785..dbe3549fad4 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -902,18 +902,11 @@ static __init void xen_pagetable_setup_start(pgd_t *base) void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { - unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); - - /* - * Create a mapping for the shared info page. 
- * Should be set_fixmap(), but shared_info is a machine - * address with no corresponding pseudo-phys address. - */ - set_pte_mfn(addr, - PFN_DOWN(xen_start_info->shared_info), - PAGE_KERNEL); - - HYPERVISOR_shared_info = (struct shared_info *)addr; + set_fixmap(FIX_PARAVIRT_BOOTMAP, + xen_start_info->shared_info); + + HYPERVISOR_shared_info = + (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); } else HYPERVISOR_shared_info = (struct shared_info *)__va(xen_start_info->shared_info); @@ -1050,8 +1043,13 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) #ifdef CONFIG_X86_F00F_BUG case FIX_F00F_IDT: #endif +#ifdef CONFIG_X86_32 case FIX_WP_TEST: case FIX_VDSO: + case FIX_KMAP_BEGIN ... FIX_KMAP_END: +#else + case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: +#endif #ifdef CONFIG_X86_LOCAL_APIC case FIX_APIC_BASE: /* maps dummy local APIC */ #endif -- cgit v1.2.3 From 7d087b68d6ddb2398fb7f6e45990b7248de640ef Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:48 -0700 Subject: xen: cpu_detect is 32-bit only Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index dbe3549fad4..2b7bea3bb6f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1365,12 +1365,12 @@ asmlinkage void __init xen_start_kernel(void) /* set the limit of our address space */ xen_reserve_top(); +#ifdef CONFIG_X86_32 /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); -#ifdef CONFIG_X86_32 new_cpu_data.hard_math = 1; -#endif new_cpu_data.x86_capability[0] = cpuid_edx(1); +#endif /* Poke various useful things into boot_params */ boot_params.hdr.type_of_loader = (9 << 4) | 0; -- cgit v1.2.3 From 3d75e1b8ef1567348ceba93d4666a1c7c2333583 Mon Sep 17 00:00:00 2001 From: Jeremy 
Fitzhardinge Date: Tue, 8 Jul 2008 15:06:49 -0700 Subject: xen64: add hypervisor callbacks for events, etc Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 98 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index ae63e584c34..7cc2de79614 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1312,3 +1312,101 @@ KPROBE_ENTRY(ignore_sysret) sysret CFI_ENDPROC ENDPROC(ignore_sysret) + +#ifdef CONFIG_XEN +ENTRY(xen_hypervisor_callback) + zeroentry xen_do_hypervisor_callback +END(xen_hypervisor_callback) + +/* +# A note on the "critical region" in our callback handler. +# We want to avoid stacking callback handlers due to events occurring +# during handling of the last event. To do this, we keep events disabled +# until we've done all processing. HOWEVER, we must enable events before +# popping the stack frame (can't be done atomically) and so it would still +# be possible to get enough handler activations to overflow the stack. +# Although unlikely, bugs of that kind are hard to track down, so we'd +# like to avoid the possibility. +# So, on entry to the handler we detect whether we interrupted an +# existing activation in its critical region -- if so, we pop the current +# activation and restart the handler using the previous one. 
+*/ +ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) + CFI_STARTPROC +/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will + see the correct pointer to the pt_regs */ + movq %rdi, %rsp # we don't return, adjust the stack frame + CFI_ENDPROC + CFI_DEFAULT_STACK +11: incl %gs:pda_irqcount + movq %rsp,%rbp + CFI_DEF_CFA_REGISTER rbp + cmovzq %gs:pda_irqstackptr,%rsp + pushq %rbp # backlink for old unwinder + call xen_evtchn_do_upcall + popq %rsp + CFI_DEF_CFA_REGISTER rsp + decl %gs:pda_irqcount + jmp error_exit + CFI_ENDPROC +END(do_hypervisor_callback) + +/* +# Hypervisor uses this for application faults while it executes. +# We get here for two reasons: +# 1. Fault while reloading DS, ES, FS or GS +# 2. Fault while executing IRET +# Category 1 we do not need to fix up as Xen has already reloaded all segment +# registers that could be reloaded and zeroed the others. +# Category 2 we fix up by killing the current process. We cannot use the +# normal Linux return path in this case because if we use the IRET hypercall +# to pop the stack frame we end up in an infinite loop of failsafe callbacks. +# We distinguish between categories by comparing each saved segment register +# with its current contents: any discrepancy means we in category 1. +*/ +ENTRY(xen_failsafe_callback) +#if 1 + ud2a +#else + _frame (RIP-0x30) + CFI_REL_OFFSET rcx, 0 + CFI_REL_OFFSET r11, 8 + movw %ds,%cx + cmpw %cx,0x10(%rsp) + CFI_REMEMBER_STATE + jne 1f + movw %es,%cx + cmpw %cx,0x18(%rsp) + jne 1f + movw %fs,%cx + cmpw %cx,0x20(%rsp) + jne 1f + movw %gs,%cx + cmpw %cx,0x28(%rsp) + jne 1f + /* All segments match their saved values => Category 2 (Bad IRET). */ + movq (%rsp),%rcx + CFI_RESTORE rcx + movq 8(%rsp),%r11 + CFI_RESTORE r11 + addq $0x30,%rsp + CFI_ADJUST_CFA_OFFSET -0x30 + movq $11,%rdi /* SIGSEGV */ + jmp do_exit + CFI_RESTORE_STATE +1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. 
*/ + movq (%rsp),%rcx + CFI_RESTORE rcx + movq 8(%rsp),%r11 + CFI_RESTORE r11 + addq $0x30,%rsp + CFI_ADJUST_CFA_OFFSET -0x30 + pushq $0 + CFI_ADJUST_CFA_OFFSET 8 + SAVE_ALL + jmp error_exit + CFI_ENDPROC +#endif +END(xen_failsafe_callback) + +#endif /* CONFIG_XEN */ -- cgit v1.2.3 From 084a2a4e7656209ea93aac9778defa03213ca31d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:50 -0700 Subject: xen64: early mapping setup Set up the initial pagetables to map the kernel mapping into the physical mapping space. This makes __va() usable, since it requires physical mappings. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 192 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 176 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2b7bea3bb6f..a991ee7ade9 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -1294,6 +1295,157 @@ static void __init xen_reserve_top(void) #endif /* CONFIG_X86_32 */ } +#ifdef CONFIG_X86_64 +/* + * Like __va(), but returns address in the kernel mapping (which is + * all we have until the physical memory mapping has been set up. 
+ */ +static void *__ka(phys_addr_t paddr) +{ + return (void *)(paddr + __START_KERNEL_map); +} + +/* Convert a machine address to physical address */ +static unsigned long m2p(phys_addr_t maddr) +{ + phys_addr_t paddr; + + maddr &= PTE_MASK; + paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; + + return paddr; +} + +/* Convert a machine address to kernel virtual */ +static void *m2v(phys_addr_t maddr) +{ + return __ka(m2p(maddr)); +} + +static void walk(pgd_t *pgd, unsigned long addr) +{ + unsigned l4idx = pgd_index(addr); + unsigned l3idx = pud_index(addr); + unsigned l2idx = pmd_index(addr); + unsigned l1idx = pte_index(addr); + pgd_t l4; + pud_t l3; + pmd_t l2; + pte_t l1; + + xen_raw_printk("walk %p, %lx -> %d %d %d %d\n", + pgd, addr, l4idx, l3idx, l2idx, l1idx); + + l4 = pgd[l4idx]; + xen_raw_printk(" l4: %016lx\n", l4.pgd); + xen_raw_printk(" %016lx\n", pgd_val(l4)); + + l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx]; + xen_raw_printk(" l3: %016lx\n", l3.pud); + xen_raw_printk(" %016lx\n", pud_val(l3)); + + l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx]; + xen_raw_printk(" l2: %016lx\n", l2.pmd); + xen_raw_printk(" %016lx\n", pmd_val(l2)); + + l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx]; + xen_raw_printk(" l1: %016lx\n", l1.pte); + xen_raw_printk(" %016lx\n", pte_val(l1)); +} + +static void set_page_prot(void *addr, pgprot_t prot) +{ + unsigned long pfn = __pa(addr) >> PAGE_SHIFT; + pte_t pte = pfn_pte(pfn, prot); + + xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016x pte=%016x\n", + addr, pfn, get_phys_to_machine(pfn), + pgprot_val(prot), pte.pte); + + if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) + BUG(); +} + +static void convert_pfn_mfn(void *v) +{ + pte_t *pte = v; + int i; + + /* All levels are converted the same way, so just treat them + as ptes. */ + for(i = 0; i < PTRS_PER_PTE; i++) + pte[i] = xen_make_pte(pte[i].pte); +} + +/* + * Set up the inital kernel pagetable. 
+ * + * We can construct this by grafting the Xen provided pagetable into + * head_64.S's preconstructed pagetables. We copy the Xen L2's into + * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This + * means that only the kernel has a physical mapping to start with - + * but that's enough to get __va working. We need to fill in the rest + * of the physical mapping once some sort of allocator has been set + * up. + */ +static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd) +{ + pud_t *l3; + pmd_t *l2; + + /* Zap identity mapping */ + init_level4_pgt[0] = __pgd(0); + + /* Pre-constructed entries are in pfn, so convert to mfn */ + convert_pfn_mfn(init_level4_pgt); + convert_pfn_mfn(level3_ident_pgt); + convert_pfn_mfn(level3_kernel_pgt); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); + + memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); + memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + + /* Make pagetable pieces RO */ + set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); + + /* Pin down new L4 */ + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(init_level4_pgt))); + + /* Unpin Xen-provided one */ + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + + /* Switch over */ + pgd = init_level4_pgt; + xen_write_cr3(__pa(pgd)); + + max_pfn_mapped = PFN_DOWN(__pa(pgd) + + xen_start_info->nr_pt_frames*PAGE_SIZE + + 512*1024); + + return pgd; +} +#else +static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd) +{ + 
init_pg_tables_start = __pa(pgd); + init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; + max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); + + return pgd; +} +#endif /* CONFIG_X86_64 */ + /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -1336,32 +1488,29 @@ asmlinkage void __init xen_start_kernel(void) pgd = (pgd_t *)xen_start_info->pt_base; -#ifdef CONFIG_X86_32 - init_pg_tables_start = __pa(pgd); - init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; - max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT; -#endif + /* Prevent unwanted bits from being set in PTEs. */ + __supported_pte_mask &= ~_PAGE_GLOBAL; + if (!is_initial_xendomain()) + __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); + + /* Don't do the full vcpu_info placement stuff until we have a + possible map and a non-dummy shared_info. */ + per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; + + xen_raw_console_write("mapping kernel into physical memory\n"); + pgd = xen_setup_kernel_pagetable(pgd); - init_mm.pgd = pgd; /* use the Xen pagetables to start */ + init_mm.pgd = pgd; /* keep using Xen gdt for now; no urgent need to change it */ x86_write_percpu(xen_cr3, __pa(pgd)); x86_write_percpu(xen_current_cr3, __pa(pgd)); - /* Don't do the full vcpu_info placement stuff until we have a - possible map and a non-dummy shared_info. */ - per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; - pv_info.kernel_rpl = 1; if (xen_feature(XENFEAT_supervisor_mode_kernel)) pv_info.kernel_rpl = 0; - /* Prevent unwanted bits from being set in PTEs. 
*/ - __supported_pte_mask &= ~_PAGE_GLOBAL; - if (!is_initial_xendomain()) - __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); - /* set the limit of our address space */ xen_reserve_top(); @@ -1384,10 +1533,21 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("hvc", 0, NULL); } + xen_raw_console_write("about to get started...\n"); + +#if 0 + xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n", + &boot_params, __pa_symbol(&boot_params), + __va(__pa_symbol(&boot_params))); + + walk(pgd, &boot_params); + walk(pgd, __va(__pa(&boot_params))); +#endif + /* Start the world */ #ifdef CONFIG_X86_32 i386_start_kernel(); #else - x86_64_start_kernel((char *)&boot_params); + x86_64_start_reservations((char *)__pa_symbol(&boot_params)); #endif } -- cgit v1.2.3 From 22911b3f1cf5431058e56b1727e8ef77be5e0ac9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:51 -0700 Subject: xen64: 64-bit starts using set_pte from very early It also doesn't need the 32-bit hack version of set_pte for initial pagetable construction, so just make it use the real thing. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a991ee7ade9..392450787aa 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1194,7 +1194,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .kmap_atomic_pte = xen_kmap_atomic_pte, #endif +#ifdef CONFIG_X86_64 + .set_pte = xen_set_pte, +#else .set_pte = xen_set_pte_init, +#endif .set_pte_at = xen_set_pte_at, .set_pmd = xen_set_pmd_hyper, -- cgit v1.2.3 From d114e1981cc1a51131230993a082c27c79ab370a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:52 -0700 Subject: xen64: map an initial chunk of physical memory Early in boot, map a chunk of extra physical memory for use later on. We need a pool of mapped pages to allocate further pages to construct pagetables mapping all physical memory. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 79 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 69 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 392450787aa..e9e3bafe48c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1381,6 +1381,61 @@ static void convert_pfn_mfn(void *v) pte[i] = xen_make_pte(pte[i].pte); } +/* + * Identity map, in addition to plain kernel map. This needs to be + * large enough to allocate page table pages to allocate the rest. + * Each page can map 2MB. 
+ */ +static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; + +static __init void xen_map_identity_early(unsigned long max_pfn) +{ + unsigned pmdidx, pteidx; + unsigned ident_pte; + unsigned long pfn; + + ident_pte = 0; + pfn = 0; + for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { + pte_t *pte_page; + + BUG_ON(level2_ident_pgt[pmdidx].pmd != level2_kernel_pgt[pmdidx].pmd); + + /* Reuse or allocate a page of ptes */ + if (pmd_present(level2_ident_pgt[pmdidx])) + pte_page = m2v(level2_ident_pgt[pmdidx].pmd); + else { + /* Check for free pte pages */ + if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) + break; + + pte_page = &level1_ident_pgt[ident_pte]; + ident_pte += PTRS_PER_PTE; + + /* Install new l1 in l2(s) */ + level2_ident_pgt[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); + level2_kernel_pgt[pmdidx] = level2_ident_pgt[pmdidx]; + } + + /* Install mappings */ + for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { + pte_t pte; + + if (pfn > max_pfn_mapped) + max_pfn_mapped = pfn; + + if (!pte_none(pte_page[pteidx])) + continue; + + pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); + pte_page[pteidx] = pte; + } + } + + for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) + set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); +} + /* * Set up the inital kernel pagetable. * @@ -1392,7 +1447,7 @@ static void convert_pfn_mfn(void *v) * of the physical mapping once some sort of allocator has been set * up. 
*/ -static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd) +static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pud_t *l3; pmd_t *l2; @@ -1415,6 +1470,9 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd) l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + /* Set up identity map */ + xen_map_identity_early(max_pfn); + /* Make pagetable pieces RO */ set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); @@ -1424,7 +1482,7 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd) set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); /* Pin down new L4 */ - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(init_level4_pgt))); + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa_symbol(init_level4_pgt))); /* Unpin Xen-provided one */ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); @@ -1433,19 +1491,23 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd) pgd = init_level4_pgt; xen_write_cr3(__pa(pgd)); - max_pfn_mapped = PFN_DOWN(__pa(pgd) + - xen_start_info->nr_pt_frames*PAGE_SIZE + - 512*1024); + reserve_early(__pa(xen_start_info->pt_base), + __pa(xen_start_info->pt_base + + xen_start_info->nr_pt_frames * PAGE_SIZE), + "XEN PAGETABLES"); return pgd; } #else -static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd) +static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { init_pg_tables_start = __pa(pgd); init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); + x86_write_percpu(xen_cr3, __pa(pgd)); + x86_write_percpu(xen_current_cr3, __pa(pgd)); + return pgd; } #endif /* CONFIG_X86_64 */ @@ -1502,15 +1564,12 @@ asmlinkage void __init xen_start_kernel(void) per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; xen_raw_console_write("mapping kernel into physical 
memory\n"); - pgd = xen_setup_kernel_pagetable(pgd); + pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); init_mm.pgd = pgd; /* keep using Xen gdt for now; no urgent need to change it */ - x86_write_percpu(xen_cr3, __pa(pgd)); - x86_write_percpu(xen_current_cr3, __pa(pgd)); - pv_info.kernel_rpl = 1; if (xen_feature(XENFEAT_supervisor_mode_kernel)) pv_info.kernel_rpl = 0; -- cgit v1.2.3 From 39dbc5bd345ebf93e066dde7f8e29467eb61b42e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:53 -0700 Subject: xen32: create initial mappings like 64-bit Rearrange the pagetable initialization to share code with the 64-bit kernel. Rather than deferring anything to pagetable_setup_start, just set up an initial pagetable in swapper_pg_dir early at startup, and create an additional 8MB of physical memory mappings. This matches the native head_32.S mappings to a large degree, and allows the rest of the pagetable setup to continue without much Xen vs. native difference. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 130 +++++++++++++++++++---------------------------- 1 file changed, 52 insertions(+), 78 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e9e3bafe48c..19c12a6c731 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -854,50 +854,6 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) static __init void xen_pagetable_setup_start(pgd_t *base) { -#ifdef CONFIG_X86_32 - pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; - int i; - - init_mm.pgd = base; - /* - * copy top-level of Xen-supplied pagetable into place. This - * is a stand-in while we copy the pmd pages. 
- */ - memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); - - /* - * For PAE, need to allocate new pmds, rather than - * share Xen's, since Xen doesn't like pmd's being - * shared between address spaces. - */ - for (i = 0; i < PTRS_PER_PGD; i++) { - if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { - pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); - - memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), - PAGE_SIZE); - - make_lowmem_page_readonly(pmd); - - set_pgd(&base[i], __pgd(1 + __pa(pmd))); - } else - pgd_clear(&base[i]); - } - - /* make sure zero_page is mapped RO so we can use it in pagetables */ - make_lowmem_page_readonly(empty_zero_page); - make_lowmem_page_readonly(base); - /* - * Switch to new pagetable. This is done before - * pagetable_init has done anything so that the new pages - * added to the table can be prepared properly for Xen. - */ - xen_write_cr3(__pa(base)); - - /* Unpin initial Xen pagetable */ - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, - PFN_DOWN(__pa(xen_start_info->pt_base))); -#endif /* CONFIG_X86_32 */ } void xen_setup_shared_info(void) @@ -936,12 +892,6 @@ static __init void xen_pagetable_setup_done(pgd_t *base) pv_mmu_ops.set_pte = xen_set_pte; xen_setup_shared_info(); - -#ifdef CONFIG_X86_32 - /* Actually pin the pagetable down, but we can't set PG_pinned - yet because the page structures don't exist yet. */ - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); -#endif } static __init void xen_post_allocator_init(void) @@ -1299,14 +1249,17 @@ static void __init xen_reserve_top(void) #endif /* CONFIG_X86_32 */ } -#ifdef CONFIG_X86_64 /* * Like __va(), but returns address in the kernel mapping (which is * all we have until the physical memory mapping has been set up. 
*/ static void *__ka(phys_addr_t paddr) { +#ifdef CONFIG_X86_64 return (void *)(paddr + __START_KERNEL_map); +#else + return __va(paddr); +#endif } /* Convert a machine address to physical address */ @@ -1326,6 +1279,7 @@ static void *m2v(phys_addr_t maddr) return __ka(m2p(maddr)); } +#ifdef CONFIG_X86_64 static void walk(pgd_t *pgd, unsigned long addr) { unsigned l4idx = pgd_index(addr); @@ -1356,13 +1310,14 @@ static void walk(pgd_t *pgd, unsigned long addr) xen_raw_printk(" l1: %016lx\n", l1.pte); xen_raw_printk(" %016lx\n", pte_val(l1)); } +#endif static void set_page_prot(void *addr, pgprot_t prot) { unsigned long pfn = __pa(addr) >> PAGE_SHIFT; pte_t pte = pfn_pte(pfn, prot); - xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016x pte=%016x\n", + xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n", addr, pfn, get_phys_to_machine(pfn), pgprot_val(prot), pte.pte); @@ -1370,17 +1325,6 @@ static void set_page_prot(void *addr, pgprot_t prot) BUG(); } -static void convert_pfn_mfn(void *v) -{ - pte_t *pte = v; - int i; - - /* All levels are converted the same way, so just treat them - as ptes. */ - for(i = 0; i < PTRS_PER_PTE; i++) - pte[i] = xen_make_pte(pte[i].pte); -} - /* * Identity map, in addition to plain kernel map. This needs to be * large enough to allocate page table pages to allocate the rest. 
@@ -1388,7 +1332,7 @@ static void convert_pfn_mfn(void *v) */ static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; -static __init void xen_map_identity_early(unsigned long max_pfn) +static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { unsigned pmdidx, pteidx; unsigned ident_pte; @@ -1399,11 +1343,9 @@ static __init void xen_map_identity_early(unsigned long max_pfn) for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { pte_t *pte_page; - BUG_ON(level2_ident_pgt[pmdidx].pmd != level2_kernel_pgt[pmdidx].pmd); - /* Reuse or allocate a page of ptes */ - if (pmd_present(level2_ident_pgt[pmdidx])) - pte_page = m2v(level2_ident_pgt[pmdidx].pmd); + if (pmd_present(pmd[pmdidx])) + pte_page = m2v(pmd[pmdidx].pmd); else { /* Check for free pte pages */ if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) @@ -1412,9 +1354,7 @@ static __init void xen_map_identity_early(unsigned long max_pfn) pte_page = &level1_ident_pgt[ident_pte]; ident_pte += PTRS_PER_PTE; - /* Install new l1 in l2(s) */ - level2_ident_pgt[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); - level2_kernel_pgt[pmdidx] = level2_ident_pgt[pmdidx]; + pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); } /* Install mappings */ @@ -1434,6 +1374,20 @@ static __init void xen_map_identity_early(unsigned long max_pfn) for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); + + set_page_prot(pmd, PAGE_KERNEL_RO); +} + +#ifdef CONFIG_X86_64 +static void convert_pfn_mfn(void *v) +{ + pte_t *pte = v; + int i; + + /* All levels are converted the same way, so just treat them + as ptes. 
*/ + for(i = 0; i < PTRS_PER_PTE; i++) + pte[i] = xen_make_pte(pte[i].pte); } /* @@ -1471,18 +1425,18 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); /* Set up identity map */ - xen_map_identity_early(max_pfn); + xen_map_identity_early(level2_ident_pgt, max_pfn); /* Make pagetable pieces RO */ set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); /* Pin down new L4 */ - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa_symbol(init_level4_pgt))); + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, + PFN_DOWN(__pa_symbol(init_level4_pgt))); /* Unpin Xen-provided one */ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); @@ -1498,17 +1452,37 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf return pgd; } -#else +#else /* !CONFIG_X86_64 */ +static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; + static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { + pmd_t *kernel_pmd; + init_pg_tables_start = __pa(pgd); init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); - x86_write_percpu(xen_cr3, __pa(pgd)); - x86_write_percpu(xen_current_cr3, __pa(pgd)); + kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); + memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); - return pgd; + xen_map_identity_early(level2_kernel_pgt, max_pfn); + + memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], + __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); + + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(swapper_pg_dir, 
PAGE_KERNEL_RO); + set_page_prot(empty_zero_page, PAGE_KERNEL_RO); + + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + + xen_write_cr3(__pa(swapper_pg_dir)); + + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); + + return swapper_pg_dir; } #endif /* CONFIG_X86_64 */ -- cgit v1.2.3 From ebd879e397f6361727c36267a12d1650710e465a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:54 -0700 Subject: xen: fix truncation of machine address arbitrary_virt_to_machine can truncate a machine address if it's above 4G. Cast the problem away. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 2579e70cdd0..05d7392a7a4 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -186,7 +186,7 @@ xmaddr_t arbitrary_virt_to_machine(unsigned long address) BUG_ON(pte == NULL); - return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset); + return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); } void make_lowmem_page_readonly(void *vaddr) -- cgit v1.2.3 From ce803e705f1cbdd2703e83061622089b5b4a5417 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:55 -0700 Subject: xen64: use arbitrary_virt_to_machine for xen_set_pmd When building initial pagetables in 64-bit kernel the pud/pmd pointer may be in ioremap/fixmap space, so we need to walk the pagetable to look up the physical address. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 05d7392a7a4..a8f02327181 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -178,8 +178,9 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn) p2m_top[topidx][idx] = mfn; } -xmaddr_t arbitrary_virt_to_machine(unsigned long address) +xmaddr_t arbitrary_virt_to_machine(void *vaddr) { + unsigned long address = (unsigned long)vaddr; unsigned int level; pte_t *pte = lookup_address(address, &level); unsigned offset = address & ~PAGE_MASK; @@ -253,7 +254,8 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) xen_mc_batch(); - u.ptr = virt_to_machine(ptr).maddr; + /* ptr may be ioremapped for 64-bit pagetable setup */ + u.ptr = arbitrary_virt_to_machine(ptr).maddr; u.val = pmd_val_ma(val); extend_mmu_update(&u); @@ -415,7 +417,8 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val) xen_mc_batch(); - u.ptr = virt_to_machine(ptr).maddr; + /* ptr may be ioremapped for 64-bit pagetable setup */ + u.ptr = arbitrary_virt_to_machine(ptr).maddr; u.val = pud_val_ma(val); extend_mmu_update(&u); -- cgit v1.2.3 From 4560a2947e32670fc6ede108c2b032c396180649 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:56 -0700 Subject: xen: set num_processors Someone's got to do it. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/smp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 800bb2191e2..8310ca0ea37 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -155,8 +155,10 @@ static void __init xen_fill_possible_map(void) for (i = 0; i < NR_CPUS; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc >= 0) + if (rc >= 0) { + num_processors++; cpu_set(i, cpu_possible_map); + } } } -- cgit v1.2.3 From 8745f8b0b914cf1d617ecc49726c24011858c74e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:57 -0700 Subject: xen64: defer setting pagetable alloc/release ops We need to wait until the page structure is available to use the proper pagetable page alloc/release operations, since they use struct page to determine if a pagetable is pinned. This happened to work in 32bit because nobody allocated new pagetable pages in the interim between xen_pagetable_setup_done and xen_post_allocator_init, but the 64-bit kernel needs to allocate more pagetable levels. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 19c12a6c731..da91404fc66 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -878,30 +878,29 @@ void xen_setup_shared_info(void) static __init void xen_pagetable_setup_done(pgd_t *base) { - /* This will work as long as patching hasn't happened yet - (which it hasn't) */ - pv_mmu_ops.alloc_pte = xen_alloc_pte; - pv_mmu_ops.alloc_pmd = xen_alloc_pmd; - pv_mmu_ops.release_pte = xen_release_pte; - pv_mmu_ops.release_pmd = xen_release_pmd; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.alloc_pud = xen_alloc_pud; - pv_mmu_ops.release_pud = xen_release_pud; -#endif - - pv_mmu_ops.set_pte = xen_set_pte; - xen_setup_shared_info(); } static __init void xen_post_allocator_init(void) { + pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; #if PAGETABLE_LEVELS == 4 pv_mmu_ops.set_pgd = xen_set_pgd; #endif + /* This will work as long as patching hasn't happened yet + (which it hasn't) */ + pv_mmu_ops.alloc_pte = xen_alloc_pte; + pv_mmu_ops.alloc_pmd = xen_alloc_pmd; + pv_mmu_ops.release_pte = xen_release_pte; + pv_mmu_ops.release_pmd = xen_release_pmd; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.alloc_pud = xen_alloc_pud; + pv_mmu_ops.release_pud = xen_release_pud; +#endif + xen_mark_init_mm_pinned(); } -- cgit v1.2.3 From 836fe2f291cb450a6193fa713878efe7d32bec6e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:58 -0700 Subject: xen: use set_pte_vaddr Make Xen's set_pte_mfn() use set_pte_vaddr rather than copying it. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Juan Quintela Signed-off-by: Mark McLoughlin Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index a8f02327181..eb31ed291b9 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -282,35 +282,7 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val) */ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = swapper_pg_dir + pgd_index(vaddr); - if (pgd_none(*pgd)) { - BUG(); - return; - } - pud = pud_offset(pgd, vaddr); - if (pud_none(*pud)) { - BUG(); - return; - } - pmd = pmd_offset(pud, vaddr); - if (pmd_none(*pmd)) { - BUG(); - return; - } - pte = pte_offset_kernel(pmd, vaddr); - /* stored as-is, to permit clearing entries */ - xen_set_pte(pte, mfn_pte(mfn, flags)); - - /* - * It's enough to flush this one mapping. - * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); + set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); } void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, -- cgit v1.2.3 From e176d367d0cc8b8efd2e0960c9edf5d2fe7cd9f1 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Tue, 8 Jul 2008 15:06:59 -0700 Subject: xen64: xen_write_idt_entry() and cvt_gate_to_trap() Changed to use the (to-be-)unified descriptor structs. 
Signed-off-by: Eduardo Habkost Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index da91404fc66..f5e96f7a4c5 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -401,23 +401,18 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, preempt_enable(); } -static int cvt_gate_to_trap(int vector, u32 low, u32 high, +static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { - u8 type, dpl; - - type = (high >> 8) & 0x1f; - dpl = (high >> 13) & 3; - - if (type != 0xf && type != 0xe) + if (val->type != 0xf && val->type != 0xe) return 0; info->vector = vector; - info->address = (high & 0xffff0000) | (low & 0x0000ffff); - info->cs = low >> 16; - info->flags = dpl; + info->address = gate_offset(*val); + info->cs = gate_segment(*val); + info->flags = val->dpl; /* interrupt gates clear IF */ - if (type == 0xe) + if (val->type == 0xe) info->flags |= 4; return 1; @@ -444,11 +439,10 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) if (p >= start && (p + 8) <= end) { struct trap_info info[2]; - u32 *desc = (u32 *)g; info[1].address = 0; - if (cvt_gate_to_trap(entrynum, desc[0], desc[1], &info[0])) + if (cvt_gate_to_trap(entrynum, g, &info[0])) if (HYPERVISOR_set_trap_table(info)) BUG(); } @@ -461,13 +455,13 @@ static void xen_convert_trap_info(const struct desc_ptr *desc, { unsigned in, out, count; - count = (desc->size+1) / 8; + count = (desc->size+1) / sizeof(gate_desc); BUG_ON(count > 256); for (in = out = 0; in < count; in++) { - const u32 *entry = (u32 *)(desc->address + in * 8); + gate_desc *entry = (gate_desc*)(desc->address) + in; - if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out])) + if 
(cvt_gate_to_trap(in, entry, &traps[out])) out++; } traps[out].address = 0; -- cgit v1.2.3 From 997409d3d0bd6894f33e31ced251c0fdf523aa14 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:00 -0700 Subject: xen64: deal with extra words Xen pushes onto exception frames Xen pushes two extra words containing the values of rcx and r11. This pvop hook copies the words back into their appropriate registers, and cleans them off the stack. This leaves the stack in native form, so the normal handler can run unchanged. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/xen-asm_64.S | 5 +++++ arch/x86/xen/xen-ops.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f5e96f7a4c5..9d94483b3b5 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1091,7 +1091,7 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { .safe_halt = xen_safe_halt, .halt = xen_halt, #ifdef CONFIG_X86_64 - .adjust_exception_frame = paravirt_nop, + .adjust_exception_frame = xen_adjust_exception_frame, #endif }; diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 4ec10827370..b147b495dae 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -133,6 +133,11 @@ check_events: ret #endif +ENTRY(xen_adjust_exception_frame) + mov 8+0(%rsp),%rcx + mov 8+8(%rsp),%r11 + ret $16 + ENTRY(xen_iret) pushq $0 jmp hypercall_page + __HYPERVISOR_iret * 32 diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index aca4a7803e2..c4800a2c5a4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -67,7 +67,9 @@ DECL_ASM(void, xen_irq_disable_direct, void); DECL_ASM(unsigned long, xen_save_fl_direct, void); DECL_ASM(void, xen_restore_fl_direct, unsigned long); +/* These are not functions, and cannot be 
called normally */ void xen_iret(void); void xen_sysexit(void); +void xen_adjust_exception_frame(void); #endif /* XEN_OPS_H */ -- cgit v1.2.3 From 952d1d7055c8cbf95b4ad2f90be5ed37db8a48ee Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:01 -0700 Subject: xen64: add pvop for swapgs swapgs is a no-op under Xen, because the hypervisor makes sure the right version of %gs is current when switching between user and kernel modes. This means that the swapgs "implementation" can be inlined and used when the stack is unsafe (usermode). Unfortunately, it means that disabling patching will result in a non-booting kernel... Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9d94483b3b5..8b60982e457 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1076,6 +1076,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .set_iopl_mask = xen_set_iopl_mask, .io_delay = xen_io_delay, + /* Xen takes care of %gs when switching to usermode for us */ + .swapgs = paravirt_nop, + .lazy_mode = { .enter = paravirt_enter_lazy_cpu, .leave = xen_leave_lazy, -- cgit v1.2.3 From 88459d4c7eb68c4a15609e00e5d100e2a305f040 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:02 -0700 Subject: xen64: register callbacks in arch-independent way Use callback_op hypercall to register callbacks in a 32/64-bit independent way (64-bit doesn't need a code segment, but that detail is hidden in XEN_CALLBACK). 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/setup.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index f52f3855fb6..bea3d4f779d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -91,19 +91,25 @@ static void __init fiddle_vdso(void) *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; } -void xen_enable_sysenter(void) +static __cpuinit int register_callback(unsigned type, const void *func) { - int cpu = smp_processor_id(); - extern void xen_sysenter_target(void); - /* Mask events on entry, even though they get enabled immediately */ - static struct callback_register sysenter = { - .type = CALLBACKTYPE_sysenter, - .address = XEN_CALLBACK(__KERNEL_CS, xen_sysenter_target), + struct callback_register callback = { + .type = type, + .address = XEN_CALLBACK(__KERNEL_CS, func), .flags = CALLBACKF_mask_events, }; + return HYPERVISOR_callback_op(CALLBACKOP_register, &callback); +} + +void __cpuinit xen_enable_sysenter(void) +{ + int cpu = smp_processor_id(); + extern void xen_sysenter_target(void); + if (!boot_cpu_has(X86_FEATURE_SEP) || - HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) { + register_callback(CALLBACKTYPE_sysenter, + xen_sysenter_target) != 0) { clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); } @@ -120,8 +126,9 @@ void __init xen_arch_setup(void) if (!xen_feature(XENFEAT_auto_translated_physmap)) HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); - HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, - __KERNEL_CS, (unsigned long)xen_failsafe_callback); + if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || + register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) + BUG(); xen_enable_sysenter(); -- cgit v1.2.3 From 
0725cbb97793d4e65bf148e4872959cdbb8c6ddd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:03 -0700 Subject: xen64: add identity irq->vector map The x86_64 interrupt subsystem is oriented towards vectors, as opposed to a flat irq space as it is in x86-32. This patch adds a simple identity irq->vector mapping so that we can continue to feed irqs into do_IRQ() and get a good result. Ideally x86_32 will unify with the 64-bit code and use vectors too. At that point we can move to mapping event channels to vectors, which will allow us to economise on irqs (so per-cpu event channels can share irqs, rather than having to allocate one per cpu, for example). Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 8b60982e457..52f2292672c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1085,8 +1085,25 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { }, }; +static void __init __xen_init_IRQ(void) +{ +#ifdef CONFIG_X86_64 + int i; + + /* Create identity vector->irq map */ + for(i = 0; i < NR_VECTORS; i++) { + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(vector_irq, cpu)[i] = i; + } +#endif /* CONFIG_X86_64 */ + + xen_init_IRQ(); +} + static const struct pv_irq_ops xen_irq_ops __initdata = { - .init_IRQ = xen_init_IRQ, + .init_IRQ = __xen_init_IRQ, .save_fl = xen_save_fl, .restore_fl = xen_restore_fl, .irq_disable = xen_irq_disable, -- cgit v1.2.3 From a8fc1089e49caa5dca346dfacb5c84abf9a22a0c Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Tue, 8 Jul 2008 15:07:05 -0700 Subject: xen64: implement xen_load_gs_index() xen-64: implement xen_load_gs_index() Signed-off-by: Eduardo Habkost Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Mark 
McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 52f2292672c..3b6b7fcf5b5 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -385,6 +385,14 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) loadsegment(gs, 0); } +#ifdef CONFIG_X86_64 +static void xen_load_gs_index(unsigned int idx) +{ + if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx)) + BUG(); +} +#endif + static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, const void *ptr) { @@ -1063,6 +1071,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .load_gdt = xen_load_gdt, .load_idt = xen_load_idt, .load_tls = xen_load_tls, +#ifdef CONFIG_X86_64 + .load_gs_index = xen_load_gs_index, +#endif .store_gdt = native_store_gdt, .store_idt = native_store_idt, -- cgit v1.2.3 From 5deb30d194d28b6bf7dacfb758267a51bf7c5b78 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:06 -0700 Subject: xen: rework pgd_walk to deal with 32/64 bit Rewrite pgd_walk to deal with 64-bit address spaces. There are two notable features of 64-bit address spaces: 1. The physical address is only 48 bits wide, with the upper 16 bits being sign extension; kernel addresses are negative, and userspace is positive. 2. The Xen hypervisor mapping is at the negative-most address, just above the sign-extension hole. 1. means that we can't easily use addresses when traversing the space, since we must deal with sign extension. This rewrite expresses everything in terms of pgd/pud/pmd indices, which means we don't need to worry about the exact configuration of the virtual memory space. This approach works equally well in 32-bit. To deal with 2, assume the hole is between the uppermost userspace address and PAGE_OFFSET. For 64-bit this skips the Xen mapping hole. For 32-bit, the hole is zero-sized. 
In all cases, the uppermost kernel address is FIXADDR_TOP. A side-effect of this patch is that the upper boundary is actually handled properly, exposing a long-standing bug in 32-bit, which failed to pin kernel pmd page. The kernel pmd is not shared, and so must be explicitly pinned, even though the kernel ptes are shared and don't need pinning. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 115 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 75 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index eb31ed291b9..046c1f23dd6 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -44,6 +44,7 @@ #include #include +#include #include #include #include @@ -491,77 +492,103 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) #endif /* PAGETABLE_LEVELS == 4 */ /* - (Yet another) pagetable walker. This one is intended for pinning a - pagetable. This means that it walks a pagetable and calls the - callback function on each page it finds making up the page table, - at every level. It walks the entire pagetable, but it only bothers - pinning pte pages which are below pte_limit. In the normal case - this will be TASK_SIZE, but at boot we need to pin up to - FIXADDR_TOP. But the important bit is that we don't pin beyond - there, because then we start getting into Xen's ptes. -*/ -static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level), + * (Yet another) pagetable walker. This one is intended for pinning a + * pagetable. This means that it walks a pagetable and calls the + * callback function on each page it finds making up the page table, + * at every level. It walks the entire pagetable, but it only bothers + * pinning pte pages which are below limit. In the normal case this + * will be STACK_TOP_MAX, but at boot we need to pin up to + * FIXADDR_TOP. 
+ * + * For 32-bit the important bit is that we don't pin beyond there, + * because then we start getting into Xen's ptes. + * + * For 64-bit, we must skip the Xen hole in the middle of the address + * space, just after the big x86-64 virtual hole. + */ +static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), unsigned long limit) { - pgd_t *pgd = pgd_base; int flush = 0; - unsigned long addr = 0; - unsigned long pgd_next; + unsigned hole_low, hole_high; + unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; + unsigned pgdidx, pudidx, pmdidx; - BUG_ON(limit > FIXADDR_TOP); + /* The limit is the last byte to be touched */ + limit--; + BUG_ON(limit >= FIXADDR_TOP); if (xen_feature(XENFEAT_auto_translated_physmap)) return 0; - for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) { + /* + * 64-bit has a great big hole in the middle of the address + * space, which contains the Xen mappings. On 32-bit these + * will end up making a zero-sized hole and so is a no-op. + */ + hole_low = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1); + hole_high = pgd_index(PAGE_OFFSET); + + pgdidx_limit = pgd_index(limit); +#if PTRS_PER_PUD > 1 + pudidx_limit = pud_index(limit); +#else + pudidx_limit = 0; +#endif +#if PTRS_PER_PMD > 1 + pmdidx_limit = pmd_index(limit); +#else + pmdidx_limit = 0; +#endif + + flush |= (*func)(virt_to_page(pgd), PT_PGD); + + for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) { pud_t *pud; - unsigned long pud_limit, pud_next; - pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP); + if (pgdidx >= hole_low && pgdidx < hole_high) + continue; - if (!pgd_val(*pgd)) + if (!pgd_val(pgd[pgdidx])) continue; - pud = pud_offset(pgd, 0); + pud = pud_offset(&pgd[pgdidx], 0); if (PTRS_PER_PUD > 1) /* not folded */ flush |= (*func)(virt_to_page(pud), PT_PUD); - for (; addr != pud_limit; pud++, addr = pud_next) { + for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) { pmd_t *pmd; - unsigned long pmd_limit; - pud_next = pud_addr_end(addr, pud_limit); - - if 
(pud_next < limit) - pmd_limit = pud_next; - else - pmd_limit = limit; + if (pgdidx == pgdidx_limit && + pudidx > pudidx_limit) + goto out; - if (pud_none(*pud)) + if (pud_none(pud[pudidx])) continue; - pmd = pmd_offset(pud, 0); + pmd = pmd_offset(&pud[pudidx], 0); if (PTRS_PER_PMD > 1) /* not folded */ flush |= (*func)(virt_to_page(pmd), PT_PMD); - for (; addr != pmd_limit; pmd++) { - addr += (PAGE_SIZE * PTRS_PER_PTE); - if ((pmd_limit-1) < (addr-1)) { - addr = pmd_limit; - break; - } + for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) { + struct page *pte; + + if (pgdidx == pgdidx_limit && + pudidx == pudidx_limit && + pmdidx > pmdidx_limit) + goto out; - if (pmd_none(*pmd)) + if (pmd_none(pmd[pmdidx])) continue; - flush |= (*func)(pmd_page(*pmd), PT_PTE); + pte = pmd_page(pmd[pmdidx]); + flush |= (*func)(pte, PT_PTE); } } } - - flush |= (*func)(virt_to_page(pgd_base), PT_PGD); +out: return flush; } @@ -650,6 +677,11 @@ void xen_pgd_pin(pgd_t *pgd) xen_mc_batch(); } +#ifdef CONFIG_X86_PAE + /* Need to make sure unshared kernel PMD is pinnable */ + pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); +#endif + xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); xen_mc_issue(0); } @@ -731,6 +763,10 @@ static void xen_pgd_unpin(pgd_t *pgd) xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); +#ifdef CONFIG_X86_PAE + /* Need to make sure unshared kernel PMD is unpinned */ + pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); +#endif pgd_walk(pgd, unpin_page, TASK_SIZE); xen_mc_issue(0); @@ -750,7 +786,6 @@ void xen_mm_unpin_all(void) list_for_each_entry(page, &pgd_list, lru) { if (PageSavePinned(page)) { BUG_ON(!PagePinned(page)); - printk("unpinning pinned %p\n", page_address(page)); xen_pgd_unpin((pgd_t *)page_address(page)); ClearPageSavePinned(page); } -- cgit v1.2.3 From b7c3c5c15936a40c79ef40af7b3bac801c7feb20 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:07 -0700 Subject: xen: make sure the 
kernel command line is right Point the boot params cmd_line_ptr to the domain-builder-provided command line. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 3b6b7fcf5b5..0172ba77452 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1587,6 +1587,7 @@ asmlinkage void __init xen_start_kernel(void) boot_params.hdr.ramdisk_image = xen_start_info->mod_start ? __pa(xen_start_info->mod_start) : 0; boot_params.hdr.ramdisk_size = xen_start_info->mod_len; + boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); if (!is_initial_xendomain()) { add_preferred_console("xenboot", 0, NULL); -- cgit v1.2.3 From 4a5c3e77f70b3ea8b361d7fa9eb2e4dad18f70ae Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:09 -0700 Subject: xen64: implement failsafe callback Implement the failsafe callback, so that iret and segment register load exceptions are reported to the kernel. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7cc2de79614..6aa6932e21b 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1365,10 +1365,8 @@ END(do_hypervisor_callback) # with its current contents: any discrepancy means we in category 1. 
*/ ENTRY(xen_failsafe_callback) -#if 1 - ud2a -#else - _frame (RIP-0x30) + framesz = (RIP-0x30) /* workaround buggy gas */ + _frame framesz CFI_REL_OFFSET rcx, 0 CFI_REL_OFFSET r11, 8 movw %ds,%cx @@ -1391,8 +1389,13 @@ ENTRY(xen_failsafe_callback) CFI_RESTORE r11 addq $0x30,%rsp CFI_ADJUST_CFA_OFFSET -0x30 - movq $11,%rdi /* SIGSEGV */ - jmp do_exit + pushq $0 + CFI_ADJUST_CFA_OFFSET 8 + pushq %r11 + CFI_ADJUST_CFA_OFFSET 8 + pushq %rcx + CFI_ADJUST_CFA_OFFSET 8 + jmp general_protection CFI_RESTORE_STATE 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ movq (%rsp),%rcx @@ -1406,7 +1409,6 @@ ENTRY(xen_failsafe_callback) SAVE_ALL jmp error_exit CFI_ENDPROC -#endif END(xen_failsafe_callback) #endif /* CONFIG_XEN */ -- cgit v1.2.3 From 8a95408e183b3e4aaf3b6a66fa34bff4db53011b Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Tue, 8 Jul 2008 15:07:10 -0700 Subject: xen64: Clear %fs on xen_load_tls() We need to do this, otherwise we can get a GPF on hypercall return after TLS descriptor is cleared but %fs is still pointing to it. 
Signed-off-by: Eduardo Habkost Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0172ba77452..c13698faae5 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -364,14 +364,6 @@ static void load_TLS_descriptor(struct thread_struct *t, static void xen_load_tls(struct thread_struct *t, unsigned int cpu) { - xen_mc_batch(); - - load_TLS_descriptor(t, cpu, 0); - load_TLS_descriptor(t, cpu, 1); - load_TLS_descriptor(t, cpu, 2); - - xen_mc_issue(PARAVIRT_LAZY_CPU); - /* * XXX sleazy hack: If we're being called in a lazy-cpu zone, * it means we're in a context switch, and %gs has just been @@ -380,9 +372,30 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) * Either way, it has been saved, and the new value will get * loaded properly. This will go away as soon as Xen has been * modified to not save/restore %gs for normal hypercalls. + * + * On x86_64, this hack is not used for %gs, because gs points + * to KERNEL_GS_BASE (and uses it for PDA references), so we + * must not zero %gs on x86_64 + * + * For x86_64, we need to zero %fs, otherwise we may get an + * exception between the new %fs descriptor being loaded and + * %fs being effectively cleared at __switch_to(). 
*/ - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { +#ifdef CONFIG_X86_32 loadsegment(gs, 0); +#else + loadsegment(fs, 0); +#endif + } + + xen_mc_batch(); + + load_TLS_descriptor(t, cpu, 0); + load_TLS_descriptor(t, cpu, 1); + load_TLS_descriptor(t, cpu, 2); + + xen_mc_issue(PARAVIRT_LAZY_CPU); } #ifdef CONFIG_X86_64 -- cgit v1.2.3 From d6182fbf04164016cb6540db02eef3d6bdc967c3 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:13 -0700 Subject: xen64: allocate and manage user pagetables Because the x86_64 architecture does not enforce segment limits, Xen cannot protect itself with them as it does in 32-bit mode. Therefore, to protect itself, it runs the guest kernel in ring 3. Since it also runs the guest userspace in ring3, the guest kernel must maintain a second pagetable for its userspace, which does not map kernel space. Naturally, the guest kernel pagetables map both kernel and userspace. The userspace pagetable is attached to the corresponding kernel pagetable via the pgd's page->private field. It is allocated and freed at the same time as the kernel pgd via the paravirt_pgd_alloc/free hooks. Fortunately, the user pagetable is almost entirely shared with the kernel pagetable; the only difference is the pgd page itself. set_pgd will populate all entries in the kernel pagetable, and also set the corresponding user pgd entry if the address is less than STACK_TOP_MAX. The user pagetable must be pinned and unpinned with the kernel one, but because the pagetables are aliased, pgd_walk() only needs to be called on the kernel pagetable. The user pgd page is then pinned/unpinned along with the kernel pgd page. xen_write_cr3 must write both the kernel and user cr3s. The init_mm.pgd pagetable never has a user pagetable allocated for it, because it can never be used while running usermode. One awkward area is that early in boot the page structures are not available. 
No user pagetable can exist at that point, but it complicates the logic to avoid looking at the page structure. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 99 ++++++++++++++++++++++++++++++++++++++++-------- arch/x86/xen/mmu.c | 91 +++++++++++++++++++++++++++++++++++++++----- arch/x86/xen/mmu.h | 2 + 3 files changed, 168 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c13698faae5..48f1a7eca8b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -46,7 +46,6 @@ #include #include #include -#include #include "xen-ops.h" #include "mmu.h" @@ -711,29 +710,57 @@ static void set_current_cr3(void *v) x86_write_percpu(xen_current_cr3, (unsigned long)v); } -static void xen_write_cr3(unsigned long cr3) +static void __xen_write_cr3(bool kernel, unsigned long cr3) { struct mmuext_op *op; struct multicall_space mcs; - unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); + unsigned long mfn; - BUG_ON(preemptible()); + if (cr3) + mfn = pfn_to_mfn(PFN_DOWN(cr3)); + else + mfn = 0; - mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */ + WARN_ON(mfn == 0 && kernel); - /* Update while interrupts are disabled, so its atomic with - respect to ipis */ - x86_write_percpu(xen_cr3, cr3); + mcs = __xen_mc_entry(sizeof(*op)); op = mcs.args; - op->cmd = MMUEXT_NEW_BASEPTR; + op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; op->arg1.mfn = mfn; MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - /* Update xen_update_cr3 once the batch has actually - been submitted. */ - xen_mc_callback(set_current_cr3, (void *)cr3); + if (kernel) { + x86_write_percpu(xen_cr3, cr3); + + /* Update xen_current_cr3 once the batch has actually + been submitted. 
*/ + xen_mc_callback(set_current_cr3, (void *)cr3); + } +} + +static void xen_write_cr3(unsigned long cr3) +{ + BUG_ON(preemptible()); + + xen_mc_batch(); /* disables interrupts */ + + /* Update while interrupts are disabled, so its atomic with + respect to ipis */ + x86_write_percpu(xen_cr3, cr3); + + __xen_write_cr3(true, cr3); + +#ifdef CONFIG_X86_64 + { + pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); + if (user_pgd) + __xen_write_cr3(false, __pa(user_pgd)); + else + __xen_write_cr3(false, 0); + } +#endif xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } @@ -794,6 +821,40 @@ static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn) xen_alloc_ptpage(mm, pfn, PT_PMD); } +static int xen_pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = mm->pgd; + int ret = 0; + + BUG_ON(PagePinned(virt_to_page(pgd))); + +#ifdef CONFIG_X86_64 + { + struct page *page = virt_to_page(pgd); + + BUG_ON(page->private != 0); + + page->private = __get_free_page(GFP_KERNEL | __GFP_ZERO); + if (page->private == 0) + ret = -ENOMEM; + + BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); + } +#endif + + return ret; +} + +static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ +#ifdef CONFIG_X86_64 + pgd_t *user_pgd = xen_get_user_pgd(pgd); + + if (user_pgd) + free_page((unsigned long)user_pgd); +#endif +} + /* This should never happen until we're OK to use struct page */ static void xen_release_ptpage(u32 pfn, unsigned level) { @@ -1168,8 +1229,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .pte_update = paravirt_nop, .pte_update_defer = paravirt_nop, - .pgd_alloc = __paravirt_pgd_alloc, - .pgd_free = paravirt_nop, + .pgd_alloc = xen_pgd_alloc, + .pgd_free = xen_pgd_free, .alloc_pte = xen_alloc_pte_init, .release_pte = xen_release_pte_init, @@ -1480,7 +1541,15 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf /* Switch over */ pgd = init_level4_pgt; - xen_write_cr3(__pa(pgd)); + + /* + * At this stage there can be no user 
pgd, and no page + * structure to attach it to, so make sure we just set kernel + * pgd. + */ + xen_mc_batch(); + __xen_write_cr3(true, __pa(pgd)); + xen_mc_issue(PARAVIRT_LAZY_CPU); reserve_early(__pa(xen_start_info->pt_base), __pa(xen_start_info->pt_base + diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 046c1f23dd6..a44d56e38bd 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -58,6 +58,13 @@ #include "multicalls.h" #include "mmu.h" +/* + * Just beyond the highest usermode address. STACK_TOP_MAX has a + * redzone above it, so round it up to a PGD boundary. + */ +#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) + + #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) #define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) @@ -461,17 +468,45 @@ pud_t xen_make_pud(pudval_t pud) return native_make_pud(pud); } -void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) +pgd_t *xen_get_user_pgd(pgd_t *pgd) { - struct mmu_update u; + pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK); + unsigned offset = pgd - pgd_page; + pgd_t *user_ptr = NULL; - preempt_disable(); + if (offset < pgd_index(USER_LIMIT)) { + struct page *page = virt_to_page(pgd_page); + user_ptr = (pgd_t *)page->private; + if (user_ptr) + user_ptr += offset; + } - xen_mc_batch(); + return user_ptr; +} + +static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) +{ + struct mmu_update u; u.ptr = virt_to_machine(ptr).maddr; u.val = pgd_val_ma(val); extend_mmu_update(&u); +} + +/* + * Raw hypercall-based set_pgd, intended for in early boot before + * there's a page structure. This implies: + * 1. The only existing pagetable is the kernel's + * 2. It is always pinned + * 3. 
It has no user pagetable attached to it + */ +void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) +{ + preempt_disable(); + + xen_mc_batch(); + + __xen_set_pgd_hyper(ptr, val); xen_mc_issue(PARAVIRT_LAZY_MMU); @@ -480,14 +515,28 @@ void xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) void xen_set_pgd(pgd_t *ptr, pgd_t val) { + pgd_t *user_ptr = xen_get_user_pgd(ptr); + /* If page is not pinned, we can just update the entry directly */ if (!page_pinned(ptr)) { *ptr = val; + if (user_ptr) { + WARN_ON(page_pinned(user_ptr)); + *user_ptr = val; + } return; } - xen_set_pgd_hyper(ptr, val); + /* If it's pinned, then we can at least batch the kernel and + user updates together. */ + xen_mc_batch(); + + __xen_set_pgd_hyper(ptr, val); + if (user_ptr) + __xen_set_pgd_hyper(user_ptr, val); + + xen_mc_issue(PARAVIRT_LAZY_MMU); } #endif /* PAGETABLE_LEVELS == 4 */ @@ -526,7 +575,7 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), * space, which contains the Xen mappings. On 32-bit these * will end up making a zero-sized hole and so is a no-op. 
*/ - hole_low = pgd_index(STACK_TOP_MAX + PGDIR_SIZE - 1); + hole_low = pgd_index(USER_LIMIT); hole_high = pgd_index(PAGE_OFFSET); pgdidx_limit = pgd_index(limit); @@ -670,19 +719,31 @@ void xen_pgd_pin(pgd_t *pgd) { xen_mc_batch(); - if (pgd_walk(pgd, pin_page, TASK_SIZE)) { + if (pgd_walk(pgd, pin_page, USER_LIMIT)) { /* re-enable interrupts for kmap_flush_unused */ xen_mc_issue(0); kmap_flush_unused(); xen_mc_batch(); } +#ifdef CONFIG_X86_64 + { + pgd_t *user_pgd = xen_get_user_pgd(pgd); + + xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); + + if (user_pgd) { + pin_page(virt_to_page(user_pgd), PT_PGD); + xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); + } + } +#else /* CONFIG_X86_32 */ #ifdef CONFIG_X86_PAE /* Need to make sure unshared kernel PMD is pinnable */ pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); #endif - xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); +#endif /* CONFIG_X86_64 */ xen_mc_issue(0); } @@ -763,11 +824,23 @@ static void xen_pgd_unpin(pgd_t *pgd) xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); +#ifdef CONFIG_X86_64 + { + pgd_t *user_pgd = xen_get_user_pgd(pgd); + + if (user_pgd) { + xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); + unpin_page(virt_to_page(user_pgd), PT_PGD); + } + } +#endif + #ifdef CONFIG_X86_PAE /* Need to make sure unshared kernel PMD is unpinned */ pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); #endif - pgd_walk(pgd, unpin_page, TASK_SIZE); + + pgd_walk(pgd, unpin_page, USER_LIMIT); xen_mc_issue(0); } diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 19d544b0b6c..0f59bd03f9e 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -51,6 +51,8 @@ void xen_set_pgd(pgd_t *pgdp, pgd_t pgd); void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd); #endif +pgd_t *xen_get_user_pgd(pgd_t *pgd); + pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_ptep_modify_prot_commit(struct mm_struct *mm, 
unsigned long addr, pte_t *ptep, pte_t pte); -- cgit v1.2.3 From 6fcac6d305e8238939e169f4c52e8ec8a552a31f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:14 -0700 Subject: xen64: set up syscall and sysenter entrypoints for 64-bit We set up entrypoints for syscall and sysenter. sysenter is only used for 32-bit compat processes, whereas syscall can be used by both 32 and 64-bit processes. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 4 ++ arch/x86/xen/setup.c | 42 +++++++++++++-- arch/x86/xen/smp.c | 1 + arch/x86/xen/xen-asm_64.S | 129 +++++++++++++++++++++++++++++++++++++++++++++- arch/x86/xen/xen-ops.h | 3 ++ 5 files changed, 174 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 48f1a7eca8b..87d36044054 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1139,6 +1139,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .iret = xen_iret, .irq_enable_sysexit = xen_sysexit, +#ifdef CONFIG_X86_64 + .usergs_sysret32 = xen_sysret32, + .usergs_sysret64 = xen_sysret64, +#endif .load_tr_desc = paravirt_nop, .set_ldt = xen_set_ldt, diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index bea3d4f779d..9d7a1440289 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -86,9 +86,11 @@ static void xen_idle(void) */ static void __init fiddle_vdso(void) { +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) extern const char vdso32_default_start; u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK); *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; +#endif } static __cpuinit int register_callback(unsigned type, const void *func) @@ -106,15 +108,48 @@ void __cpuinit xen_enable_sysenter(void) { int cpu = smp_processor_id(); extern void xen_sysenter_target(void); + int ret; + +#ifdef CONFIG_X86_32 + if 
(!boot_cpu_has(X86_FEATURE_SEP)) { + return; + } +#else + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && + boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR) { + return; + } +#endif - if (!boot_cpu_has(X86_FEATURE_SEP) || - register_callback(CALLBACKTYPE_sysenter, - xen_sysenter_target) != 0) { + ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); + if(ret != 0) { clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); } } +void __cpuinit xen_enable_syscall(void) +{ +#ifdef CONFIG_X86_64 + int cpu = smp_processor_id(); + int ret; + extern void xen_syscall_target(void); + extern void xen_syscall32_target(void); + + ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); + if (ret != 0) { + printk("failed to set syscall: %d\n", ret); + clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SYSCALL); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SYSCALL); + } else { + ret = register_callback(CALLBACKTYPE_syscall32, + xen_syscall32_target); + if (ret != 0) + printk("failed to set 32-bit syscall: %d\n", ret); + } +#endif /* CONFIG_X86_64 */ +} + void __init xen_arch_setup(void) { struct physdev_set_iopl set_iopl; @@ -131,6 +166,7 @@ void __init xen_arch_setup(void) BUG(); xen_enable_sysenter(); + xen_enable_syscall(); set_iopl.iopl = 1; rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 8310ca0ea37..f702199312a 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -69,6 +69,7 @@ static __cpuinit void cpu_bringup_and_idle(void) preempt_disable(); xen_enable_sysenter(); + xen_enable_syscall(); cpu = smp_processor_id(); smp_store_cpu_info(cpu); diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index b147b495dae..4038cbfe333 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -15,6 +15,8 @@ #include #include +#include +#include #include @@ -138,9 +140,132 @@ ENTRY(xen_adjust_exception_frame) mov 8+8(%rsp),%r11 
ret $16 +hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 +/* + Xen64 iret frame: + + ss + rsp + rflags + cs + rip <-- standard iret frame + + flags + + rcx } + r11 }<-- pushed by hypercall page +rsp -> rax } + */ ENTRY(xen_iret) pushq $0 - jmp hypercall_page + __HYPERVISOR_iret * 32 +1: jmp hypercall_iret +ENDPATCH(xen_iret) +RELOC(xen_iret, 1b+1) +/* + sysexit is not used for 64-bit processes, so it's + only ever used to return to 32-bit compat userspace. + */ ENTRY(xen_sysexit) - ud2a + pushq $__USER32_DS + pushq %rcx + pushq $X86_EFLAGS_IF + pushq $__USER32_CS + pushq %rdx + + pushq $VGCF_in_syscall +1: jmp hypercall_iret +ENDPATCH(xen_sysexit) +RELOC(xen_sysexit, 1b+1) + +ENTRY(xen_sysret64) + /* We're already on the usermode stack at this point, but still + with the kernel gs, so we can easily switch back */ + movq %rsp, %gs:pda_oldrsp + movq %gs:pda_kernelstack,%rsp + + pushq $__USER_DS + pushq %gs:pda_oldrsp + pushq %r11 + pushq $__USER_CS + pushq %rcx + + pushq $VGCF_in_syscall +1: jmp hypercall_iret +ENDPATCH(xen_sysret64) +RELOC(xen_sysret64, 1b+1) + +ENTRY(xen_sysret32) + /* We're already on the usermode stack at this point, but still + with the kernel gs, so we can easily switch back */ + movq %rsp, %gs:pda_oldrsp + movq %gs:pda_kernelstack, %rsp + + pushq $__USER32_DS + pushq %gs:pda_oldrsp + pushq %r11 + pushq $__USER32_CS + pushq %rcx + + pushq $VGCF_in_syscall +1: jmp hypercall_iret +ENDPATCH(xen_sysret32) +RELOC(xen_sysret32, 1b+1) + +/* + Xen handles syscall callbacks much like ordinary exceptions, + which means we have: + - kernel gs + - kernel rsp + - an iret-like stack frame on the stack (including rcx and r11): + ss + rsp + rflags + cs + rip + r11 + rsp-> rcx + + In all the entrypoints, we undo all that to make it look + like a CPU-generated syscall/sysenter and jump to the normal + entrypoint. 
+ */ + +.macro undo_xen_syscall + mov 0*8(%rsp),%rcx + mov 1*8(%rsp),%r11 + mov 5*8(%rsp),%rsp +.endm + +/* Normal 64-bit system call target */ +ENTRY(xen_syscall_target) + undo_xen_syscall + jmp system_call_after_swapgs +ENDPROC(xen_syscall_target) + +#ifdef CONFIG_IA32_EMULATION + +/* 32-bit compat syscall target */ +ENTRY(xen_syscall32_target) + undo_xen_syscall + jmp ia32_cstar_target +ENDPROC(xen_syscall32_target) + +/* 32-bit compat sysenter target */ +ENTRY(xen_sysenter_target) + undo_xen_syscall + jmp ia32_sysenter_target +ENDPROC(xen_sysenter_target) + +#else /* !CONFIG_IA32_EMULATION */ + +ENTRY(xen_syscall32_target) +ENTRY(xen_sysenter_target) + lea 16(%rsp), %rsp /* strip %rcx,%r11 */ + mov $-ENOSYS, %rax + pushq $VGCF_in_syscall + jmp hypercall_iret +ENDPROC(xen_syscall32_target) +ENDPROC(xen_sysenter_target) + +#endif /* CONFIG_IA32_EMULATION */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index c4800a2c5a4..dd3c23152a2 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -26,6 +26,7 @@ char * __init xen_memory_setup(void); void __init xen_arch_setup(void); void __init xen_init_IRQ(void); void xen_enable_sysenter(void); +void xen_enable_syscall(void); void xen_vcpu_restore(void); void __init xen_build_dynamic_phys_to_machine(void); @@ -70,6 +71,8 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long); /* These are not functions, and cannot be called normally */ void xen_iret(void); void xen_sysexit(void); +void xen_sysret32(void); +void xen_sysret64(void); void xen_adjust_exception_frame(void); #endif /* XEN_OPS_H */ -- cgit v1.2.3 From bf18bf94dc72db998d0fbebc846c07c858a59c90 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:15 -0700 Subject: xen64: set up userspace syscall patch 64-bit userspace expects the vdso to be mapped at a specific fixed address, which happens to be in the middle of the kernel address space. 
Because we have split user and kernel pagetables, we need to make special arrangements for the vsyscall mapping to appear in the kernel part of the user pagetable. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 87d36044054..f64b8729cd0 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -56,6 +56,18 @@ EXPORT_SYMBOL_GPL(hypercall_page); DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +/* + * Identity map, in addition to plain kernel map. This needs to be + * large enough to allocate page table pages to allocate the rest. + * Each page can map 2MB. + */ +static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; + +#ifdef CONFIG_X86_64 +/* l3 pud for userspace vsyscall mapping */ +static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; +#endif /* CONFIG_X86_64 */ + /* * Note about cr3 (pagetable base) values: * @@ -831,12 +843,20 @@ static int xen_pgd_alloc(struct mm_struct *mm) #ifdef CONFIG_X86_64 { struct page *page = virt_to_page(pgd); + pgd_t *user_pgd; BUG_ON(page->private != 0); - page->private = __get_free_page(GFP_KERNEL | __GFP_ZERO); - if (page->private == 0) - ret = -ENOMEM; + ret = -ENOMEM; + + user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + page->private = (unsigned long)user_pgd; + + if (user_pgd != NULL) { + user_pgd[pgd_index(VSYSCALL_START)] = + __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); + ret = 0; + } BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); } @@ -977,6 +997,9 @@ static __init void xen_post_allocator_init(void) pv_mmu_ops.release_pud = xen_release_pud; #endif +#ifdef CONFIG_X86_64 + SetPagePinned(virt_to_page(level3_user_vsyscall)); 
+#endif xen_mark_init_mm_pinned(); } @@ -1088,6 +1111,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) } __native_set_fixmap(idx, pte); + +#ifdef CONFIG_X86_64 + /* Replicate changes to map the vsyscall page into the user + pagetable vsyscall mapping. */ + if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { + unsigned long vaddr = __fix_to_virt(idx); + set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); + } +#endif } static const struct pv_info xen_info __initdata = { @@ -1427,13 +1459,6 @@ static void set_page_prot(void *addr, pgprot_t prot) BUG(); } -/* - * Identity map, in addition to plain kernel map. This needs to be - * large enough to allocate page table pages to allocate the rest. - * Each page can map 2MB. - */ -static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; - static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { unsigned pmdidx, pteidx; @@ -1533,6 +1558,7 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); -- cgit v1.2.3 From 1153968a48e3ca3e2b7a437e8b82ec9e6f768e24 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:16 -0700 Subject: xen: implement Xen write_msr operation 64-bit uses MSRs for important things like the base for fs and gs-prefixed addresses. It's more efficient to use a hypercall to update these, rather than go via the trap and emulate path. Other MSR writes are just passed through; in an unprivileged domain they do nothing, but it might be useful later. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f64b8729cd0..776c0fb77d6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -777,6 +778,34 @@ static void xen_write_cr3(unsigned long cr3) xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } +static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) +{ + int ret; + + ret = 0; + + switch(msr) { +#ifdef CONFIG_X86_64 + unsigned which; + u64 base; + + case MSR_FS_BASE: which = SEGBASE_FS; goto set; + case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; + case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; + + set: + base = ((u64)high << 32) | low; + if (HYPERVISOR_set_segment_base(which, base) != 0) + ret = -EFAULT; + break; +#endif + default: + ret = native_write_msr_safe(msr, low, high); + } + + return ret; +} + /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn) @@ -1165,7 +1194,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, - .write_msr = native_write_msr_safe, + .write_msr = xen_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, -- cgit v1.2.3 From 51dd660a2cd6eab4d470cfe1009c7f473832b786 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:07:17 -0700 Subject: xen: update Kconfig to allow 64-bit Xen Allow Xen to be enabled on 64-bit. Also extend domain size limit from 8 GB (on 32-bit) to 32 GB on 64-bit. 
Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/Kconfig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index c2cc9958087..20b49729bed 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -6,8 +6,8 @@ config XEN bool "Xen guest support" select PARAVIRT select PARAVIRT_CLOCK - depends on X86_32 - depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER) + depends on X86_64 || (X86_32 && X86_PAE && !(X86_VISWS || X86_VOYAGER)) + depends on X86_CMPXCHG && X86_TSC help This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the @@ -15,10 +15,11 @@ config XEN config XEN_MAX_DOMAIN_MEMORY int "Maximum allowed size of a domain in gigabytes" - default 8 + default 8 if X86_32 + default 32 if X86_64 depends on XEN help The pseudo-physical to machine address array is sized according to the maximum possible memory size of a Xen domain. This array uses 1 page per gigabyte, so there's no - need to be too stingy here. \ No newline at end of file + need to be too stingy here. -- cgit v1.2.3 From b3fe124389f9dd97f0bbd954da2910e286648f0f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Jul 2008 13:45:33 +0200 Subject: xen64: fix build error on 32-bit + !HIGHMEM fix: arch/x86/xen/enlighten.c: In function 'xen_set_fixmap': arch/x86/xen/enlighten.c:1127: error: 'FIX_KMAP_BEGIN' undeclared (first use in this function) arch/x86/xen/enlighten.c:1127: error: (Each undeclared identifier is reported only once arch/x86/xen/enlighten.c:1127: error: for each function it appears in.) arch/x86/xen/enlighten.c:1127: error: 'FIX_KMAP_END' undeclared (first use in this function) make[1]: *** [arch/x86/xen/enlighten.o] Error 1 make: *** [arch/x86/xen/enlighten.o] Error 2 FIX_KMAP_BEGIN is only available on HIGHMEM. 
Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 776c0fb77d6..3da6acb7eaf 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1124,7 +1124,9 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) #ifdef CONFIG_X86_32 case FIX_WP_TEST: case FIX_VDSO: +# ifdef CONFIG_HIGHMEM case FIX_KMAP_BEGIN ... FIX_KMAP_END: +# endif #else case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: #endif -- cgit v1.2.3 From 6596f2422306a05be2170efc114da49f26a047dd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Jul 2008 22:32:33 +0200 Subject: Revert "x86_64: there's no need to preallocate level1_fixmap_pgt" This reverts commit 033786969d1d1b5af12a32a19d3a760314d05329. Suresh Siddha reported that this broke booting on his 2GB testbox. Reported-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2240f823676..db3280afe88 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -362,6 +362,12 @@ NEXT_PAGE(level3_kernel_pgt) .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE NEXT_PAGE(level2_fixmap_pgt) + .fill 506,8,0 + .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE + /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ + .fill 5,8,0 + +NEXT_PAGE(level1_fixmap_pgt) .fill 512,8,0 NEXT_PAGE(level2_ident_pgt) -- cgit v1.2.3 From 62541c376668042e20122864a044360707b2fb82 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 10 Jul 2008 16:24:08 -0700 Subject: xen64: disable 32-bit syscall/sysenter if not supported. Old versions of Xen (3.1 and before) don't support sysenter or syscall from 32-bit compat userspaces. 
If we can't set the appropriate syscall callback, then disable the corresponding feature bit, which will cause the vdso32 setup to fall back appropriately. Linux assumes that syscall is always available to 32-bit userspace, and installs it by default if sysenter isn't available. In that case, we just disable vdso altogether, forcing userspace libc to fall back to int $0x80. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/setup.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 9d7a1440289..9cce4a92aac 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -106,46 +106,46 @@ static __cpuinit int register_callback(unsigned type, const void *func) void __cpuinit xen_enable_sysenter(void) { - int cpu = smp_processor_id(); extern void xen_sysenter_target(void); int ret; + unsigned sysenter_feature; #ifdef CONFIG_X86_32 - if (!boot_cpu_has(X86_FEATURE_SEP)) { - return; - } + sysenter_feature = X86_FEATURE_SEP; #else - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL && - boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR) { - return; - } + sysenter_feature = X86_FEATURE_SYSENTER32; #endif + if (!boot_cpu_has(sysenter_feature)) + return; + ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); - if(ret != 0) { - clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP); - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); - } + if(ret != 0) + setup_clear_cpu_cap(sysenter_feature); } void __cpuinit xen_enable_syscall(void) { #ifdef CONFIG_X86_64 - int cpu = smp_processor_id(); int ret; extern void xen_syscall_target(void); extern void xen_syscall32_target(void); ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); if (ret != 0) { - printk("failed to set syscall: %d\n", ret); - clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SYSCALL); - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SYSCALL); - } else { + 
printk(KERN_ERR "Failed to set syscall: %d\n", ret); + /* Pretty fatal; 64-bit userspace has no other + mechanism for syscalls. */ + } + + if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { ret = register_callback(CALLBACKTYPE_syscall32, xen_syscall32_target); - if (ret != 0) - printk("failed to set 32-bit syscall: %d\n", ret); + if (ret != 0) { + printk(KERN_INFO "Xen: 32-bit syscall not supported: disabling vdso\n"); + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); + sysctl_vsyscall32 = 0; + } } #endif /* CONFIG_X86_64 */ } -- cgit v1.2.3 From 71415c6a0877d5944d5dc3060f3b03513746158d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 11 Jul 2008 22:41:34 +0200 Subject: x86, xen, vdso: fix build error fix: arch/x86/xen/built-in.o: In function `xen_enable_syscall': (.cpuinit.text+0xdb): undefined reference to `sysctl_vsyscall32' Signed-off-by: Ingo Molnar --- arch/x86/xen/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 9cce4a92aac..3e11779755c 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -144,7 +144,9 @@ void __cpuinit xen_enable_syscall(void) if (ret != 0) { printk(KERN_INFO "Xen: 32-bit syscall not supported: disabling vdso\n"); setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); +#ifdef CONFIG_COMPAT sysctl_vsyscall32 = 0; +#endif } } #endif /* CONFIG_X86_64 */ -- cgit v1.2.3 From 6a52e4b1cddd90fbfde8fb67021657936ee74b07 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 12 Jul 2008 02:22:00 -0700 Subject: x86_64: further cleanup of 32-bit compat syscall mechanisms AMD only supports "syscall" from 32-bit compat usermode. Intel and Centaur(?) only support "sysenter" from 32-bit compat usermode. Set the X86 feature bits accordingly, and set up the vdso in accordance with those bits. On the off chance we run in a 64-bit environment which supports neither syscall nor sysenter from 32-bit mode, fall back to the int $0x80 vdso. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd_64.c | 2 ++ arch/x86/kernel/cpu/common_64.c | 3 --- arch/x86/vdso/Makefile | 2 +- arch/x86/vdso/vdso32-setup.c | 19 +++++++++---------- arch/x86/vdso/vdso32.S | 13 ++++++++----- arch/x86/xen/setup.c | 10 +++++++--- 6 files changed, 27 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 7c36fb8a28d..d1692b2a41f 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c @@ -115,6 +115,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ if (c->x86_power & (1<<8)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + + set_cpu_cap(c, X86_FEATURE_SYSCALL32); } static void __cpuinit init_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 15419cd3c5a..736f50fa433 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -317,9 +317,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_phys_bits = eax & 0xff; } - /* Assume all 64-bit CPUs support 32-bit syscall */ - set_cpu_cap(c, X86_FEATURE_SYSCALL32); - if (c->x86_vendor != X86_VENDOR_UNKNOWN && cpu_devs[c->x86_vendor]->c_early_init) cpu_devs[c->x86_vendor]->c_early_init(c); diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index b7ad9f89d21..4d6ef0a336d 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -62,7 +62,7 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE # Build multiple 32-bit vDSO images to choose from at boot time. 
# obj-$(VDSO32-y) += vdso32-syms.lds -vdso32.so-$(CONFIG_X86_32) += int80 +vdso32.so-$(VDSO32-y) += int80 vdso32.so-$(CONFIG_COMPAT) += syscall vdso32.so-$(VDSO32-y) += sysenter diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 0bce5429a51..513f330c583 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -193,17 +193,12 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr) } } -/* - * These symbols are defined by vdso32.S to mark the bounds - * of the ELF DSO images included therein. - */ -extern const char vdso32_default_start, vdso32_default_end; -extern const char vdso32_sysenter_start, vdso32_sysenter_end; static struct page *vdso32_pages[1]; #ifdef CONFIG_X86_64 #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32)) +#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32)) /* May not be __init: called during resume */ void syscall32_cpu_init(void) @@ -226,6 +221,7 @@ static inline void map_compat_vdso(int map) #else /* CONFIG_X86_32 */ #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) +#define vdso32_syscall() (0) void enable_sep_cpu(void) { @@ -296,12 +292,15 @@ int __init sysenter_setup(void) gate_vma_init(); #endif - if (!vdso32_sysenter()) { - vsyscall = &vdso32_default_start; - vsyscall_len = &vdso32_default_end - &vdso32_default_start; - } else { + if (vdso32_syscall()) { + vsyscall = &vdso32_syscall_start; + vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start; + } else if (vdso32_sysenter()){ vsyscall = &vdso32_sysenter_start; vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start; + } else { + vsyscall = &vdso32_int80_start; + vsyscall_len = &vdso32_int80_end - &vdso32_int80_start; } memcpy(syscall_page, vsyscall, vsyscall_len); diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S index 1e36f72cab8..2ce5f82c333 100644 --- a/arch/x86/vdso/vdso32.S +++ b/arch/x86/vdso/vdso32.S @@ -2,14 +2,17 @@ __INITDATA - .globl vdso32_default_start, vdso32_default_end 
-vdso32_default_start: -#ifdef CONFIG_X86_32 + .globl vdso32_int80_start, vdso32_int80_end +vdso32_int80_start: .incbin "arch/x86/vdso/vdso32-int80.so" -#else +vdso32_int80_end: + + .globl vdso32_syscall_start, vdso32_syscall_end +vdso32_syscall_start: +#ifdef CONFIG_COMPAT .incbin "arch/x86/vdso/vdso32-syscall.so" #endif -vdso32_default_end: +vdso32_syscall_end: .globl vdso32_sysenter_start, vdso32_sysenter_end vdso32_sysenter_start: diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 3e11779755c..e3648e64a63 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -83,12 +83,16 @@ static void xen_idle(void) /* * Set the bit indicating "nosegneg" library variants should be used. + * We only need to bother in pure 32-bit mode; compat 32-bit processes + * can have un-truncated segments, so wrapping around is allowed. */ static void __init fiddle_vdso(void) { -#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) - extern const char vdso32_default_start; - u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK); +#ifdef CONFIG_X86_32 + u32 *mask; + mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK); + *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; + mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK); *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; #endif } -- cgit v1.2.3 From d5303b811b9d6dad2e7396d545eb7db414d42a61 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 12 Jul 2008 02:22:06 -0700 Subject: x86: xen: no need to disable vdso32 Now that the vdso32 code can cope with both syscall and sysenter missing for 32-bit compat processes, just disable the features without disabling vdso altogether. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. 
Peter Anvin --- arch/x86/xen/setup.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e3648e64a63..b6acc3a0af4 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -137,7 +137,7 @@ void __cpuinit xen_enable_syscall(void) ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); if (ret != 0) { - printk(KERN_ERR "Failed to set syscall: %d\n", ret); + printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); /* Pretty fatal; 64-bit userspace has no other mechanism for syscalls. */ } @@ -145,13 +145,8 @@ void __cpuinit xen_enable_syscall(void) if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { ret = register_callback(CALLBACKTYPE_syscall32, xen_syscall32_target); - if (ret != 0) { - printk(KERN_INFO "Xen: 32-bit syscall not supported: disabling vdso\n"); + if (ret != 0) setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); -#ifdef CONFIG_COMPAT - sysctl_vsyscall32 = 0; -#endif - } } #endif /* CONFIG_X86_64 */ } -- cgit v1.2.3 From 094029479be8eb380447f42eff1b35362ef1a464 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 12 Jul 2008 02:22:12 -0700 Subject: x86_64: adjust exception frame on paranoid exceptions Exceptions using paranoidentry need to have their exception frames adjusted explicitly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/entry_64.S | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 6aa6932e21b..80d5663db3b 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1189,6 +1189,7 @@ END(device_not_available) /* runs on exception stack */ KPROBE_ENTRY(debug) INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_debug, DEBUG_STACK @@ -1198,6 +1199,7 @@ KPROBE_END(debug) /* runs on exception stack */ KPROBE_ENTRY(nmi) INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $-1 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_nmi, 0, 0 @@ -1211,6 +1213,7 @@ KPROBE_END(nmi) KPROBE_ENTRY(int3) INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_int3, DEBUG_STACK @@ -1237,6 +1240,7 @@ END(coprocessor_segment_overrun) /* runs on exception stack */ ENTRY(double_fault) XCPT_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME paranoidentry do_double_fault jmp paranoid_exit1 CFI_ENDPROC @@ -1253,6 +1257,7 @@ END(segment_not_present) /* runs on exception stack */ ENTRY(stack_segment) XCPT_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME paranoidentry do_stack_segment jmp paranoid_exit1 CFI_ENDPROC @@ -1278,6 +1283,7 @@ END(spurious_interrupt_bug) /* runs on exception stack */ ENTRY(machine_check) INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_machine_check -- cgit v1.2.3 From 74d4affde8feb8d5bdebf7fba8e90e4eae3b7b1d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 7 Jul 2008 12:07:50 -0700 Subject: x86/paravirt: add hooks for spinlock operations Ticket spinlocks have absolutely ghastly worst-case performance characteristics in a virtual environment. If there is any contention for physical CPUs (ie, there are more runnable vcpus than cpus), then ticket locks can cause the system to end up spending 90+% of its time spinning. 
The problem is that (v)cpus waiting on a ticket spinlock will be granted access to the lock in the strict order in which they got their tickets. If the hypervisor scheduler doesn't give the vcpus time in that order, they will burn timeslices waiting for the scheduler to give the right vcpu some time. In the worst case it could take O(n^2) vcpu scheduler timeslices for everyone waiting on the lock to get it, not counting new cpus trying to take the lock while the log-jam is sorted out. These hooks allow a paravirt backend to replace the spinlock implementation. At the very least, this could revert the implementation back to the old lock algorithm, which allows the next scheduled vcpu to take the lock, and has fairly good performance. It also allows the spinlocks to take advantage of hypervisor features to make locks more efficient (spin and block, for example). The cost to native execution is an extra direct call when using a spinlock function. There's no overhead if CONFIG_PARAVIRT is turned off. The lock structure is fixed at a single "unsigned int", initialized to zero, but the spinlock implementation can use it as it wishes. Thanks to Thomas Friebel's Xen Summit talk "Preventing Guests from Spinning Around" for pointing out this problem. 
Signed-off-by: Jeremy Fitzhardinge Cc: Jens Axboe Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Petr Tesarik Cc: Virtualization Cc: Xen devel Cc: Thomas Friebel Cc: Nick Piggin Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 2963ab5d91e..f3381686870 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -124,6 +124,7 @@ static void *get_call_destination(u8 type) .pv_irq_ops = pv_irq_ops, .pv_apic_ops = pv_apic_ops, .pv_mmu_ops = pv_mmu_ops, + .pv_lock_ops = pv_lock_ops, }; return *((void **)&tmpl + type); } @@ -450,6 +451,15 @@ struct pv_mmu_ops pv_mmu_ops = { .set_fixmap = native_set_fixmap, }; +struct pv_lock_ops pv_lock_ops = { + .spin_is_locked = __ticket_spin_is_locked, + .spin_is_contended = __ticket_spin_is_contended, + + .spin_lock = __ticket_spin_lock, + .spin_trylock = __ticket_spin_trylock, + .spin_unlock = __ticket_spin_unlock, +}; + EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); EXPORT_SYMBOL (pv_mmu_ops); -- cgit v1.2.3 From 8efcbab674de2bee45a2e4cdf97de16b8e609ac8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 7 Jul 2008 12:07:51 -0700 Subject: paravirt: introduce a "lock-byte" spinlock implementation Implement a version of the old spinlock algorithm, in which everyone spins waiting for a lock byte. In order to be compatible with the ticket-lock's use of a zero initializer, this uses the convention of '0' for unlocked and '1' for locked. This algorithm is much better than ticket locks in a virtual environment, because it doesn't interact badly with the vcpu scheduler. If there are multiple vcpus spinning on a lock and the lock is released, the next vcpu to be scheduled will take the lock, rather than cycling around until the next ticketed vcpu gets it. To use this, you must call paravirt_use_bytelocks() very early, before any spinlocks have been taken. 
Signed-off-by: Jeremy Fitzhardinge Cc: Jens Axboe Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Petr Tesarik Cc: Virtualization Cc: Xen devel Cc: Thomas Friebel Cc: Nick Piggin Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index f3381686870..bba4041bb7f 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -268,6 +268,15 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } +void __init paravirt_use_bytelocks(void) +{ + pv_lock_ops.spin_is_locked = __byte_spin_is_locked; + pv_lock_ops.spin_is_contended = __byte_spin_is_contended; + pv_lock_ops.spin_lock = __byte_spin_lock; + pv_lock_ops.spin_trylock = __byte_spin_trylock; + pv_lock_ops.spin_unlock = __byte_spin_unlock; +} + struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, -- cgit v1.2.3 From 56397f8dadb40055479a8ffff23f21a890098a31 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 7 Jul 2008 12:07:52 -0700 Subject: xen: use lock-byte spinlock implementation Switch to using the lock-byte spinlock implementation, to avoid the worst of the performance hit from ticket locks. 
Signed-off-by: Jeremy Fitzhardinge Cc: Jens Axboe Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Petr Tesarik Cc: Virtualization Cc: Xen devel Cc: Thomas Friebel Signed-off-by: Ingo Molnar --- arch/x86/xen/smp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index f702199312a..a8ebafc09d4 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -430,4 +430,5 @@ void __init xen_smp_init(void) { smp_ops = xen_smp_ops; xen_fill_possible_map(); + paravirt_use_bytelocks(); } -- cgit v1.2.3 From 2d9e1e2f58b5612aa4eab0ab54c84308a29dbd79 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 7 Jul 2008 12:07:53 -0700 Subject: xen: implement Xen-specific spinlocks The standard ticket spinlocks are very expensive in a virtual environment, because their performance depends on Xen's scheduler giving vcpus time in the order that they're supposed to take the spinlock. This implements a Xen-specific spinlock, which should be much more efficient. The fast-path is essentially the old Linux-x86 locks, using a single lock byte. The locker decrements the byte; if the result is 0, then they have the lock. If the lock is negative, then the locker must spin until the lock is positive again. When there's contention, the locker spins for 2^16[*] iterations waiting to get the lock. If it fails to get the lock in that time, it adds itself to the contention count in the lock and blocks on a per-cpu event channel. When unlocking the spinlock, the locker looks to see if there's anyone blocked waiting for the lock by checking for a non-zero waiter count. If there's a waiter, it traverses the per-cpu "lock_spinners" variable, which contains which lock each CPU is waiting on. It picks one CPU waiting on the lock and sends it an event to wake it up. This allows efficient fast-path spinlock operation, while allowing spinning vcpus to give up their processor time while waiting for a contended lock. 
[*] 2^16 iterations is threshold at which 98% locks have been taken according to Thomas Friebel's Xen Summit talk "Preventing Guests from Spinning Around". Therefore, we'd expect the lock and unlock slow paths will only be entered 2% of the time. Signed-off-by: Jeremy Fitzhardinge Cc: Jens Axboe Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Petr Tesarik Cc: Virtualization Cc: Xen devel Cc: Thomas Friebel Cc: Nick Piggin Signed-off-by: Ingo Molnar --- arch/x86/xen/smp.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 171 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index a8ebafc09d4..e693812ac59 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -15,6 +15,7 @@ * This does not handle HOTPLUG_CPU yet. */ #include +#include #include #include @@ -35,6 +36,8 @@ #include "xen-ops.h" #include "mmu.h" +static void __cpuinit xen_init_lock_cpu(int cpu); + cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq); @@ -179,6 +182,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) { unsigned cpu; + xen_init_lock_cpu(0); + smp_store_cpu_info(0); cpu_data(0).x86_max_cores = 1; set_cpu_sibling_map(0); @@ -301,6 +306,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) clear_tsk_thread_flag(idle, TIF_FORK); #endif xen_setup_timer(cpu); + xen_init_lock_cpu(cpu); per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; @@ -413,6 +419,170 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +struct xen_spinlock { + unsigned char lock; /* 0 -> free; 1 -> locked */ + unsigned short spinners; /* count of waiting cpus */ +}; + +static int xen_spin_is_locked(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + return xl->lock != 0; +} + +static int xen_spin_is_contended(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + /* Not strictly true; this is only 
the count of contended + lock-takers entering the slow path. */ + return xl->spinners != 0; +} + +static int xen_spin_trylock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + u8 old = 1; + + asm("xchgb %b0,%1" + : "+q" (old), "+m" (xl->lock) : : "memory"); + + return old == 0; +} + +static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; +static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); + +static inline void spinning_lock(struct xen_spinlock *xl) +{ + __get_cpu_var(lock_spinners) = xl; + wmb(); /* set lock of interest before count */ + asm(LOCK_PREFIX " incw %0" + : "+m" (xl->spinners) : : "memory"); +} + +static inline void unspinning_lock(struct xen_spinlock *xl) +{ + asm(LOCK_PREFIX " decw %0" + : "+m" (xl->spinners) : : "memory"); + wmb(); /* decrement count before clearing lock */ + __get_cpu_var(lock_spinners) = NULL; +} + +static noinline int xen_spin_lock_slow(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + int irq = __get_cpu_var(lock_kicker_irq); + int ret; + + /* If kicker interrupts not initialized yet, just spin */ + if (irq == -1) + return 0; + + /* announce we're spinning */ + spinning_lock(xl); + + /* clear pending */ + xen_clear_irq_pending(irq); + + /* check again make sure it didn't become free while + we weren't looking */ + ret = xen_spin_trylock(lock); + if (ret) + goto out; + + /* block until irq becomes pending */ + xen_poll_irq(irq); + kstat_this_cpu.irqs[irq]++; + +out: + unspinning_lock(xl); + return ret; +} + +static void xen_spin_lock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + int timeout; + u8 oldval; + + do { + timeout = 1 << 10; + + asm("1: xchgb %1,%0\n" + " testb %1,%1\n" + " jz 3f\n" + "2: rep;nop\n" + " cmpb $0,%0\n" + " je 1b\n" + " dec %2\n" + " jnz 2b\n" + "3:\n" + : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) + : "1" (1) + : "memory"); + + } while (unlikely(oldval != 0 && 
!xen_spin_lock_slow(lock))); +} + +static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) +{ + int cpu; + + for_each_online_cpu(cpu) { + /* XXX should mix up next cpu selection */ + if (per_cpu(lock_spinners, cpu) == xl) { + xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); + break; + } + } +} + +static void xen_spin_unlock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + smp_wmb(); /* make sure no writes get moved after unlock */ + xl->lock = 0; /* release lock */ + + /* make sure unlock happens before kick */ + barrier(); + + if (unlikely(xl->spinners)) + xen_spin_unlock_slow(xl); +} + +static __cpuinit void xen_init_lock_cpu(int cpu) +{ + int irq; + const char *name; + + name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); + irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, + cpu, + xen_reschedule_interrupt, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + name, + NULL); + + if (irq >= 0) { + disable_irq(irq); /* make sure it's never delivered */ + per_cpu(lock_kicker_irq, cpu) = irq; + } + + printk("cpu %d spinlock event irq %d\n", cpu, irq); +} + +static void __init xen_init_spinlocks(void) +{ + pv_lock_ops.spin_is_locked = xen_spin_is_locked; + pv_lock_ops.spin_is_contended = xen_spin_is_contended; + pv_lock_ops.spin_lock = xen_spin_lock; + pv_lock_ops.spin_trylock = xen_spin_trylock; + pv_lock_ops.spin_unlock = xen_spin_unlock; +} + static const struct smp_ops xen_smp_ops __initdata = { .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, .smp_prepare_cpus = xen_smp_prepare_cpus, @@ -430,5 +600,5 @@ void __init xen_smp_init(void) { smp_ops = xen_smp_ops; xen_fill_possible_map(); - paravirt_use_bytelocks(); + xen_init_spinlocks(); } -- cgit v1.2.3 From 4bb689eee12ceb6d669a0c9a519037c049a8af38 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Jul 2008 14:33:33 +0200 Subject: x86: paravirt spinlocks, !CONFIG_SMP build fixes Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 4 ++++ 1 file changed, 4 
insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index bba4041bb7f..6aa8aed06d5 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -270,11 +270,13 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) void __init paravirt_use_bytelocks(void) { +#ifdef CONFIG_SMP pv_lock_ops.spin_is_locked = __byte_spin_is_locked; pv_lock_ops.spin_is_contended = __byte_spin_is_contended; pv_lock_ops.spin_lock = __byte_spin_lock; pv_lock_ops.spin_trylock = __byte_spin_trylock; pv_lock_ops.spin_unlock = __byte_spin_unlock; +#endif } struct pv_info pv_info = { @@ -461,12 +463,14 @@ struct pv_mmu_ops pv_mmu_ops = { }; struct pv_lock_ops pv_lock_ops = { +#ifdef CONFIG_SMP .spin_is_locked = __ticket_spin_is_locked, .spin_is_contended = __ticket_spin_is_contended, .spin_lock = __ticket_spin_lock, .spin_trylock = __ticket_spin_trylock, .spin_unlock = __ticket_spin_unlock, +#endif }; EXPORT_SYMBOL_GPL(pv_time_ops); -- cgit v1.2.3 From 9af98578d6af588f52d0dacd64fe42caa405a327 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Jul 2008 14:39:15 +0200 Subject: x86: paravirt spinlocks, modular build fix fix: MODPOST 408 modules ERROR: "pv_lock_ops" [net/dccp/dccp.ko] undefined! ERROR: "pv_lock_ops" [fs/jbd2/jbd2.ko] undefined! ERROR: "pv_lock_ops" [drivers/media/common/saa7146_vv.ko] undefined! 
Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 6aa8aed06d5..3edfd7af22a 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -472,6 +472,7 @@ struct pv_lock_ops pv_lock_ops = { .spin_unlock = __ticket_spin_unlock, #endif }; +EXPORT_SYMBOL_GPL(pv_lock_ops); EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); -- cgit v1.2.3 From 34646bca474142e1424e5f6c4a33cb2ba0930ea1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Jul 2008 15:42:09 +0200 Subject: x86, paravirt-spinlocks: fix boot hang the paravirt-spinlock patches caused a boot hang with this config: http://redhat.com/~mingo/misc/config-Wed_Jul__9_14_47_04_CEST_2008.bad i have bisected it down to: | commit e17b58c2e85bc2ad2afc07fb8d898017c2b75ed1 | Author: Jeremy Fitzhardinge | Date: Mon Jul 7 12:07:53 2008 -0700 | | xen: implement Xen-specific spinlocks i.e. applying that patch alone causes the hang. The hang happens in the ftrace self-test: initcall utsname_sysctl_init+0x0/0x19 returned 0 after 0 msecs calling init_sched_switch_trace+0x0/0x4c Testing tracer sched_switch: PASSED initcall init_sched_switch_trace+0x0/0x4c returned 0 after 167 msecs calling init_function_trace+0x0/0x12 Testing tracer ftrace: [hard hang] it should have continued like this: Testing tracer ftrace: PASSED initcall init_function_trace+0x0/0x12 returned 0 after 198 msecs calling init_irqsoff_tracer+0x0/0x14 Testing tracer irqsoff: PASSED initcall init_irqsoff_tracer+0x0/0x14 returned 0 after 3 msecs calling init_mmio_trace+0x0/0x12 initcall init_mmio_trace+0x0/0x12 returned 0 after 0 msecs the problem is that such lowlevel primitives as spinlocks should never be built with -pg (which ftrace does). Marking paravirt.o as non-pg and marking all spinlock ops as always-inline solve the hang. 
Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5112c84f542..78d52171400 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -7,10 +7,11 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) ifdef CONFIG_FTRACE -# Do not profile debug utilities +# Do not profile debug and lowlevel utilities CFLAGS_REMOVE_tsc_64.o = -pg CFLAGS_REMOVE_tsc_32.o = -pg CFLAGS_REMOVE_rtc.o = -pg +CFLAGS_REMOVE_paravirt.o = -pg endif # -- cgit v1.2.3 From 6718d0d6da2749d3bff522e6057e97e6aa85e4d1 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 9 Jul 2008 01:07:02 -0700 Subject: x86 ptrace: block-step fix The enable_single_step() logic bails out early if TF is already set. That skips some of the bookkeeping that keeps things straight. This makes PTRACE_SINGLEBLOCK break the behavior of a user task that was already setting TF itself in user mode. Fix the bookkeeping to notice the old TF setting as it should. 
Test case at: http://sources.redhat.com/cgi-bin/cvsweb.cgi/~checkout~/tests/ptrace-tests/tests/step-jump-cont-strict.c?cvsroot=systemtap Signed-off-by: Roland McGrath --- arch/x86/kernel/step.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 92c20fee678..0d2cb363ea7 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -105,6 +105,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) static int enable_single_step(struct task_struct *child) { struct pt_regs *regs = task_pt_regs(child); + unsigned long oflags; /* * Always set TIF_SINGLESTEP - this guarantees that @@ -113,11 +114,7 @@ static int enable_single_step(struct task_struct *child) */ set_tsk_thread_flag(child, TIF_SINGLESTEP); - /* - * If TF was already set, don't do anything else - */ - if (regs->flags & X86_EFLAGS_TF) - return 0; + oflags = regs->flags; /* Set TF on the kernel stack.. */ regs->flags |= X86_EFLAGS_TF; @@ -126,9 +123,22 @@ static int enable_single_step(struct task_struct *child) * ..but if TF is changed by the instruction we will trace, * don't mark it as being "us" that set it, so that we * won't clear it by hand later. + * + * Note that if we don't actually execute the popf because + * of a signal arriving right now or suchlike, we will lose + * track of the fact that it really was "us" that set it. */ - if (is_setting_trap_flag(child, regs)) + if (is_setting_trap_flag(child, regs)) { + clear_tsk_thread_flag(child, TIF_FORCED_TF); return 0; + } + + /* + * If TF was already set, check whether it was us who set it. + * If not, we should never attempt a block step. 
+ */ + if (oflags & X86_EFLAGS_TF) + return test_tsk_thread_flag(child, TIF_FORCED_TF); set_tsk_thread_flag(child, TIF_FORCED_TF); -- cgit v1.2.3 From 64f097331928b01d704047c1dbc738bb6d2a9bf9 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 9 Jul 2008 01:33:14 -0700 Subject: x86 ptrace: unify TIF_SINGLESTEP This unifies the treatment of TIF_SINGLESTEP on i386 and x86_64. The bit is now excluded from _TIF_WORK_MASK on i386 as it has been on x86_64. This means the do_notify_resume() path using it is never used, so TIF_SINGLESTEP is not cleared on returning to user mode. Both now leave TIF_SINGLESTEP set when returning to user, so that it's already set on an int $0x80 system call entry. This removes the need for testing TF on the system_call path. Doing it this way fixes the regression for PTRACE_SINGLESTEP into a sigreturn syscall, introduced by commit 1e2e99f0e4aa6363e8515ed17011c210c8f1b52a. The clear_TF_reenable case that sets TIF_SINGLESTEP can only happen on a non-exception kernel entry, i.e. sysenter/syscall instruction. That will always get to the syscall exit tracing path. 
Signed-off-by: Roland McGrath --- arch/x86/kernel/entry_32.S | 4 ---- arch/x86/kernel/signal_32.c | 6 ------ arch/x86/kernel/signal_64.c | 6 ------ 3 files changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6bc07f0f120..0ad987d02b7 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -383,10 +383,6 @@ syscall_exit: # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF - testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit - jz no_singlestep - orl $_TIF_SINGLESTEP,TI_flags(%ebp) -no_singlestep: movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index d9237363096..295b5f5c938 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -657,12 +657,6 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { - /* Pending single-step? */ - if (thread_info_flags & _TIF_SINGLESTEP) { - regs->flags |= X86_EFLAGS_TF; - clear_thread_flag(TIF_SINGLESTEP); - } - /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index e53b267662e..bf87684474f 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -487,12 +487,6 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { - /* Pending single-step? 
*/ - if (thread_info_flags & _TIF_SINGLESTEP) { - regs->flags |= X86_EFLAGS_TF; - clear_thread_flag(TIF_SINGLESTEP); - } - #ifdef CONFIG_X86_MCE /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) -- cgit v1.2.3 From d4d67150165df8bf1cc05e532f6efca96f907cab Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 9 Jul 2008 02:38:07 -0700 Subject: x86 ptrace: unify syscall tracing This unifies and cleans up the syscall tracing code on i386 and x86_64. Using a single function for entry and exit tracing on 32-bit made the do_syscall_trace() into some terrible spaghetti. The logic is clear and simple using separate syscall_trace_enter() and syscall_trace_leave() functions as on 64-bit. The unification adds PTRACE_SYSEMU and PTRACE_SYSEMU_SINGLESTEP support on x86_64, for 32-bit ptrace() callers and for 64-bit ptrace() callers tracing either 32-bit or 64-bit tasks. It behaves just like 32-bit. Changing syscall_trace_enter() to return the syscall number shortens all the assembly paths, while adding the SYSEMU feature in a simple way. Signed-off-by: Roland McGrath --- arch/x86/ia32/ia32entry.S | 17 +++--- arch/x86/kernel/entry_32.S | 19 +++--- arch/x86/kernel/entry_64.S | 14 +++-- arch/x86/kernel/ptrace.c | 141 +++++++++++++++------------------------------ 4 files changed, 71 insertions(+), 120 deletions(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 20371d0635e..8796d190525 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -37,6 +37,11 @@ movq %rax,R8(%rsp) .endm + /* + * Reload arg registers from stack in case ptrace changed them. + * We don't reload %eax because syscall_trace_enter() returned + * the value it wants us to use in the table lookup. 
+ */ .macro LOAD_ARGS32 offset movl \offset(%rsp),%r11d movl \offset+8(%rsp),%r10d @@ -46,7 +51,6 @@ movl \offset+48(%rsp),%edx movl \offset+56(%rsp),%esi movl \offset+64(%rsp),%edi - movl \offset+72(%rsp),%eax .endm .macro CFI_STARTPROC32 simple @@ -137,13 +141,12 @@ ENTRY(ia32_sysenter_target) .previous GET_THREAD_INFO(%r10) orl $TS_COMPAT,TI_status(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ - TI_flags(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) CFI_REMEMBER_STATE jnz sysenter_tracesys -sysenter_do_call: cmpl $(IA32_NR_syscalls-1),%eax ja ia32_badsys +sysenter_do_call: IA32_ARG_FIXUP 1 call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) @@ -242,8 +245,7 @@ ENTRY(ia32_cstar_target) .previous GET_THREAD_INFO(%r10) orl $TS_COMPAT,TI_status(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ - TI_flags(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) CFI_REMEMBER_STATE jnz cstar_tracesys cstar_do_call: @@ -336,8 +338,7 @@ ENTRY(ia32_syscall) SAVE_ARGS 0,0,1 GET_THREAD_INFO(%r10) orl $TS_COMPAT,TI_status(%r10) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ - TI_flags(%r10) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) jnz ia32_tracesys ia32_do_syscall: cmpl $(IA32_NR_syscalls-1),%eax diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 0ad987d02b7..cadf73f70d3 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -332,7 +332,7 @@ sysenter_past_esp: GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -370,7 +370,7 @@ ENTRY(system_call) GET_THREAD_INFO(%ebp) # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not 
testb */ - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -510,12 +510,8 @@ END(work_pending) syscall_trace_entry: movl $-ENOSYS,PT_EAX(%esp) movl %esp, %eax - xorl %edx,%edx - call do_syscall_trace - cmpl $0, %eax - jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, - # so must skip actual syscall - movl PT_ORIG_EAX(%esp), %eax + call syscall_trace_enter + /* What it returned is what we'll actually use. */ cmpl $(nr_syscalls), %eax jnae syscall_call jmp syscall_exit @@ -524,14 +520,13 @@ END(syscall_trace_entry) # perform syscall exit tracing ALIGN syscall_exit_work: - testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl + testb $_TIF_WORK_SYSCALL_EXIT, %cl jz work_pending TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call + ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call # schedule() instead movl %esp, %eax - movl $1, %edx - call do_syscall_trace + call syscall_trace_leave jmp resume_userspace END(syscall_exit_work) CFI_ENDPROC diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index ae63e584c34..63001c6ecf6 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -349,8 +349,7 @@ ENTRY(system_call_after_swapgs) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET GET_THREAD_INFO(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ - TI_flags(%rcx) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys @@ -430,7 +429,12 @@ tracesys: FIXUP_TOP_OF_STACK %rdi movq %rsp,%rdi call syscall_trace_enter - LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ + /* + * Reload arg registers from stack in case ptrace changed them. 
+ * We don't reload %rax because syscall_trace_enter() returned + * the value it wants us to use in the table lookup. + */ + LOAD_ARGS ARGOFFSET, 1 RESTORE_REST cmpq $__NR_syscall_max,%rax ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ @@ -483,7 +487,7 @@ int_very_careful: ENABLE_INTERRUPTS(CLBR_NONE) SAVE_REST /* Check for syscall exit trace */ - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx + testl $_TIF_WORK_SYSCALL_EXIT,%edx jz int_signal pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -491,7 +495,7 @@ int_very_careful: call syscall_trace_leave popq %rdi CFI_ADJUST_CFA_OFFSET -8 - andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi + andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi jmp int_restore_rest int_signal: diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 77040b6070e..34e77b16a42 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) #endif } -#ifdef CONFIG_X86_32 - void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) { struct siginfo info; @@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) force_sig_info(SIGTRAP, &info, tsk); } -/* notification of system call entry/exit - * - triggered by current->work.syscall_trace - */ -int do_syscall_trace(struct pt_regs *regs, int entryexit) -{ - int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); - /* - * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall - * interception - */ - int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); - int ret = 0; - - /* do the secure computing check first */ - if (!entryexit) - secure_computing(regs->orig_ax); - - if (unlikely(current->audit_context)) { - if (entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->ax), - regs->ax); - /* Debug traps, when using PTRACE_SINGLESTEP, must be sent 
only - * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is - * not used, entry.S will call us only on syscall exit, not - * entry; so when TIF_SYSCALL_AUDIT is used we must avoid - * calling send_sigtrap() on syscall entry. - * - * Note that when PTRACE_SYSEMU_SINGLESTEP is used, - * is_singlestep is false, despite his name, so we will still do - * the correct thing. - */ - else if (is_singlestep) - goto out; - } - - if (!(current->ptrace & PT_PTRACED)) - goto out; - - /* If a process stops on the 1st tracepoint with SYSCALL_TRACE - * and then is resumed with SYSEMU_SINGLESTEP, it will come in - * here. We have to check this and return */ - if (is_sysemu && entryexit) - return 0; - - /* Fake a debug trap */ - if (is_singlestep) - send_sigtrap(current, regs, 0); - - if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) - goto out; - - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - /* Note that the debugger could change the result of test_thread_flag!*/ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); - - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. 
-brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } - ret = is_sysemu; -out: - if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, - regs->bx, regs->cx, regs->dx, regs->si); - if (ret == 0) - return 0; - - regs->orig_ax = -1; /* force skip of syscall restarting */ - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); - return 1; -} - -#else /* CONFIG_X86_64 */ - static void syscall_trace(struct pt_regs *regs) { + if (!(current->ptrace & PT_PTRACED)) + return; #if 0 printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", @@ -1481,39 +1400,71 @@ static void syscall_trace(struct pt_regs *regs) } } -asmlinkage void syscall_trace_enter(struct pt_regs *regs) +#ifdef CONFIG_X86_32 +# define IS_IA32 1 +#elif defined CONFIG_IA32_EMULATION +# define IS_IA32 test_thread_flag(TIF_IA32) +#else +# define IS_IA32 0 +#endif + +/* + * We must return the syscall number to actually look up in the table. + * This can be -1L to skip running any syscall at all. 
+ */ +asmregparm long syscall_trace_enter(struct pt_regs *regs) { + long ret = 0; + /* do the secure computing check first */ secure_computing(regs->orig_ax); - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) + if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) + ret = -1L; + + if (ret || test_thread_flag(TIF_SYSCALL_TRACE)) syscall_trace(regs); if (unlikely(current->audit_context)) { - if (test_thread_flag(TIF_IA32)) { + if (IS_IA32) audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, regs->bx, regs->cx, regs->dx, regs->si); - } else { +#ifdef CONFIG_X86_64 + else audit_syscall_entry(AUDIT_ARCH_X86_64, regs->orig_ax, regs->di, regs->si, regs->dx, regs->r10); - } +#endif } + + return ret ?: regs->orig_ax; } -asmlinkage void syscall_trace_leave(struct pt_regs *regs) +asmregparm void syscall_trace_leave(struct pt_regs *regs) { if (unlikely(current->audit_context)) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); - if ((test_thread_flag(TIF_SYSCALL_TRACE) - || test_thread_flag(TIF_SINGLESTEP)) - && (current->ptrace & PT_PTRACED)) + if (test_thread_flag(TIF_SYSCALL_TRACE)) syscall_trace(regs); -} -#endif /* CONFIG_X86_32 */ + /* + * If TIF_SYSCALL_EMU is set, we only get here because of + * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). + * We already reported this syscall instruction in + * syscall_trace_enter(), so don't do any more now. + */ + if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) + return; + + /* + * If we are single-stepping, synthesize a trap to follow the + * system call instruction. 
+ */ + if (test_thread_flag(TIF_SINGLESTEP) && + (current->ptrace & PT_PTRACED)) + send_sigtrap(current, regs, 0); +} -- cgit v1.2.3 From 380fdd7585a4c2f41b48925eba85c0654b7b858b Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 9 Jul 2008 02:39:29 -0700 Subject: x86 ptrace: user-sets-TF nits This closes some arcane holes in single-step handling that can arise only when user programs set TF directly (via popf or sigreturn) and then use vDSO (syscall/sysenter) system call entry. In those entry paths, the clear_TF_reenable case hits and we must check TIF_SINGLESTEP to be sure our bookkeeping stays correct wrt the user's view of TF. Signed-off-by: Roland McGrath --- arch/x86/kernel/ptrace.c | 10 ++++++++++ arch/x86/kernel/step.c | 13 +++++++++++++ 2 files changed, 23 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 34e77b16a42..e37dccce85d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1416,6 +1416,16 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) { long ret = 0; + /* + * If we stepped into a sysenter/syscall insn, it trapped in + * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. + * If user-mode had set TF itself, then it's still clear from + * do_debug() and we need to set it again to restore the user + * state. If we entered on the slow path, TF was already set. + */ + if (test_thread_flag(TIF_SINGLESTEP)) + regs->flags |= X86_EFLAGS_TF; + /* do the secure computing check first */ secure_computing(regs->orig_ax); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 0d2cb363ea7..e8b9863ef8c 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -107,6 +107,19 @@ static int enable_single_step(struct task_struct *child) struct pt_regs *regs = task_pt_regs(child); unsigned long oflags; + /* + * If we stepped into a sysenter/syscall insn, it trapped in + * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. 
+ * If user-mode had set TF itself, then it's still clear from + * do_debug() and we need to set it again to restore the user + * state so we don't wrongly set TIF_FORCED_TF below. + * If enable_single_step() was used last and that is what + * set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are + * already set and our bookkeeping is fine. + */ + if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP))) + regs->flags |= X86_EFLAGS_TF; + /* * Always set TIF_SINGLESTEP - this guarantees that * we single-step system calls etc.. This will also -- cgit v1.2.3 From fab3b58d3b242b5903f78d60d86803a8aecdf6de Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 17 Jul 2008 13:50:15 +0200 Subject: x86 reboot quirks: add Dell Precision WorkStation T5400 as reported in: "reboot=bios is mandatory on Dell T5400 server." http://bugzilla.kernel.org/show_bug.cgi?id=11108 add a DMI reboot quirk. Signed-off-by: Ingo Molnar Cc: --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f8a62160e15..9dcf39c0297 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -177,6 +177,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), }, }, + { /* Handle problems with rebooting on Dell T5400's */ + .callback = set_bios_reboot, + .ident = "Dell Precision T5400", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"), + }, + }, { /* Handle problems with rebooting on HP laptops */ .callback = set_bios_reboot, .ident = "HP Compaq Laptop", -- cgit v1.2.3 From 93a0886e2368eafb9df5e2021fb185195cee88b2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 15 Jul 2008 13:43:42 -0700 Subject: x86, xen, power: fix up config dependencies on PM Xen save/restore needs bits of code enabled by PM_SLEEP, and PM_SLEEP depends on PM. 
So make XEN_SAVE_RESTORE depend on PM and PM_SLEEP depend on XEN_SAVE_RESTORE. Signed-off-by: Jeremy Fitzhardinge Acked-by: Rafael J. Wysocki Signed-off-by: Ingo Molnar --- arch/x86/xen/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 20b49729bed..3815e425f47 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -23,3 +23,8 @@ config XEN_MAX_DOMAIN_MEMORY according to the maximum possible memory size of a Xen domain. This array uses 1 page per gigabyte, so there's no need to be too stingy here. + +config XEN_SAVE_RESTORE + bool + depends on PM + default y \ No newline at end of file -- cgit v1.2.3 From 60192db82952ad56ef7bbc4a318e2041ca65ba7d Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 17 Jul 2008 11:11:17 -0700 Subject: [IA64] improper printk format in acpi-cpufreq When dprintk is enabled the following warnings are generated: arch/ia64/kernel/cpufreq/acpi-cpufreq.c: In function 'processor_set_pstate': arch/ia64/kernel/cpufreq/acpi-cpufreq.c:54: warning: format '%x' expects type 'unsigned int', but argument 3 has type 's64' arch/ia64/kernel/cpufreq/acpi-cpufreq.c: In function 'processor_get_pstate': arch/ia64/kernel/cpufreq/acpi-cpufreq.c:76: warning: format '%x' expects type 'unsigned int', but argument 2 has type 's64' Signed-off-by: Denis V. 
Lunev Signed-off-by: Tony Luck --- arch/ia64/kernel/cpufreq/acpi-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c index b8498ea6206..7b435451b3d 100644 --- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c +++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c @@ -51,7 +51,7 @@ processor_set_pstate ( retval = ia64_pal_set_pstate((u64)value); if (retval) { - dprintk("Failed to set freq to 0x%x, with error 0x%x\n", + dprintk("Failed to set freq to 0x%x, with error 0x%lx\n", value, retval); return -ENODEV; } @@ -74,7 +74,7 @@ processor_get_pstate ( if (retval) dprintk("Failed to get current freq with " - "error 0x%x, idx 0x%x\n", retval, *value); + "error 0x%lx, idx 0x%x\n", retval, *value); return (int)retval; } -- cgit v1.2.3 From 740a8de0796dd12890b3c8ddcfabfcb528b78d40 Mon Sep 17 00:00:00 2001 From: "Akiyama, Nobuyuki" Date: Thu, 17 Jul 2008 11:22:01 -0700 Subject: [IA64] adding parameter check to module_free() module_free() refers the first parameter before checking. But it is called like below(in kernel/kprobes). The first parameter is always NULL. This happens when many probe points(>1024) are set by kprobes. I encountered this with using SystemTap. It can set many probes easily. static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) { ... if (kip->nused == 0) { hlist_del(&kip->hlist); if (hlist_empty(&kprobe_insn_pages)) { ... 
} else { module_free(NULL, kip->insns); //<<< 1st param always NULL kfree(kip); } return 1; } return 0; } Signed-off-by: Akiyama, Nobuyuki Signed-off-by: Tony Luck --- arch/ia64/kernel/module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index e83e2ea3b3e..29aad349e0c 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -321,7 +321,8 @@ module_alloc (unsigned long size) void module_free (struct module *mod, void *module_region) { - if (mod->arch.init_unw_table && module_region == mod->module_init) { + if (mod && mod->arch.init_unw_table && + module_region == mod->module_init) { unw_remove_unwind_table(mod->arch.init_unw_table); mod->arch.init_unw_table = NULL; } -- cgit v1.2.3 From efc7508c9e29944fb3d9edf166d3d584557c33d1 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 16 Jul 2008 12:47:08 -0600 Subject: [IA64] Avoid overflowing ia64_cpu_to_sapicid in acpi_map_lsapic() acpi_map_lsapic tries to stuff a long into ia64_cpu_to_sapicid[], which can only hold ints, so let's fix that. We need to update the signature of acpi_map_cpu2node() too. 
Signed-off-by: Alex Chiang Signed-off-by: Tony Luck --- arch/ia64/kernel/acpi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 43687cc60df..5d1eb7ee2bf 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -774,7 +774,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) */ #ifdef CONFIG_ACPI_HOTPLUG_CPU static -int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) +int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int pxm_id; @@ -854,8 +854,7 @@ int acpi_map_lsapic(acpi_handle handle, int *pcpu) union acpi_object *obj; struct acpi_madt_local_sapic *lsapic; cpumask_t tmp_map; - long physid; - int cpu; + int cpu, physid; if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) return -EINVAL; -- cgit v1.2.3 From 4fdf08b5bf8d449cc9897395895157c6ff8ddc41 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 17 Jul 2008 11:29:24 -0700 Subject: x86: unify and correct the GDT_ENTRY() macro Merge the GDT_ENTRY() macro between arch/x86/boot/pm.c and arch/x86/kernel/acpi/sleep.c and put the new one in <asm-x86/segment.h>. While we're at it, correct the bitmasks for the limit and flags. The new version relies on using ULL constants in order to cause type promotion rather than explicit casts; this avoids having to include <linux/types.h> in <asm-x86/segment.h>. Signed-off-by: H. 
Peter Anvin --- arch/x86/boot/pm.c | 6 ------ arch/x86/kernel/acpi/sleep.c | 10 +--------- 2 files changed, 1 insertion(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c index 328956fdb59..85a1cd8a8ff 100644 --- a/arch/x86/boot/pm.c +++ b/arch/x86/boot/pm.c @@ -98,12 +98,6 @@ static void reset_coprocessor(void) /* * Set up the GDT */ -#define GDT_ENTRY(flags, base, limit) \ - (((u64)(base & 0xff000000) << 32) | \ - ((u64)flags << 40) | \ - ((u64)(limit & 0x00ff0000) << 32) | \ - ((u64)(base & 0x00ffffff) << 16) | \ - ((u64)(limit & 0x0000ffff))) struct gdt_ptr { u16 len; diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 868de3d5c39..a3ddad18aaa 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "realmode/wakeup.h" #include "sleep.h" @@ -23,15 +24,6 @@ static unsigned long acpi_realmode; static char temp_stack[10240]; #endif -/* XXX: this macro should move to asm-x86/segment.h and be shared with the - boot code... */ -#define GDT_ENTRY(flags, base, limit) \ - (((u64)(base & 0xff000000) << 32) | \ - ((u64)flags << 40) | \ - ((u64)(limit & 0x00ff0000) << 32) | \ - ((u64)(base & 0x00ffffff) << 16) | \ - ((u64)(limit & 0x0000ffff))) - /** * acpi_save_state_mem - save kernel state * -- cgit v1.2.3 From fb86611f8f3251865784d5938a485a0238ec1427 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Thu, 26 Jun 2008 14:53:11 +0200 Subject: [IA64] Remove experimental status of kdump This patch removes the experimental status of kdump on IA64. kdump is on IA64 now since more than one year and it has proven to be stable. 
Signed-off-by: Bernhard Walle Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 18bcc10903b..451f2ffb137 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -540,8 +540,8 @@ config KEXEC strongly in flux, so no good recommendation can be made. config CRASH_DUMP - bool "kernel crash dumps (EXPERIMENTAL)" - depends on EXPERIMENTAL && IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) + bool "kernel crash dumps" + depends on IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) help Generate crash dump after being started by kexec. -- cgit v1.2.3 From 64d206d896ff70b828138577d5ff39deda5f1c4d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 18 Jul 2008 00:26:59 +0200 Subject: x86: rename CONFIG_NONPROMISC_DEVMEM to CONFIG_PROMISC_DEVMEM Linus observed: > The real bug is that we shouldn't have "double negatives", and > certainly not negative config options. Making that "promiscuous > /dev/mem" option a negated thing as a config option was bad. right ... lets rename this option. There should never be a negation in config options. 
[ that reminds me of CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER, but that is for another commit ;-) ] Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 7 ++++--- arch/x86/mm/pat.c | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ae36bfa814e..f0cf5d99079 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -5,10 +5,11 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" -config NONPROMISC_DEVMEM - bool "Filter access to /dev/mem" +config PROMISC_DEVMEM + bool "Allow unlimited access to /dev/mem" + default y help - If this option is left off, you allow userspace access to all + If this option is left on, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental access to this is obviously disastrous, but specific access can be used by people debugging the kernel. diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index d4585077977..c34dc483839 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -373,8 +373,8 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, return vma_prot; } -#ifdef CONFIG_NONPROMISC_DEVMEM -/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/ +#ifndef CONFIG_PROMISC_DEVMEM +/* This check is done in drivers/char/mem.c in case of !PROMISC_DEVMEM*/ static inline int range_is_allowed(unsigned long pfn, unsigned long size) { return 1; @@ -398,7 +398,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) } return 1; } -#endif /* CONFIG_NONPROMISC_DEVMEM */ +#endif /* CONFIG_PROMISC_DEVMEM */ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) -- cgit v1.2.3 From 6879827f4e08da219c99b91e4e1d793a924103e3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 18 Jul 2008 01:21:53 +0200 Subject: x86: remove arch/x86/kernel/smpcommon_32.c Yinghai Lu noticed that 
arch/x86/kernel/smpcommon_32.c got renamed to arch/x86/kernel/smpcommon.c but the old almost-empty file stayed around. Zap it. Reported-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpcommon_32.c | 1 - 1 file changed, 1 deletion(-) delete mode 100644 arch/x86/kernel/smpcommon_32.c (limited to 'arch') diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c deleted file mode 100644 index 8b137891791..00000000000 --- a/arch/x86/kernel/smpcommon_32.c +++ /dev/null @@ -1 +0,0 @@ - -- cgit v1.2.3 From c61c65cdcd1021cfbd7be8685ff1cf4f86c68c44 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 5 Jun 2008 11:40:58 -0700 Subject: sparc/kernel/: possible cleanups This patch contains the following possible cleanups: - make the following needlessly global code static: - apc.c: apc_swift_idle() - ebus.c: ebus_blacklist_irq() - ebus.c: fill_ebus_child() - ebus.c: fill_ebus_device() - entry.S: syscall_is_too_hard - etrap.S: tsetup_sun4c_stackchk - head.S: cputyp - head.S: prom_vector_p - idprom.c: Sun_Machines[] - ioport.c: _sparc_find_resource() - ioport.c: register_proc_sparc_ioport() - irq.c: struct sparc_irq[] - rtrap.S: sun4c_rett_stackchk - setup.c: prom_sync_me() - setup.c: boot_flags - sun4c_irq.c: sun4c_sbint_to_irq() - sun4d_irq.c: sbus_tid[] - sun4d_irq.c: struct sbus_actions - sun4d_irq.c: sun4d_sbint_to_irq() - sun4m_irq.c: sun4m_sbint_to_irq() - sun4m_irq.c: sun4m_get_irqmask() - sun4m_irq.c: sun4m_timers - sun4m_smp.c: smp4m_cross_call() - sun4m_smp.c: smp4m_blackbox_id() - sun4m_smp.c: smp4m_blackbox_current() - time.c: sp_clock_typ - time.c: sbus_time_init() - traps.c: instruction_dump() - wof.S: spwin_sun4c_stackchk - wuf.S: sun4c_fwin_stackchk - #if 0 the following unused code: - process.c: sparc_backtrace_lock - process.c: __show_backtrace() - process.c: show_backtrace() - process.c: smp_show_backtrace_all_cpus() - remove the following unused code: - entry.S: __handle_exception - smp.c: smp_num_cpus - smp.c: smp_activated - 
smp.c: __cpu_number_map[] - smp.c: __cpu_logical_map[] - smp.c: bitops_spinlock - traps.c: trap_curbuf - traps.c: trapbuf[] - traps.c: linux_smp_still_initting - traps.c: thiscpus_tbr - traps.c: thiscpus_mid Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- arch/sparc/kernel/apc.c | 2 +- arch/sparc/kernel/ebus.c | 9 +++++---- arch/sparc/kernel/entry.S | 4 +--- arch/sparc/kernel/etrap.S | 1 - arch/sparc/kernel/head.S | 2 -- arch/sparc/kernel/idprom.c | 2 +- arch/sparc/kernel/ioport.c | 13 +++++++------ arch/sparc/kernel/irq.c | 2 +- arch/sparc/kernel/process.c | 3 ++- arch/sparc/kernel/rtrap.S | 2 -- arch/sparc/kernel/setup.c | 4 ++-- arch/sparc/kernel/smp.c | 7 ------- arch/sparc/kernel/sun4c_irq.c | 3 ++- arch/sparc/kernel/sun4d_irq.c | 7 ++++--- arch/sparc/kernel/sun4m_irq.c | 7 ++++--- arch/sparc/kernel/sun4m_smp.c | 9 +++++---- arch/sparc/kernel/time.c | 4 ++-- arch/sparc/kernel/traps.c | 9 +-------- arch/sparc/kernel/wof.S | 1 - arch/sparc/kernel/wuf.S | 1 - 20 files changed, 38 insertions(+), 54 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c index 6707422c984..54f7ccd7455 100644 --- a/arch/sparc/kernel/apc.c +++ b/arch/sparc/kernel/apc.c @@ -56,7 +56,7 @@ __setup("apc=", apc_setup); * CPU idle callback function * See .../arch/sparc/kernel/process.c */ -void apc_swift_idle(void) +static void apc_swift_idle(void) { #ifdef APC_DEBUG_LED set_auxio(0x00, AUXIO_LED); diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c index 92c6fc07e59..97294232259 100644 --- a/arch/sparc/kernel/ebus.c +++ b/arch/sparc/kernel/ebus.c @@ -69,7 +69,7 @@ static inline unsigned long ebus_alloc(size_t size) /* */ -int __init ebus_blacklist_irq(const char *name) +static int __init ebus_blacklist_irq(const char *name) { struct ebus_device_irq *dp; @@ -83,8 +83,8 @@ int __init ebus_blacklist_irq(const char *name) return 0; } -void __init fill_ebus_child(struct device_node *dp, - struct linux_ebus_child *dev) +static 
void __init fill_ebus_child(struct device_node *dp, + struct linux_ebus_child *dev) { const int *regs; const int *irqs; @@ -144,7 +144,8 @@ void __init fill_ebus_child(struct device_node *dp, } } -void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_device *dev) +static void __init fill_ebus_device(struct device_node *dp, + struct linux_ebus_device *dev) { const struct linux_prom_registers *regs; struct linux_ebus_child *child; diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 4bcfe54f878..55d3be1b5d8 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -1317,7 +1317,6 @@ linux_sparc_syscall: bne linux_fast_syscall /* Just do first insn from SAVE_ALL in the delay slot */ - .globl syscall_is_too_hard syscall_is_too_hard: SAVE_ALL_HEAD rd %wim, %l3 @@ -1544,8 +1543,7 @@ kgdb_trap_low: #endif .align 4 - .globl __handle_exception, flush_patch_exception -__handle_exception: + .globl flush_patch_exception flush_patch_exception: FLUSH_ALL_KERNEL_WINDOWS; ldd [%o0], %o6 diff --git a/arch/sparc/kernel/etrap.S b/arch/sparc/kernel/etrap.S index f37d961d67a..e806fcdc46d 100644 --- a/arch/sparc/kernel/etrap.S +++ b/arch/sparc/kernel/etrap.S @@ -228,7 +228,6 @@ tsetup_mmu_patchme: */ #define glob_tmp g1 - .globl tsetup_sun4c_stackchk tsetup_sun4c_stackchk: /* Done by caller: andcc %sp, 0x7, %g0 */ bne trap_setup_user_stack_is_bolixed diff --git a/arch/sparc/kernel/head.S b/arch/sparc/kernel/head.S index 3bfd6085a91..50d9a16af79 100644 --- a/arch/sparc/kernel/head.S +++ b/arch/sparc/kernel/head.S @@ -32,7 +32,6 @@ */ .align 4 - .globl cputyp cputyp: .word 1 @@ -1280,7 +1279,6 @@ halt_me: * gets initialized in c-code so all routines can use it. 
*/ - .globl prom_vector_p prom_vector_p: .word 0 diff --git a/arch/sparc/kernel/idprom.c b/arch/sparc/kernel/idprom.c index 7220562cdb3..fc511f3c4c1 100644 --- a/arch/sparc/kernel/idprom.c +++ b/arch/sparc/kernel/idprom.c @@ -24,7 +24,7 @@ static struct idprom idprom_buffer; * of the Sparc CPU and have a meaningful IDPROM machtype value that we * know about. See asm-sparc/machines.h for empirical constants. */ -struct Sun_Machine_Models Sun_Machines[NUM_SUN_MACHINES] = { +static struct Sun_Machine_Models Sun_Machines[NUM_SUN_MACHINES] = { /* First, Sun4's */ { "Sun 4/100 Series", (SM_SUN4 | SM_4_110) }, { "Sun 4/200 Series", (SM_SUN4 | SM_4_260) }, diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 7b17522f59b..487960919f1 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -49,13 +49,16 @@ #define mmu_inval_dma_area(p, l) /* Anton pulled it out for 2.4.0-xx */ -struct resource *_sparc_find_resource(struct resource *r, unsigned long); +static struct resource *_sparc_find_resource(struct resource *r, + unsigned long); static void __iomem *_sparc_ioremap(struct resource *res, u32 bus, u32 pa, int sz); static void __iomem *_sparc_alloc_io(unsigned int busno, unsigned long phys, unsigned long size, char *name); static void _sparc_free_io(struct resource *res); +static void register_proc_sparc_ioport(void); + /* This points to the next to use virtual memory for DVMA mappings */ static struct resource _sparc_dvma = { .name = "sparc_dvma", .start = DVMA_VADDR, .end = DVMA_END - 1 @@ -539,8 +542,6 @@ void __init sbus_setup_arch_props(struct sbus_bus *sbus, struct device_node *dp) int __init sbus_arch_preinit(void) { - extern void register_proc_sparc_ioport(void); - register_proc_sparc_ioport(); #ifdef CONFIG_SUN4 @@ -853,8 +854,8 @@ _sparc_io_get_info(char *buf, char **start, off_t fpos, int length, int *eof, * XXX Too slow. Can have 8192 DVMA pages on sun4m in the worst case. * This probably warrants some sort of hashing. 
*/ -struct resource * -_sparc_find_resource(struct resource *root, unsigned long hit) +static struct resource *_sparc_find_resource(struct resource *root, + unsigned long hit) { struct resource *tmp; @@ -865,7 +866,7 @@ _sparc_find_resource(struct resource *root, unsigned long hit) return NULL; } -void register_proc_sparc_ioport(void) +static void register_proc_sparc_ioport(void) { #ifdef CONFIG_PROC_FS create_proc_read_entry("io_map",0,NULL,_sparc_io_get_info,&sparc_iomap); diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c index 087390b092b..93e1d1c6529 100644 --- a/arch/sparc/kernel/irq.c +++ b/arch/sparc/kernel/irq.c @@ -154,7 +154,7 @@ void (*sparc_init_timers)(irq_handler_t ) = struct irqaction static_irqaction[MAX_STATIC_ALLOC]; int static_irq_count; -struct { +static struct { struct irqaction *action; int flags; } sparc_irq[NR_IRQS]; diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c index da48d248cc1..e18a5da025d 100644 --- a/arch/sparc/kernel/process.c +++ b/arch/sparc/kernel/process.c @@ -177,6 +177,8 @@ void machine_power_off(void) machine_halt(); } +#if 0 + static DEFINE_SPINLOCK(sparc_backtrace_lock); void __show_backtrace(unsigned long fp) @@ -228,7 +230,6 @@ void smp_show_backtrace_all_cpus(void) } #endif -#if 0 void show_stackframe(struct sparc_stackf *sf) { unsigned long size; diff --git a/arch/sparc/kernel/rtrap.S b/arch/sparc/kernel/rtrap.S index ce30082ab26..891f460b7b9 100644 --- a/arch/sparc/kernel/rtrap.S +++ b/arch/sparc/kernel/rtrap.S @@ -224,8 +224,6 @@ ret_trap_user_stack_is_bolixed: b signal_p ld [%curptr + TI_FLAGS], %g2 - - .globl sun4c_rett_stackchk sun4c_rett_stackchk: be 1f and %fp, 0xfff, %g1 ! delay slot diff --git a/arch/sparc/kernel/setup.c b/arch/sparc/kernel/setup.c index a0ea0bc6f47..9e451b21202 100644 --- a/arch/sparc/kernel/setup.c +++ b/arch/sparc/kernel/setup.c @@ -67,7 +67,7 @@ struct screen_info screen_info = { extern unsigned long trapbase; /* Pretty sick eh? 
*/ -void prom_sync_me(void) +static void prom_sync_me(void) { unsigned long prom_tbr, flags; @@ -97,7 +97,7 @@ void prom_sync_me(void) return; } -unsigned int boot_flags __initdata = 0; +static unsigned int boot_flags __initdata = 0; #define BOOTME_DEBUG 0x1 /* Exported for mm/init.c:paging_init. */ diff --git a/arch/sparc/kernel/smp.c b/arch/sparc/kernel/smp.c index 6724ab90f82..1619ec15c09 100644 --- a/arch/sparc/kernel/smp.c +++ b/arch/sparc/kernel/smp.c @@ -35,13 +35,9 @@ #include "irq.h" -int smp_num_cpus = 1; volatile unsigned long cpu_callin_map[NR_CPUS] __initdata = {0,}; unsigned char boot_cpu_id = 0; unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */ -int smp_activated = 0; -volatile int __cpu_number_map[NR_CPUS]; -volatile int __cpu_logical_map[NR_CPUS]; cpumask_t cpu_online_map = CPU_MASK_NONE; cpumask_t phys_cpu_present_map = CPU_MASK_NONE; @@ -55,9 +51,6 @@ cpumask_t smp_commenced_mask = CPU_MASK_NONE; * instruction which is much better... */ -/* Used to make bitops atomic */ -unsigned char bitops_spinlock = 0; - void __cpuinit smp_store_cpu_info(int id) { int cpu_node; diff --git a/arch/sparc/kernel/sun4c_irq.c b/arch/sparc/kernel/sun4c_irq.c index c6ac9fc5256..340fc395fe2 100644 --- a/arch/sparc/kernel/sun4c_irq.c +++ b/arch/sparc/kernel/sun4c_irq.c @@ -68,7 +68,8 @@ unsigned char *interrupt_enable = NULL; static int sun4c_pil_map[] = { 0, 1, 2, 3, 5, 7, 8, 9 }; -unsigned int sun4c_sbint_to_irq(struct sbus_dev *sdev, unsigned int sbint) +static unsigned int sun4c_sbint_to_irq(struct sbus_dev *sdev, + unsigned int sbint) { if (sbint >= sizeof(sun4c_pil_map)) { printk(KERN_ERR "%s: bogus SBINT %d\n", sdev->prom_name, sbint); diff --git a/arch/sparc/kernel/sun4d_irq.c b/arch/sparc/kernel/sun4d_irq.c index 8ac5661caff..1290b5998f8 100644 --- a/arch/sparc/kernel/sun4d_irq.c +++ b/arch/sparc/kernel/sun4d_irq.c @@ -52,13 +52,13 @@ extern struct irqaction static_irqaction[MAX_STATIC_ALLOC]; extern int static_irq_count; unsigned char cpu_leds[32]; #ifdef 
CONFIG_SMP -unsigned char sbus_tid[32]; +static unsigned char sbus_tid[32]; #endif static struct irqaction *irq_action[NR_IRQS]; extern spinlock_t irq_action_lock; -struct sbus_action { +static struct sbus_action { struct irqaction *action; /* For SMP this needs to be extended */ } *sbus_actions; @@ -267,7 +267,8 @@ unsigned int sun4d_build_irq(struct sbus_dev *sdev, int irq) return irq; } -unsigned int sun4d_sbint_to_irq(struct sbus_dev *sdev, unsigned int sbint) +static unsigned int sun4d_sbint_to_irq(struct sbus_dev *sdev, + unsigned int sbint) { if (sbint >= sizeof(sbus_to_pil)) { printk(KERN_ERR "%s: bogus SBINT %d\n", sdev->prom_name, sbint); diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c index b92d6d2d5b0..94e02de960e 100644 --- a/arch/sparc/kernel/sun4m_irq.c +++ b/arch/sparc/kernel/sun4m_irq.c @@ -154,7 +154,8 @@ static unsigned long irq_mask[] = { static int sun4m_pil_map[] = { 0, 2, 3, 5, 7, 9, 11, 13 }; -unsigned int sun4m_sbint_to_irq(struct sbus_dev *sdev, unsigned int sbint) +static unsigned int sun4m_sbint_to_irq(struct sbus_dev *sdev, + unsigned int sbint) { if (sbint >= sizeof(sun4m_pil_map)) { printk(KERN_ERR "%s: bogus SBINT %d\n", sdev->prom_name, sbint); @@ -163,7 +164,7 @@ unsigned int sun4m_sbint_to_irq(struct sbus_dev *sdev, unsigned int sbint) return sun4m_pil_map[sbint] | 0x30; } -inline unsigned long sun4m_get_irqmask(unsigned int irq) +static unsigned long sun4m_get_irqmask(unsigned int irq) { unsigned long mask; @@ -281,7 +282,7 @@ static void sun4m_set_udt(int cpu) #define TIMER_IRQ (OBIO_INTR | 10) #define PROFILE_IRQ (OBIO_INTR | 14) -struct sun4m_timer_regs *sun4m_timers; +static struct sun4m_timer_regs *sun4m_timers; unsigned int lvl14_resolution = (((1000000/HZ) + 1) << 10); static void sun4m_clear_clock_irq(void) diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index ffb875aacb7..406ac1abc83 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ 
-244,8 +244,9 @@ static struct smp_funcall { static DEFINE_SPINLOCK(cross_call_lock); /* Cross calls must be serialized, at least currently. */ -void smp4m_cross_call(smpfunc_t func, unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, unsigned long arg5) +static void smp4m_cross_call(smpfunc_t func, unsigned long arg1, + unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) { register int ncpus = SUN4M_NCPUS; unsigned long flags; @@ -344,7 +345,7 @@ static void __init smp_setup_percpu_timer(void) enable_pil_irq(14); } -void __init smp4m_blackbox_id(unsigned *addr) +static void __init smp4m_blackbox_id(unsigned *addr) { int rd = *addr & 0x3e000000; int rs1 = rd >> 11; @@ -354,7 +355,7 @@ void __init smp4m_blackbox_id(unsigned *addr) addr[2] = 0x80082003 | rd | rs1; /* and reg, 3, reg */ } -void __init smp4m_blackbox_current(unsigned *addr) +static void __init smp4m_blackbox_current(unsigned *addr) { int rd = *addr & 0x3e000000; int rs1 = rd >> 11; diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c index 53caacbb398..ab3dd0b257d 100644 --- a/arch/sparc/kernel/time.c +++ b/arch/sparc/kernel/time.c @@ -46,7 +46,7 @@ #include "irq.h" DEFINE_SPINLOCK(rtc_lock); -enum sparc_clock_type sp_clock_typ; +static enum sparc_clock_type sp_clock_typ; DEFINE_SPINLOCK(mostek_lock); void __iomem *mstk48t02_regs = NULL; static struct mostek48t08 __iomem *mstk48t08_regs = NULL; @@ -366,7 +366,7 @@ static int __init clock_init(void) fs_initcall(clock_init); #endif /* !CONFIG_SUN4 */ -void __init sbus_time_init(void) +static void __init sbus_time_init(void) { BTFIXUPSET_CALL(bus_do_settimeofday, sbus_do_settimeofday, BTFIXUPCALL_NORM); diff --git a/arch/sparc/kernel/traps.c b/arch/sparc/kernel/traps.c index 978e9d85949..ac8ee6ab133 100644 --- a/arch/sparc/kernel/traps.c +++ b/arch/sparc/kernel/traps.c @@ -33,9 +33,6 @@ struct trap_trace_entry { unsigned long type; }; -int trap_curbuf = 0; -struct trap_trace_entry 
trapbuf[1024]; - void syscall_trace_entry(struct pt_regs *regs) { printk("%s[%d]: ", current->comm, task_pid_nr(current)); @@ -72,7 +69,7 @@ void sun4d_nmi(struct pt_regs *regs) prom_halt(); } -void instruction_dump (unsigned long *pc) +static void instruction_dump(unsigned long *pc) { int i; @@ -479,10 +476,6 @@ void do_BUG(const char *file, int line) extern void sparc_cpu_startup(void); -int linux_smp_still_initting; -unsigned int thiscpus_tbr; -int thiscpus_mid; - void trap_init(void) { extern void thread_info_offsets_are_bolixed_pete(void); diff --git a/arch/sparc/kernel/wof.S b/arch/sparc/kernel/wof.S index 4bce38dfe3c..3bbcd8dc9ab 100644 --- a/arch/sparc/kernel/wof.S +++ b/arch/sparc/kernel/wof.S @@ -306,7 +306,6 @@ spwin_bad_ustack_from_kernel: * As noted above %curptr cannot be touched by this routine at all. */ - .globl spwin_sun4c_stackchk spwin_sun4c_stackchk: /* LOCATION: Window to be saved on the stack */ diff --git a/arch/sparc/kernel/wuf.S b/arch/sparc/kernel/wuf.S index 82e5145b0f7..779ff750603 100644 --- a/arch/sparc/kernel/wuf.S +++ b/arch/sparc/kernel/wuf.S @@ -243,7 +243,6 @@ fwin_user_finish_up: */ .align 4 - .globl sun4c_fwin_stackchk sun4c_fwin_stackchk: /* LOCATION: Window 'W' */ -- cgit v1.2.3 From 50215d6511265d46ba14038640b16c5dd7731ff4 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 5 Jun 2008 11:41:51 -0700 Subject: sparc/mm/: possible cleanups This patch contains the following possible cleanups: - make the following needlessly global code static: - fault.c: force_user_fault() - init.c: calc_max_low_pfn() - init.c: pgt_cache_water[] - init.c: map_high_region() - srmmu.c: hwbug_bitmask - srmmu.c: srmmu_swapper_pg_dir - srmmu.c: srmmu_context_table - srmmu.c: is_hypersparc - srmmu.c: srmmu_cache_pagetables - srmmu.c: srmmu_nocache_size - srmmu.c: srmmu_nocache_end - srmmu.c: srmmu_get_nocache() - srmmu.c: srmmu_free_nocache() - srmmu.c: srmmu_early_allocate_ptable_skeleton() - srmmu.c: srmmu_nocache_calcsize() - srmmu.c: 
srmmu_nocache_init() - srmmu.c: srmmu_alloc_thread_info() - srmmu.c: early_pgtable_allocfail() - srmmu.c: srmmu_early_allocate_ptable_skeleton() - srmmu.c: srmmu_allocate_ptable_skeleton() - srmmu.c: srmmu_inherit_prom_mappings() - tsunami.S: tsunami_copy_1page - remove the following unused code: - init.c: struct sparc_aliases Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- arch/sparc/mm/fault.c | 2 +- arch/sparc/mm/init.c | 8 +++----- arch/sparc/mm/srmmu.c | 38 +++++++++++++++++++++----------------- arch/sparc/mm/tsunami.S | 1 - 4 files changed, 25 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c index 0a3cd8f6cfe..3604c2e8670 100644 --- a/arch/sparc/mm/fault.c +++ b/arch/sparc/mm/fault.c @@ -451,7 +451,7 @@ asmlinkage void do_sun4c_fault(struct pt_regs *regs, int text_fault, int write, } /* This always deals with user addresses. */ -inline void force_user_fault(unsigned long address, int write) +static void force_user_fault(unsigned long address, int write) { struct vm_area_struct *vma; struct task_struct *tsk = current; diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c index 7794ecb896e..8f94a2d62f1 100644 --- a/arch/sparc/mm/init.c +++ b/arch/sparc/mm/init.c @@ -128,7 +128,7 @@ unsigned long calc_highpages(void) return nr; } -unsigned long calc_max_low_pfn(void) +static unsigned long calc_max_low_pfn(void) { int i; unsigned long tmp = pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT); @@ -292,7 +292,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) * * We simply copy the 2.4 implementation for now. 
*/ -int pgt_cache_water[2] = { 25, 50 }; +static int pgt_cache_water[2] = { 25, 50 }; void check_pgt_cache(void) { @@ -356,8 +356,6 @@ void __init paging_init(void) device_scan(); } -struct cache_palias *sparc_aliases; - static void __init taint_real_pages(void) { int i; @@ -375,7 +373,7 @@ static void __init taint_real_pages(void) } } -void map_high_region(unsigned long start_pfn, unsigned long end_pfn) +static void map_high_region(unsigned long start_pfn, unsigned long end_pfn) { unsigned long tmp; diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 23d3291a3e8..c624e04ff03 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -50,7 +50,7 @@ #include enum mbus_module srmmu_modtype; -unsigned int hwbug_bitmask; +static unsigned int hwbug_bitmask; int vac_cache_size; int vac_line_size; @@ -60,7 +60,7 @@ extern unsigned long last_valid_pfn; extern unsigned long page_kernel; -pgd_t *srmmu_swapper_pg_dir; +static pgd_t *srmmu_swapper_pg_dir; #ifdef CONFIG_SMP #define FLUSH_BEGIN(mm) @@ -83,12 +83,12 @@ BTFIXUPDEF_CALL(void, local_flush_page_for_dma, unsigned long) char *srmmu_name; ctxd_t *srmmu_ctx_table_phys; -ctxd_t *srmmu_context_table; +static ctxd_t *srmmu_context_table; int viking_mxcc_present; static DEFINE_SPINLOCK(srmmu_context_spinlock); -int is_hypersparc; +static int is_hypersparc; /* * In general all page table modifications should use the V8 atomic @@ -112,11 +112,11 @@ static inline int srmmu_device_memory(unsigned long x) return ((x & 0xF0000000) != 0); } -int srmmu_cache_pagetables; +static int srmmu_cache_pagetables; /* these will be initialized in srmmu_nocache_calcsize() */ -unsigned long srmmu_nocache_size; -unsigned long srmmu_nocache_end; +static unsigned long srmmu_nocache_size; +static unsigned long srmmu_nocache_end; /* 1 bit <=> 256 bytes of nocache <=> 64 PTEs */ #define SRMMU_NOCACHE_BITMAP_SHIFT (PAGE_SHIFT - 4) @@ -324,7 +324,7 @@ static unsigned long __srmmu_get_nocache(int size, int align) return 
(SRMMU_NOCACHE_VADDR + (offset << SRMMU_NOCACHE_BITMAP_SHIFT)); } -unsigned inline long srmmu_get_nocache(int size, int align) +static unsigned long srmmu_get_nocache(int size, int align) { unsigned long tmp; @@ -336,7 +336,7 @@ unsigned inline long srmmu_get_nocache(int size, int align) return tmp; } -void srmmu_free_nocache(unsigned long vaddr, int size) +static void srmmu_free_nocache(unsigned long vaddr, int size) { int offset; @@ -369,7 +369,8 @@ void srmmu_free_nocache(unsigned long vaddr, int size) bit_map_clear(&srmmu_nocache_map, offset, size); } -void srmmu_early_allocate_ptable_skeleton(unsigned long start, unsigned long end); +static void srmmu_early_allocate_ptable_skeleton(unsigned long start, + unsigned long end); extern unsigned long probe_memory(void); /* in fault.c */ @@ -377,7 +378,7 @@ extern unsigned long probe_memory(void); /* in fault.c */ * Reserve nocache dynamically proportionally to the amount of * system RAM. -- Tomas Szepe , June 2002 */ -void srmmu_nocache_calcsize(void) +static void srmmu_nocache_calcsize(void) { unsigned long sysmemavail = probe_memory() / 1024; int srmmu_nocache_npages; @@ -398,7 +399,7 @@ void srmmu_nocache_calcsize(void) srmmu_nocache_end = SRMMU_NOCACHE_VADDR + srmmu_nocache_size; } -void __init srmmu_nocache_init(void) +static void __init srmmu_nocache_init(void) { unsigned int bitmap_bits; pgd_t *pgd; @@ -645,7 +646,7 @@ static void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len) * mappings on the kernel stack without any special code as we did * need on the sun4c. */ -struct thread_info *srmmu_alloc_thread_info(void) +static struct thread_info *srmmu_alloc_thread_info(void) { struct thread_info *ret; @@ -1045,13 +1046,14 @@ extern void hypersparc_setup_blockops(void); * around 8mb mapped for us. 
*/ -void __init early_pgtable_allocfail(char *type) +static void __init early_pgtable_allocfail(char *type) { prom_printf("inherit_prom_mappings: Cannot alloc kernel %s.\n", type); prom_halt(); } -void __init srmmu_early_allocate_ptable_skeleton(unsigned long start, unsigned long end) +static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start, + unsigned long end) { pgd_t *pgdp; pmd_t *pmdp; @@ -1081,7 +1083,8 @@ void __init srmmu_early_allocate_ptable_skeleton(unsigned long start, unsigned l } } -void __init srmmu_allocate_ptable_skeleton(unsigned long start, unsigned long end) +static void __init srmmu_allocate_ptable_skeleton(unsigned long start, + unsigned long end) { pgd_t *pgdp; pmd_t *pmdp; @@ -1116,7 +1119,8 @@ void __init srmmu_allocate_ptable_skeleton(unsigned long start, unsigned long en * looking at the prom's page table directly which is what most * other OS's do. Yuck... this is much better. */ -void __init srmmu_inherit_prom_mappings(unsigned long start,unsigned long end) +static void __init srmmu_inherit_prom_mappings(unsigned long start, + unsigned long end) { pgd_t *pgdp; pmd_t *pmdp; diff --git a/arch/sparc/mm/tsunami.S b/arch/sparc/mm/tsunami.S index db0d6de33a8..4e55e8f7664 100644 --- a/arch/sparc/mm/tsunami.S +++ b/arch/sparc/mm/tsunami.S @@ -93,7 +93,6 @@ tsunami_flush_tlb_page_out: ldd [src + offset + 0x00], t2; \ std t2, [dst + offset + 0x00]; - .globl tsunami_copy_1page tsunami_copy_1page: /* NOTE: This routine has to be shorter than 70insns --jj */ or %g0, (PAGE_SIZE >> 8), %g1 -- cgit v1.2.3 From 908f5162ca59ed46a928e8416db159777d432e77 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 5 Jun 2008 11:42:40 -0700 Subject: sparc64/kernel/: make code static This patch makes the following needlessly global code static: - central.c: struct central_bus - central.c: struct fhc_list - central.c: apply_fhc_ranges() - central.c: apply_central_ranges() - ds.c: struct ds_states_template[] - pci_msi.c: sparc64_setup_msi_irq() - 
pci_msi.c: sparc64_teardown_msi_irq() - pci_sun4v.c: struct sun4v_dma_ops - sys_sparc32.c: cp_compat_stat64() Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- arch/sparc64/kernel/central.c | 14 +++++++------- arch/sparc64/kernel/ds.c | 2 +- arch/sparc64/kernel/pci_msi.c | 10 +++++----- arch/sparc64/kernel/pci_sun4v.c | 2 +- arch/sparc64/kernel/sys_sparc32.c | 3 ++- 5 files changed, 16 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/central.c b/arch/sparc64/kernel/central.c index b61b8dfb09c..f2e87d0d7e1 100644 --- a/arch/sparc64/kernel/central.c +++ b/arch/sparc64/kernel/central.c @@ -16,8 +16,8 @@ #include #include -struct linux_central *central_bus = NULL; -struct linux_fhc *fhc_list = NULL; +static struct linux_central *central_bus = NULL; +static struct linux_fhc *fhc_list = NULL; #define IS_CENTRAL_FHC(__fhc) ((__fhc) == central_bus->child) @@ -79,9 +79,9 @@ static void adjust_regs(struct linux_prom_registers *regp, int nregs, } /* Apply probed fhc ranges to registers passed, if no ranges return. */ -void apply_fhc_ranges(struct linux_fhc *fhc, - struct linux_prom_registers *regs, - int nregs) +static void apply_fhc_ranges(struct linux_fhc *fhc, + struct linux_prom_registers *regs, + int nregs) { if (fhc->num_fhc_ranges) adjust_regs(regs, nregs, fhc->fhc_ranges, @@ -89,8 +89,8 @@ void apply_fhc_ranges(struct linux_fhc *fhc, } /* Apply probed central ranges to registers passed, if no ranges return. 
*/ -void apply_central_ranges(struct linux_central *central, - struct linux_prom_registers *regs, int nregs) +static void apply_central_ranges(struct linux_central *central, + struct linux_prom_registers *regs, int nregs) { if (central->num_central_ranges) adjust_regs(regs, nregs, central->central_ranges, diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c index edb74f5a118..d0fa5aa3893 100644 --- a/arch/sparc64/kernel/ds.c +++ b/arch/sparc64/kernel/ds.c @@ -159,7 +159,7 @@ static void ds_var_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf, int len); -struct ds_cap_state ds_states_template[] = { +static struct ds_cap_state ds_states_template[] = { { .service_id = "md-update", .data = md_update_data, diff --git a/arch/sparc64/kernel/pci_msi.c b/arch/sparc64/kernel/pci_msi.c index db5e8fd8f67..60c71e35021 100644 --- a/arch/sparc64/kernel/pci_msi.c +++ b/arch/sparc64/kernel/pci_msi.c @@ -120,9 +120,9 @@ static struct irq_chip msi_irq = { /* XXX affinity XXX */ }; -int sparc64_setup_msi_irq(unsigned int *virt_irq_p, - struct pci_dev *pdev, - struct msi_desc *entry) +static int sparc64_setup_msi_irq(unsigned int *virt_irq_p, + struct pci_dev *pdev, + struct msi_desc *entry) { struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller; const struct sparc64_msiq_ops *ops = pbm->msi_ops; @@ -179,8 +179,8 @@ out_err: return err; } -void sparc64_teardown_msi_irq(unsigned int virt_irq, - struct pci_dev *pdev) +static void sparc64_teardown_msi_irq(unsigned int virt_irq, + struct pci_dev *pdev) { struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller; const struct sparc64_msiq_ops *ops = pbm->msi_ops; diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c index e2bb9790039..a104c80d319 100644 --- a/arch/sparc64/kernel/pci_sun4v.c +++ b/arch/sparc64/kernel/pci_sun4v.c @@ -531,7 +531,7 @@ static void dma_4v_sync_sg_for_cpu(struct device *dev, /* Nothing to do... 
*/ } -const struct dma_ops sun4v_dma_ops = { +static const struct dma_ops sun4v_dma_ops = { .alloc_coherent = dma_4v_alloc_coherent, .free_coherent = dma_4v_free_coherent, .map_single = dma_4v_map_single, diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index ba5bd626b39..590679795ce 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -359,7 +359,8 @@ int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf) return err; } -int cp_compat_stat64(struct kstat *stat, struct compat_stat64 __user *statbuf) +static int cp_compat_stat64(struct kstat *stat, + struct compat_stat64 __user *statbuf) { int err; -- cgit v1.2.3 From 9ae95bce73ef2d12fbe32a03ed230a9bef667328 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 7 Jul 2008 22:30:35 +0200 Subject: sparc: add -m64 when building vmlinux.lds David Miller noticed that the build of vmlinux.lds failed to use the -m64 specifier. This caused the build to break with a bi-arch gcc with unified headers. Add the -m64 option to CPPFLAGS_vmlinux.lds so we have the correct defines available when building vmlinux.lds.
Signed-off-by: Sam Ravnborg --- arch/sparc64/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc64/Makefile b/arch/sparc64/Makefile index 4b8f2b084c2..b785a395b12 100644 --- a/arch/sparc64/Makefile +++ b/arch/sparc64/Makefile @@ -9,7 +9,9 @@ CHECKFLAGS += -D__sparc__ -D__sparc_v9__ -m64 -CPPFLAGS_vmlinux.lds += -Usparc +# Undefine sparc when processing vmlinux.lds - it is used +# And teach CPP we are doing 64 bit builds (for this case) +CPPFLAGS_vmlinux.lds += -m64 -Usparc LDFLAGS := -m elf64_sparc -- cgit v1.2.3 From f92ffa12f41efab4d4ad2827422d2e0a6c4e0fd2 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Fri, 6 Jun 2008 20:51:20 +0200 Subject: sparc: Merge asm-sparc{,64}/mman.h Renaming the function sparc64_mmap_check() to sparc_mmap_check() was enough to make the two header files identical. :$ diff -u include/asm-sparc/mman.h include/asm-sparc64/mman.h :-- include/asm-sparc/mman.h 2008-06-13 06:46:39.000000000 +0200 :++ include/asm-sparc64/mman.h 2008-06-13 06:46:39.000000000 +0200 :@@ -1,5 +1,5 @@ :-#ifndef __SPARC_MMAN_H__ :-#define __SPARC_MMAN_H__ :+#ifndef __SPARC64_MMAN_H__ :+#define __SPARC64_MMAN_H__ : : #include : :@@ -23,9 +23,9 @@ : : #ifdef __KERNEL__ : #ifndef __ASSEMBLY__ :-#define arch_mmap_check(addr,len,flags) sparc_mmap_check(addr,len) :-int sparc_mmap_check(unsigned long addr, unsigned long len); :+#define arch_mmap_check(addr,len,flags) sparc64_mmap_check(addr,len) :+int sparc64_mmap_check(unsigned long addr, unsigned long len); : #endif : #endif : :-#endif /* __SPARC_MMAN_H__ */ :+#endif /* __SPARC64_MMAN_H__ */ Signed-off-by: Sam Ravnborg --- arch/sparc64/kernel/sys_sparc.c | 6 +++--- arch/sparc64/kernel/sys_sparc32.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c index ac1bff58c1a..e1f4eba2e57 100644 --- a/arch/sparc64/kernel/sys_sparc.c +++ b/arch/sparc64/kernel/sys_sparc.c 
@@ -542,7 +542,7 @@ asmlinkage long sparc64_personality(unsigned long personality) return ret; } -int sparc64_mmap_check(unsigned long addr, unsigned long len) +int sparc_mmap_check(unsigned long addr, unsigned long len) { if (test_thread_flag(TIF_32BIT)) { if (len >= STACK_TOP32) @@ -614,9 +614,9 @@ asmlinkage unsigned long sys64_mremap(unsigned long addr, goto out; if (unlikely(new_len >= VA_EXCLUDE_START)) goto out; - if (unlikely(sparc64_mmap_check(addr, old_len))) + if (unlikely(sparc_mmap_check(addr, old_len))) goto out; - if (unlikely(sparc64_mmap_check(new_addr, new_len))) + if (unlikely(sparc_mmap_check(new_addr, new_len))) goto out; down_write(&current->mm->mmap_sem); diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 590679795ce..97b77fb5c50 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -871,9 +871,9 @@ asmlinkage unsigned long sys32_mremap(unsigned long addr, unsigned long ret = -EINVAL; unsigned long new_addr = __new_addr; - if (unlikely(sparc64_mmap_check(addr, old_len))) + if (unlikely(sparc_mmap_check(addr, old_len))) goto out; - if (unlikely(sparc64_mmap_check(new_addr, new_len))) + if (unlikely(sparc_mmap_check(new_addr, new_len))) goto out; down_write(&current->mm->mmap_sem); ret = do_mremap(addr, old_len, new_len, flags, new_addr); -- cgit v1.2.3 From b1a8bf92a0303301f3e013e2a2f45a4916453ce7 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Fri, 13 Jun 2008 20:20:54 +0200 Subject: sparc: export openprom.h to userspace sparc64 exports openprom.h to userspace so let sparc follow the example. As openprom.h pulled in another not-for-export vaddrs.h header file it required a few changes to fix the build. The definitions of VMALLOC_* were moved to pgtable as this is where sparc64 has them.
Signed-off-by: Sam Ravnborg --- arch/sparc/kernel/entry.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 55d3be1b5d8..2f96256dc51 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_SUN4 #include #else -- cgit v1.2.3 From ab772027ca42a06f7e480077182434a2a30a8037 Mon Sep 17 00:00:00 2001 From: Stoyan Gaydarov Date: Mon, 14 Jul 2008 22:12:29 -0700 Subject: sparc: arch/sparc/kernel/apc.c to unlocked_ioctl This changes arch/sparc/kernel/apc.c to use unlocked_ioctl Signed-off-by: Stoyan Gaydarov Signed-off-by: David S. Miller --- arch/sparc/kernel/apc.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c index 54f7ccd7455..5267d48fb2c 100644 --- a/arch/sparc/kernel/apc.c +++ b/arch/sparc/kernel/apc.c @@ -85,54 +85,70 @@ static int apc_release(struct inode *inode, struct file *f) return 0; } -static int apc_ioctl(struct inode *inode, struct file *f, - unsigned int cmd, unsigned long __arg) +static long apc_ioctl(struct file *f, unsigned int cmd, unsigned long __arg) { __u8 inarg, __user *arg; arg = (__u8 __user *) __arg; + + lock_kernel(); + switch (cmd) { case APCIOCGFANCTL: - if (put_user(apc_readb(APC_FANCTL_REG) & APC_REGMASK, arg)) - return -EFAULT; + if (put_user(apc_readb(APC_FANCTL_REG) & APC_REGMASK, arg)) { + unlock_kernel(); + return -EFAULT; + } break; case APCIOCGCPWR: - if (put_user(apc_readb(APC_CPOWER_REG) & APC_REGMASK, arg)) + if (put_user(apc_readb(APC_CPOWER_REG) & APC_REGMASK, arg)) { + unlock_kernel(); return -EFAULT; + } break; case APCIOCGBPORT: - if (put_user(apc_readb(APC_BPORT_REG) & APC_BPMASK, arg)) + if (put_user(apc_readb(APC_BPORT_REG) & APC_BPMASK, arg)) { + unlock_kernel(); return -EFAULT; + } break; case APCIOCSFANCTL: - if (get_user(inarg, 
arg)) + if (get_user(inarg, arg)) { + unlock_kernel(); return -EFAULT; + } apc_writeb(inarg & APC_REGMASK, APC_FANCTL_REG); break; case APCIOCSCPWR: - if (get_user(inarg, arg)) + if (get_user(inarg, arg)) { + unlock_kernel(); return -EFAULT; + } apc_writeb(inarg & APC_REGMASK, APC_CPOWER_REG); break; case APCIOCSBPORT: - if (get_user(inarg, arg)) + if (get_user(inarg, arg)) { + unlock_kernel(); return -EFAULT; + } apc_writeb(inarg & APC_BPMASK, APC_BPORT_REG); break; default: + unlock_kernel(); return -EINVAL; }; + unlock_kernel(); return 0; } static const struct file_operations apc_fops = { - .ioctl = apc_ioctl, - .open = apc_open, - .release = apc_release, + .unlocked_ioctl = apc_ioctl, + .open = apc_open, + .release = apc_release, }; static struct miscdevice apc_miscdev = { APC_MINOR, APC_DEVNAME, &apc_fops }; -- cgit v1.2.3 From 932d06139443ebebcd8d9232941b7216091966d5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 14 Jul 2008 22:46:33 -0700 Subject: sparc64: Config category "Processor type and features" absent kernel bugzilla #11059: sparc64 config menu is missing "Processor type and features", so add that and move General Setup before Processor menu. Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- arch/sparc64/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index fca9246470b..794d22fdf46 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -81,6 +81,10 @@ config GENERIC_HARDIRQS_NO__DO_IRQ bool def_bool y +source "init/Kconfig" + +menu "Processor type and features" + choice prompt "Kernel page size" default SPARC64_PAGE_SIZE_8KB @@ -136,14 +140,10 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. 
-source "init/Kconfig" - config GENERIC_HARDIRQS bool default y -menu "General machine setup" - source "kernel/time/Kconfig" config SMP -- cgit v1.2.3 From f538f3df4f92f34f5d8bc024d54c12387541cdee Mon Sep 17 00:00:00 2001 From: Robert Reif Date: Mon, 14 Jul 2008 22:57:29 -0700 Subject: sparc32: fix init.c allnoconfig build error Fix allnoconfig build error. Signed-off-by: Robert Reif Signed-off-by: David S. Miller --- arch/sparc/mm/init.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c index 8f94a2d62f1..e103f1bb377 100644 --- a/arch/sparc/mm/init.c +++ b/arch/sparc/mm/init.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 4fe3ebec122f23a095cc1d17557c175caaa55ca1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 22:11:32 -0700 Subject: sparc: Use new '%pS' infrastructure to print symbols. Signed-off-by: David S. Miller --- arch/sparc/kernel/process.c | 11 +++++------ arch/sparc/kernel/traps.c | 7 +++---- arch/sparc64/kernel/process.c | 26 +++++++++----------------- arch/sparc64/kernel/traps.c | 37 +++++++++++++++---------------------- arch/sparc64/kernel/unaligned.c | 7 +++---- arch/sparc64/mm/fault.c | 5 ++--- 6 files changed, 37 insertions(+), 56 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c index e18a5da025d..4bb430940a6 100644 --- a/arch/sparc/kernel/process.c +++ b/arch/sparc/kernel/process.c @@ -1,6 +1,6 @@ /* linux/arch/sparc/kernel/process.c * - * Copyright (C) 1995 David S. Miller (davem@davemloft.net) + * Copyright (C) 1995, 2008 David S. Miller (davem@davemloft.net) * Copyright (C) 1996 Eddie C. 
Dost (ecd@skynet.be) */ @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -198,7 +197,7 @@ void __show_backtrace(unsigned long fp) rw->ins[4], rw->ins[5], rw->ins[6], rw->ins[7]); - print_symbol("%s\n", rw->ins[7]); + printk("%pS\n", (void *) rw->ins[7]); rw = (struct reg_window *) rw->ins[6]; } spin_unlock_irqrestore(&sparc_backtrace_lock, flags); @@ -265,14 +264,14 @@ void show_regs(struct pt_regs *r) printk("PSR: %08lx PC: %08lx NPC: %08lx Y: %08lx %s\n", r->psr, r->pc, r->npc, r->y, print_tainted()); - print_symbol("PC: <%s>\n", r->pc); + printk("PC: <%pS>\n", (void *) r->pc); printk("%%G: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", r->u_regs[0], r->u_regs[1], r->u_regs[2], r->u_regs[3], r->u_regs[4], r->u_regs[5], r->u_regs[6], r->u_regs[7]); printk("%%O: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", r->u_regs[8], r->u_regs[9], r->u_regs[10], r->u_regs[11], r->u_regs[12], r->u_regs[13], r->u_regs[14], r->u_regs[15]); - print_symbol("RPC: <%s>\n", r->u_regs[15]); + printk("RPC: <%pS>\n", (void *) r->u_regs[15]); printk("%%L: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", rw->locals[0], rw->locals[1], rw->locals[2], rw->locals[3], @@ -307,7 +306,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) rw = (struct reg_window *) fp; pc = rw->ins[7]; printk("[%08lx : ", pc); - print_symbol("%s ] ", pc); + printk("%pS ] ", (void *) pc); fp = rw->ins[6]; } while (++count < 16); printk("\n"); diff --git a/arch/sparc/kernel/traps.c b/arch/sparc/kernel/traps.c index ac8ee6ab133..5d45d5fd8c9 100644 --- a/arch/sparc/kernel/traps.c +++ b/arch/sparc/kernel/traps.c @@ -1,7 +1,7 @@ /* * arch/sparc/kernel/traps.c * - * Copyright 1995 David S. Miller (davem@caip.rutgers.edu) + * Copyright 1995, 2008 David S. 
Miller (davem@davemloft.net) * Copyright 2000 Jakub Jelinek (jakub@redhat.com) */ @@ -11,7 +11,6 @@ #include /* for jiffies */ #include -#include #include #include #include @@ -116,8 +115,8 @@ void die_if_kernel(char *str, struct pt_regs *regs) count++ < 30 && (((unsigned long) rw) >= PAGE_OFFSET) && !(((unsigned long) rw) & 0x7)) { - printk("Caller[%08lx]", rw->ins[7]); - print_symbol(": %s\n", rw->ins[7]); + printk("Caller[%08lx]: %pS\n", rw->ins[7], + (void *) rw->ins[7]); rw = (struct reg_window *)rw->ins[6]; } } diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 2084f81a76e..c1f1ff29e97 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -211,7 +210,7 @@ static void show_regwindow(struct pt_regs *regs) printk("i4: %016lx i5: %016lx i6: %016lx i7: %016lx\n", rwk->ins[4], rwk->ins[5], rwk->ins[6], rwk->ins[7]); if (regs->tstate & TSTATE_PRIV) - print_symbol("I7: <%s>\n", rwk->ins[7]); + printk("I7: <%pS>\n", (void *) rwk->ins[7]); } #ifdef CONFIG_SMP @@ -232,7 +231,7 @@ void __show_regs(struct pt_regs * regs) #endif printk("TSTATE: %016lx TPC: %016lx TNPC: %016lx Y: %08x %s\n", regs->tstate, regs->tpc, regs->tnpc, regs->y, print_tainted()); - print_symbol("TPC: <%s>\n", regs->tpc); + printk("TPC: <%pS>\n", (void *) regs->tpc); printk("g0: %016lx g1: %016lx g2: %016lx g3: %016lx\n", regs->u_regs[0], regs->u_regs[1], regs->u_regs[2], regs->u_regs[3]); @@ -245,7 +244,7 @@ void __show_regs(struct pt_regs * regs) printk("o4: %016lx o5: %016lx sp: %016lx ret_pc: %016lx\n", regs->u_regs[12], regs->u_regs[13], regs->u_regs[14], regs->u_regs[15]); - print_symbol("RPC: <%s>\n", regs->u_regs[15]); + printk("RPC: <%pS>\n", (void *) regs->u_regs[15]); show_regwindow(regs); #ifdef CONFIG_SMP spin_unlock(&regdump_lock); @@ -346,9 +345,6 @@ static void sysrq_handle_globreg(int key, struct tty_struct *tty) { struct thread_info *tp =
current_thread_info(); struct pt_regs *regs = get_irq_regs(); -#ifdef CONFIG_KALLSYMS - char buffer[KSYM_SYMBOL_LEN]; -#endif unsigned long flags; int this_cpu, cpu; @@ -377,17 +373,13 @@ static void sysrq_handle_globreg(int key, struct tty_struct *tty) gp->tstate, gp->tpc, gp->tnpc, ((tp && tp->task) ? tp->task->comm : "NULL"), ((tp && tp->task) ? tp->task->pid : -1)); -#ifdef CONFIG_KALLSYMS + if (gp->tstate & TSTATE_PRIV) { - sprint_symbol(buffer, gp->tpc); - printk(" TPC[%s] ", buffer); - sprint_symbol(buffer, gp->o7); - printk("O7[%s] ", buffer); - sprint_symbol(buffer, gp->i7); - printk("I7[%s]\n", buffer); - } else -#endif - { + printk(" TPC[%pS] O7[%pS] I7[%pS]\n", + (void *) gp->tpc, + (void *) gp->o7, + (void *) gp->i7); + } else { printk(" TPC[%lx] O7[%lx] I7[%lx]\n", gp->tpc, gp->o7, gp->i7); } diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 36974926265..0aa819c29db 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -1,6 +1,6 @@ /* arch/sparc64/kernel/traps.c * - * Copyright (C) 1995,1997 David S. Miller (davem@davemloft.net) + * Copyright (C) 1995,1997,2008 David S. Miller (davem@davemloft.net) * Copyright (C) 1997,1999,2000 Jakub Jelinek (jakub@redhat.com) */ @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -74,7 +73,7 @@ static void dump_tl1_traplog(struct tl1_traplog *p) i + 1, p->trapstack[i].tstate, p->trapstack[i].tpc, p->trapstack[i].tnpc, p->trapstack[i].tt); - print_symbol("TRAPLOG: TPC<%s>\n", p->trapstack[i].tpc); + printk("TRAPLOG: TPC<%pS>\n", (void *) p->trapstack[i].tpc); } } @@ -1081,7 +1080,7 @@ static void cheetah_log_errors(struct pt_regs *regs, struct cheetah_err_info *in regs->tpc, regs->tnpc, regs->u_regs[UREG_I7], regs->tstate); printk("%s" "ERROR(%d): ", (recoverable ? 
KERN_WARNING : KERN_CRIT), smp_processor_id()); - print_symbol("TPC<%s>\n", regs->tpc); + printk("TPC<%pS>\n", (void *) regs->tpc); printk("%s" "ERROR(%d): M_SYND(%lx), E_SYND(%lx)%s%s\n", (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), (afsr & CHAFSR_M_SYNDROME) >> CHAFSR_M_SYNDROME_SHIFT, @@ -1689,7 +1688,7 @@ void cheetah_plus_parity_error(int type, struct pt_regs *regs) smp_processor_id(), (type & 0x1) ? 'I' : 'D', regs->tpc); - print_symbol(KERN_EMERG "TPC<%s>\n", regs->tpc); + printk(KERN_EMERG "TPC<%pS>\n", (void *) regs->tpc); panic("Irrecoverable Cheetah+ parity error."); } @@ -1697,7 +1696,7 @@ void cheetah_plus_parity_error(int type, struct pt_regs *regs) smp_processor_id(), (type & 0x1) ? 'I' : 'D', regs->tpc); - print_symbol(KERN_WARNING "TPC<%s>\n", regs->tpc); + printk(KERN_WARNING "TPC<%pS>\n", (void *) regs->tpc); } struct sun4v_error_entry { @@ -1904,9 +1903,10 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl) printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); - print_symbol(KERN_EMERG "SUN4V-ITLB: TPC<%s>\n", regs->tpc); + printk(KERN_EMERG "SUN4V-ITLB: TPC<%pS>\n", (void *) regs->tpc); printk(KERN_EMERG "SUN4V-ITLB: O7[%lx]\n", regs->u_regs[UREG_I7]); - print_symbol(KERN_EMERG "SUN4V-ITLB: O7<%s>\n", regs->u_regs[UREG_I7]); + printk(KERN_EMERG "SUN4V-ITLB: O7<%pS>\n", + (void *) regs->u_regs[UREG_I7]); printk(KERN_EMERG "SUN4V-ITLB: vaddr[%lx] ctx[%lx] " "pte[%lx] error[%lx]\n", sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, @@ -1927,9 +1927,10 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); - print_symbol(KERN_EMERG "SUN4V-DTLB: TPC<%s>\n", regs->tpc); + printk(KERN_EMERG "SUN4V-DTLB: TPC<%pS>\n", (void *) regs->tpc); printk(KERN_EMERG "SUN4V-DTLB: O7[%lx]\n", regs->u_regs[UREG_I7]); - print_symbol(KERN_EMERG "SUN4V-DTLB: O7<%s>\n", regs->u_regs[UREG_I7]); + printk(KERN_EMERG "SUN4V-DTLB: O7<%pS>\n", + (void *) 
regs->u_regs[UREG_I7]); printk(KERN_EMERG "SUN4V-DTLB: vaddr[%lx] ctx[%lx] " "pte[%lx] error[%lx]\n", sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, @@ -2111,10 +2112,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) fp = ksp + STACK_BIAS; thread_base = (unsigned long) tp; - printk("Call Trace:"); -#ifdef CONFIG_KALLSYMS - printk("\n"); -#endif + printk("Call Trace:\n"); do { struct sparc_stackf *sf; struct pt_regs *regs; @@ -2137,12 +2135,8 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) fp = (unsigned long)sf->fp + STACK_BIAS; } - printk(" [%016lx] ", pc); - print_symbol("%s\n", pc); + printk(" [%016lx] %pS\n", pc, (void *) pc); } while (++count < 16); -#ifndef CONFIG_KALLSYMS - printk("\n"); -#endif } void dump_stack(void) @@ -2211,9 +2205,8 @@ void die_if_kernel(char *str, struct pt_regs *regs) while (rw && count++ < 30&& is_kernel_stack(current, rw)) { - printk("Caller[%016lx]", rw->ins[7]); - print_symbol(": %s", rw->ins[7]); - printk("\n"); + printk("Caller[%016lx]: %pS\n", rw->ins[7], + (void *) rw->ins[7]); rw = kernel_stack_up(rw); } diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c index afa7fc4f519..203ddfad9f2 100644 --- a/arch/sparc64/kernel/unaligned.c +++ b/arch/sparc64/kernel/unaligned.c @@ -2,7 +2,7 @@ * unaligned.c: Unaligned load/store trap handling with special * cases for the kernel to do them more quickly. * - * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1996,2008 David S. 
Miller (davem@davemloft.net) * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) */ @@ -20,7 +20,6 @@ #include #include #include -#include #include /* #define DEBUG_MNA */ @@ -289,8 +288,8 @@ static void log_unaligned(struct pt_regs *regs) if (count < 5) { last_time = jiffies; count++; - printk("Kernel unaligned access at TPC[%lx] ", regs->tpc); - print_symbol("%s\n", regs->tpc); + printk("Kernel unaligned access at TPC[%lx] %pS\n", + regs->tpc, (void *) regs->tpc); } } diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index 236f4d228d2..ea7d7ae76bc 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -1,7 +1,7 @@ /* * arch/sparc64/mm/fault.c: Page fault handlers for the 64-bit Sparc. * - * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1996, 2008 David S. Miller (davem@davemloft.net) * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) */ @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -115,7 +114,7 @@ static void bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr) printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n", regs->tpc); printk(KERN_CRIT "OOPS: RPC [%016lx]\n", regs->u_regs[15]); - print_symbol("RPC: <%s>\n", regs->u_regs[15]); + printk("OOPS: RPC <%pS>\n", (void *) regs->u_regs[15]); printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr); dump_stack(); unhandled_fault(regs->tpc, current, regs); -- cgit v1.2.3 From d172ad18f9914f70c761a6cad470efc986d5e07e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 23:44:50 -0700 Subject: sparc64: Convert to generic helpers for IPI function calls. Signed-off-by: David S. 
Miller --- arch/sparc64/Kconfig | 1 + arch/sparc64/kernel/smp.c | 87 ++++++++----------------------------- arch/sparc64/kernel/sparc64_ksyms.c | 2 - arch/sparc64/kernel/ttable.S | 7 ++- arch/sparc64/mm/ultra.S | 5 +++ 5 files changed, 29 insertions(+), 73 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 794d22fdf46..1aeb1da9829 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -16,6 +16,7 @@ config SPARC64 select HAVE_IDE select HAVE_LMB select HAVE_ARCH_KGDB + select USE_GENERIC_SMP_HELPERS if SMP config GENERIC_TIME bool diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index c099d96f123..7cf72b4bb10 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -788,89 +788,36 @@ static void smp_start_sync_tick_client(int cpu) 0, 0, 0, mask); } -/* Send cross call to all processors except self. */ -#define smp_cross_call(func, ctx, data1, data2) \ - smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map) - -struct call_data_struct { - void (*func) (void *info); - void *info; - atomic_t finished; - int wait; -}; - -static struct call_data_struct *call_data; - extern unsigned long xcall_call_function; -/** - * smp_call_function(): Run a function on all other CPUs. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @wait: If true, wait (atomically) until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. Does not return until - * remote CPUs are nearly ready to execute <> or are or have executed. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. 
- */ -static int sparc64_smp_call_function_mask(void (*func)(void *info), void *info, - int wait, cpumask_t mask) +void arch_send_call_function_ipi(cpumask_t mask) { - struct call_data_struct data; - int cpus; - - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - data.func = func; - data.info = info; - atomic_set(&data.finished, 0); - data.wait = wait; - - spin_lock(&call_lock); - - cpu_clear(smp_processor_id(), mask); - cpus = cpus_weight(mask); - if (!cpus) - goto out_unlock; - - call_data = &data; - mb(); - smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask); +} - /* Wait for response */ - while (atomic_read(&data.finished) != cpus) - cpu_relax(); +extern unsigned long xcall_call_function_single; -out_unlock: - spin_unlock(&call_lock); +void arch_send_call_function_single_ipi(int cpu) +{ + cpumask_t mask = cpumask_of_cpu(cpu); - return 0; + smp_cross_call_masked(&xcall_call_function_single, 0, 0, 0, mask); } -int smp_call_function(void (*func)(void *info), void *info, int wait) -{ - return sparc64_smp_call_function_mask(func, info, wait, cpu_online_map); -} +/* Send cross call to all processors except self. 
*/ +#define smp_cross_call(func, ctx, data1, data2) \ + smp_cross_call_masked(func, ctx, data1, data2, cpu_online_map) void smp_call_function_client(int irq, struct pt_regs *regs) { - void (*func) (void *info) = call_data->func; - void *info = call_data->info; + clear_softint(1 << irq); + generic_smp_call_function_interrupt(); +} +void smp_call_function_single_client(int irq, struct pt_regs *regs) +{ clear_softint(1 << irq); - if (call_data->wait) { - /* let initiator proceed only after completion */ - func(info); - atomic_inc(&call_data->finished); - } else { - /* let initiator proceed after getting data */ - atomic_inc(&call_data->finished); - func(info); - } + generic_smp_call_function_single_interrupt(); } static void tsb_sync(void *info) @@ -890,7 +837,7 @@ static void tsb_sync(void *info) void smp_tsb_sync(struct mm_struct *mm) { - sparc64_smp_call_function_mask(tsb_sync, mm, 1, mm->cpu_vm_mask); + smp_call_function_mask(mm->cpu_vm_mask, tsb_sync, mm, 1); } extern unsigned long xcall_flush_tlb_mm; diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 49d3ea50c24..504e678ee12 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -108,8 +108,6 @@ EXPORT_SYMBOL(__read_unlock); EXPORT_SYMBOL(__write_lock); EXPORT_SYMBOL(__write_unlock); EXPORT_SYMBOL(__write_trylock); - -EXPORT_SYMBOL(smp_call_function); #endif /* CONFIG_SMP */ #ifdef CONFIG_MCOUNT diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S index 450053af039..1ade3d6fb7f 100644 --- a/arch/sparc64/kernel/ttable.S +++ b/arch/sparc64/kernel/ttable.S @@ -58,7 +58,12 @@ tl0_irq3: BTRAP(0x43) tl0_irq4: BTRAP(0x44) #endif tl0_irq5: TRAP_IRQ(handler_irq, 5) -tl0_irq6: BTRAP(0x46) BTRAP(0x47) BTRAP(0x48) BTRAP(0x49) +#ifdef CONFIG_SMP +tl0_irq6: TRAP_IRQ(smp_call_function_single_client, 6) +#else +tl0_irq6: BTRAP(0x46) +#endif +tl0_irq7: BTRAP(0x47) BTRAP(0x48) BTRAP(0x49) tl0_irq10: BTRAP(0x4a) BTRAP(0x4b) 
BTRAP(0x4c) BTRAP(0x4d) tl0_irq14: TRAP_IRQ(timer_interrupt, 14) tl0_irq15: TRAP_IRQ(handler_irq, 15) diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index 9bb2d90a9df..4c8ca131ffa 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -688,6 +688,11 @@ xcall_call_function: wr %g0, (1 << PIL_SMP_CALL_FUNC), %set_softint retry + .globl xcall_call_function_single +xcall_call_function_single: + wr %g0, (1 << PIL_SMP_CALL_FUNC_SNGL), %set_softint + retry + .globl xcall_receive_signal xcall_receive_signal: wr %g0, (1 << PIL_SMP_RECEIVE_SIGNAL), %set_softint -- cgit v1.2.3 From f7fe93344fd3f4ccd406a35f751a61b77f94b0fc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Jul 2008 23:43:55 -0700 Subject: sparc64: Remove 4MB and 512K base page size options. Adrian Bunk reported that enabling 4MB page size breaks the build. The problem is that MAX_ORDER combined with the page shift exceeds the SECTION_SIZE_BITS we use in asm-sparc64/sparsemem.h There are several ways I suppose we could work around this. For one we could define a CONFIG_FORCE_MAX_ZONEORDER to decrease MAX_ORDER in these higher page size cases. But I also know that these page size cases are broken wrt. TLB miss handling especially on pre-hypervisor systems, and there isn't an easy way to fix that. These options were meant to be fun experimental hacks anyways, and only 8K and 64K make any sense to support. So remove 512K and 4M base page size support. Of course, we still support these page sizes for huge pages. Signed-off-by: David S. 
Miller --- arch/sparc64/Kconfig | 11 +---------- arch/sparc64/lib/copy_page.S | 6 +++--- arch/sparc64/mm/tsb.c | 6 ------ 3 files changed, 4 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 1aeb1da9829..7c88263256a 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -98,19 +98,11 @@ config SPARC64_PAGE_SIZE_8KB 8KB and 64KB work quite well, since SPARC ELF sections provide for up to 64KB alignment. - Therefore, 512KB and 4MB are for expert hackers only. - If you don't know what to do, choose 8KB. config SPARC64_PAGE_SIZE_64KB bool "64KB" -config SPARC64_PAGE_SIZE_512KB - bool "512KB" - -config SPARC64_PAGE_SIZE_4MB - bool "4MB" - endchoice config SECCOMP @@ -226,11 +218,10 @@ config HUGETLB_PAGE_SIZE_4MB bool "4MB" config HUGETLB_PAGE_SIZE_512K - depends on !SPARC64_PAGE_SIZE_4MB && !SPARC64_PAGE_SIZE_512KB bool "512K" config HUGETLB_PAGE_SIZE_64K - depends on !SPARC64_PAGE_SIZE_4MB && !SPARC64_PAGE_SIZE_512KB && !SPARC64_PAGE_SIZE_64KB + depends on !SPARC64_PAGE_SIZE_64KB bool "64K" endchoice diff --git a/arch/sparc64/lib/copy_page.S b/arch/sparc64/lib/copy_page.S index 37460666a5c..b243d3b606b 100644 --- a/arch/sparc64/lib/copy_page.S +++ b/arch/sparc64/lib/copy_page.S @@ -25,9 +25,9 @@ #define DCACHE_SIZE (PAGE_SIZE * 2) -#if (PAGE_SHIFT == 13) || (PAGE_SHIFT == 19) +#if (PAGE_SHIFT == 13) #define PAGE_SIZE_REM 0x80 -#elif (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22) +#elif (PAGE_SHIFT == 16) #define PAGE_SIZE_REM 0x100 #else #error Wrong PAGE_SHIFT specified @@ -198,7 +198,7 @@ cheetah_copy_page_insn: cmp %o2, PAGE_SIZE_REM bne,pt %xcc, 1b add %o0, 0x40, %o0 -#if (PAGE_SHIFT == 16) || (PAGE_SHIFT == 22) +#if (PAGE_SHIFT == 16) TOUCH(f0, f2, f4, f6, f8, f10, f12, f14) ldda [%o1] ASI_BLK_P, %f32 stda %f48, [%o0] %asi diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c index fe70c8a557b..3547937b17a 100644 --- a/arch/sparc64/mm/tsb.c +++ b/arch/sparc64/mm/tsb.c @@ -96,12 +96,6 @@ void 
flush_tsb_user(struct mmu_gather *mp) #elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB) #define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_64K #define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_64K -#elif defined(CONFIG_SPARC64_PAGE_SIZE_512KB) -#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_512K -#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_512K -#elif defined(CONFIG_SPARC64_PAGE_SIZE_4MB) -#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_4MB -#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_4MB #else #error Broken base page size setting... #endif -- cgit v1.2.3 From 432e8765f0206de5bbddcbd4eb1d9611c79b1eaa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 Jul 2008 00:43:52 -0700 Subject: sparc64: Add missing hypervisor service group numbers. Signed-off-by: David S. Miller --- arch/sparc64/kernel/hvapi.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/sparc64/kernel/hvapi.c b/arch/sparc64/kernel/hvapi.c index f34f5d6181e..691760b5b01 100644 --- a/arch/sparc64/kernel/hvapi.c +++ b/arch/sparc64/kernel/hvapi.c @@ -34,8 +34,12 @@ static struct api_info api_table[] = { { .group = HV_GRP_LDOM, }, { .group = HV_GRP_SVC_CHAN, .flags = FLAG_PRE_API }, { .group = HV_GRP_NCS, .flags = FLAG_PRE_API }, + { .group = HV_GRP_RNG, }, { .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API }, { .group = HV_GRP_FIRE_PERF, }, + { .group = HV_GRP_N2_CPU, }, + { .group = HV_GRP_NIU, }, + { .group = HV_GRP_VF_CPU, }, { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, }; -- cgit v1.2.3 From e7eb32eb3d28788fd66c233618e3f7fbe7beb9e4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 Jul 2008 01:49:06 -0700 Subject: sparc64: Update defconfig. Signed-off-by: David S. 
Miller --- arch/sparc64/defconfig | 145 ++++++++++++++++++++++++++----------------------- 1 file changed, 76 insertions(+), 69 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig index 76eb832527f..82cab5cc807 100644 --- a/arch/sparc64/defconfig +++ b/arch/sparc64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.26-rc2 -# Fri May 16 13:36:07 2008 +# Linux kernel version: 2.6.26 +# Fri Jul 18 00:47:07 2008 # CONFIG_SPARC=y CONFIG_SPARC64=y @@ -22,18 +22,6 @@ CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_ARCH_NO_VIRT_TO_BUS=y CONFIG_OF=y CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y -CONFIG_SPARC64_PAGE_SIZE_8KB=y -# CONFIG_SPARC64_PAGE_SIZE_64KB is not set -# CONFIG_SPARC64_PAGE_SIZE_512KB is not set -# CONFIG_SPARC64_PAGE_SIZE_4MB is not set -CONFIG_SECCOMP=y -CONFIG_HZ_100=y -# CONFIG_HZ_250 is not set -# CONFIG_HZ_300 is not set -# CONFIG_HZ_1000 is not set -CONFIG_HZ=100 -# CONFIG_SCHED_HRTICK is not set -CONFIG_HOTPLUG_CPU=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # @@ -105,6 +93,7 @@ CONFIG_KRETPROBES=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y # CONFIG_HAVE_DMA_ATTRS is not set +CONFIG_USE_GENERIC_SMP_HELPERS=y CONFIG_PROC_PAGE_MONITOR=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -121,6 +110,7 @@ CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BLK_DEV_BSG=y +# CONFIG_BLK_DEV_INTEGRITY is not set CONFIG_BLOCK_COMPAT=y # @@ -136,11 +126,21 @@ CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="anticipatory" CONFIG_CLASSIC_RCU=y -CONFIG_GENERIC_HARDIRQS=y # -# General machine setup +# Processor type and features # +CONFIG_SPARC64_PAGE_SIZE_8KB=y +# CONFIG_SPARC64_PAGE_SIZE_64KB is not set +CONFIG_SECCOMP=y +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=100 +# CONFIG_SCHED_HRTICK is not set +CONFIG_HOTPLUG_CPU=y +CONFIG_GENERIC_HARDIRQS=y 
CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -342,6 +342,8 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y # CONFIG_PREVENT_FIRMWARE_BUILD is not set CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_SYS_HYPERVISOR is not set @@ -366,6 +368,7 @@ CONFIG_CDROM_PKTCDVD_BUFFERS=8 CONFIG_CDROM_PKTCDVD_WCACHE=y CONFIG_ATA_OVER_ETH=m CONFIG_SUNVDC=m +# CONFIG_BLK_DEV_HD is not set CONFIG_MISC_DEVICES=y # CONFIG_PHANTOM is not set # CONFIG_EEPROM_93CX6 is not set @@ -379,6 +382,7 @@ CONFIG_BLK_DEV_IDE=y # # Please see Documentation/ide/ide.txt for help/info on IDE drives # +CONFIG_IDE_TIMINGS=y # CONFIG_BLK_DEV_IDE_SATA is not set CONFIG_BLK_DEV_IDEDISK=y # CONFIG_IDEDISK_MULTI_MODE is not set @@ -429,8 +433,6 @@ CONFIG_BLK_DEV_ALI15X3=y # CONFIG_BLK_DEV_VIA82CXXX is not set # CONFIG_BLK_DEV_TC86C001 is not set CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_BLK_DEV_HD_ONLY is not set -# CONFIG_BLK_DEV_HD is not set # # SCSI device support @@ -504,6 +506,7 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_SUNESP is not set # CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_DH is not set # CONFIG_ATA is not set CONFIG_MD=y CONFIG_BLK_DEV_MD=m @@ -529,6 +532,10 @@ CONFIG_DM_ZERO=m # # IEEE 1394 (FireWire) support # + +# +# Enable only one of the two stacks, unless you know what you are doing +# # CONFIG_FIREWIRE is not set # CONFIG_IEEE1394 is not set # CONFIG_I2O is not set @@ -745,7 +752,8 @@ CONFIG_SERIAL_CORE_CONSOLE=y CONFIG_UNIX98_PTYS=y # CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set -# CONFIG_HW_RANDOM is not set +CONFIG_HW_RANDOM=m +CONFIG_HW_RANDOM_N2RNG=m # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set # CONFIG_RAW_DRIVER is not set @@ -759,38 +767,58 @@ CONFIG_I2C_ALGOBIT=y # # I2C Hardware Bus support # + +# +# PC SMBus host controller drivers +# # CONFIG_I2C_ALI1535 is not set # CONFIG_I2C_ALI1563 is not 
set # CONFIG_I2C_ALI15X3 is not set # CONFIG_I2C_AMD756 is not set # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set -# CONFIG_I2C_I810 is not set +# CONFIG_I2C_ISCH is not set # CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_NFORCE2 is not set -# CONFIG_I2C_OCORES is not set -# CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PROSAVAGE is not set -# CONFIG_I2C_SAVAGE4 is not set -# CONFIG_I2C_SIMTEC is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set -# CONFIG_I2C_TAOS_EVM is not set -# CONFIG_I2C_STUB is not set -# CONFIG_I2C_TINY_USB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_SIMTEC is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set +# CONFIG_I2C_TINY_USB is not set + +# +# Graphics adapter I2C/DDC channel drivers +# # CONFIG_I2C_VOODOO3 is not set + +# +# Other I2C/SMBus bus drivers +# # CONFIG_I2C_PCA_PLATFORM is not set +# CONFIG_I2C_STUB is not set # # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set +# CONFIG_AT24 is not set # CONFIG_SENSORS_EEPROM is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCA9539 is not set # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_MAX6875 is not set # CONFIG_SENSORS_TSL2550 is not set @@ -856,6 +884,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_W83627EHF is not set # CONFIG_HWMON_DEBUG_CHIP is not set # CONFIG_THERMAL is not set +# CONFIG_THERMAL_HWMON is not set # CONFIG_WATCHDOG is not set # @@ -985,15 +1014,7 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set # CONFIG_LOGO_LINUX_CLUT224 is not set CONFIG_LOGO_SUN_CLUT224=y - -# -# Sound -# CONFIG_SOUND=m - -# -# Advanced Linux Sound Architecture -# CONFIG_SND=m CONFIG_SND_TIMER=m CONFIG_SND_PCM=m @@ -1010,21 +1031,17 @@ 
CONFIG_SND_SUPPORT_OLD_API=y CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set - -# -# Generic devices -# +CONFIG_SND_VMASTER=y CONFIG_SND_MPU401_UART=m CONFIG_SND_AC97_CODEC=m +CONFIG_SND_DRIVERS=y CONFIG_SND_DUMMY=m CONFIG_SND_VIRMIDI=m CONFIG_SND_MTPAV=m # CONFIG_SND_SERIAL_U16550 is not set # CONFIG_SND_MPU401 is not set - -# -# PCI devices -# +# CONFIG_SND_AC97_POWER_SAVE is not set +CONFIG_SND_PCI=y # CONFIG_SND_AD1889 is not set # CONFIG_SND_ALS300 is not set CONFIG_SND_ALI5451=m @@ -1084,37 +1101,14 @@ CONFIG_SND_ALI5451=m # CONFIG_SND_VIRTUOSO is not set # CONFIG_SND_VX222 is not set # CONFIG_SND_YMFPCI is not set -# CONFIG_SND_AC97_POWER_SAVE is not set - -# -# USB devices -# +CONFIG_SND_USB=y # CONFIG_SND_USB_AUDIO is not set # CONFIG_SND_USB_CAIAQ is not set - -# -# ALSA Sparc devices -# +CONFIG_SND_SPARC=y # CONFIG_SND_SUN_AMD7930 is not set CONFIG_SND_SUN_CS4231=m # CONFIG_SND_SUN_DBRI is not set - -# -# System on Chip audio support -# # CONFIG_SND_SOC is not set - -# -# ALSA SoC audio for Freescale SOCs -# - -# -# SoC Audio for the Texas Instruments OMAP -# - -# -# Open Sound System -# # CONFIG_SOUND_PRIME is not set CONFIG_AC97_BUS=m CONFIG_HID_SUPPORT=y @@ -1167,6 +1161,7 @@ CONFIG_USB_UHCI_HCD=m # # CONFIG_USB_ACM is not set # CONFIG_USB_PRINTER is not set +# CONFIG_USB_WDM is not set # # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' @@ -1226,6 +1221,7 @@ CONFIG_USB_STORAGE=m # CONFIG_USB_TRANCEVIBRATOR is not set # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set +# CONFIG_USB_ISIGHTFW is not set # CONFIG_USB_GADGET is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set @@ -1420,6 +1416,12 @@ CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set +CONFIG_HAVE_FTRACE=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +# CONFIG_FTRACE is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# 
CONFIG_CONTEXT_SWITCH_TRACER is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -1486,6 +1488,10 @@ CONFIG_CRYPTO_CRC32C=m CONFIG_CRYPTO_MD4=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m @@ -1527,6 +1533,7 @@ CONFIG_BITREVERSE=y # CONFIG_GENERIC_FIND_FIRST_BIT is not set CONFIG_CRC_CCITT=m CONFIG_CRC16=m +# CONFIG_CRC_T10DIF is not set # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # CONFIG_CRC7 is not set -- cgit v1.2.3 From 29cbeb0e17d9d2ca824f62f71cfa7360b3157112 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 17 Jul 2008 21:50:23 -0700 Subject: x86: use cpu_clear in remove_cpu_from_maps Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 687376ab07e..27456574f07 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1311,7 +1311,7 @@ static void __ref remove_cpu_from_maps(int cpu) cpu_clear(cpu, cpu_callout_map); cpu_clear(cpu, cpu_callin_map); /* was set by cpu_init() */ - clear_bit(cpu, (unsigned long *)&cpu_initialized); + cpu_clear(cpu, cpu_initialized); numa_remove_cpu(cpu); } -- cgit v1.2.3 From 95c7c23b06bc92f1772b9c9460845f179ba8c39e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 15 Jul 2008 13:42:34 -0700 Subject: xen: report hypervisor version Various versions of the hypervisor have differences in what ABIs and features they support. Print some details into the boot log to help with remote debugging. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bb508456ef5..5328e46d9cf 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -167,10 +167,14 @@ void xen_vcpu_restore(void) static void __init xen_banner(void) { + unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); + struct xen_extraversion extra; + HYPERVISOR_xen_version(XENVER_extraversion, &extra); + printk(KERN_INFO "Booting paravirtualized kernel on %s\n", pv_info.name); - printk(KERN_INFO "Hypervisor signature: %s%s\n", - xen_start_info->magic, + printk(KERN_INFO "Xen version: %d.%d%s%s\n", + version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } -- cgit v1.2.3 From fbdb7da91b0382d4b148d8b43c2eb4bab642bb5b Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 14 Jul 2008 15:34:09 -0700 Subject: x86_64: ia32_signal.c: use macro instead of immediate Make and use macro FIX_EFLAGS, instead of immediate value 0x40DD5 in ia32_restore_sigcontext(). Signed-off-by: Hiroshi Shimamoto Acked-by: "H. 
Peter Anvin" Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index cb3856a18c8..dc9b9b9803f 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -36,6 +36,11 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) +#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ + X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ + X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ + X86_EFLAGS_CF) + asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset); void signal_fault(struct pt_regs *regs, void __user *frame, char *where); @@ -248,7 +253,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, regs->ss |= 3; err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); /* disable syscall checks */ regs->orig_ax = -1; -- cgit v1.2.3 From 1f067167a83d1c7f80437fd1d32b55508aaca009 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 15 Jul 2008 00:02:28 -0700 Subject: x86: seperate memtest from init_64.c it's separate functionality that deserves its own file. This also prepares 32-bit memtest support. 
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/Makefile | 1 + arch/x86/mm/init_64.c | 112 --------------------------------------------- arch/x86/mm/memtest.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 112 deletions(-) create mode 100644 arch/x86/mm/memtest.c (limited to 'arch') diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 9873716e9f7..1fbb844c3d7 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -21,3 +21,4 @@ obj-$(CONFIG_K8_NUMA) += k8topology_64.o endif obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o +obj-$(CONFIG_MEMTEST) += memtest.o diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 306049edd55..ec37121f670 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -517,118 +517,6 @@ static void __init init_gbpages(void) direct_gbpages = 0; } -#ifdef CONFIG_MEMTEST - -static void __init memtest(unsigned long start_phys, unsigned long size, - unsigned pattern) -{ - unsigned long i; - unsigned long *start; - unsigned long start_bad; - unsigned long last_bad; - unsigned long val; - unsigned long start_phys_aligned; - unsigned long count; - unsigned long incr; - - switch (pattern) { - case 0: - val = 0UL; - break; - case 1: - val = -1UL; - break; - case 2: - val = 0x5555555555555555UL; - break; - case 3: - val = 0xaaaaaaaaaaaaaaaaUL; - break; - default: - return; - } - - incr = sizeof(unsigned long); - start_phys_aligned = ALIGN(start_phys, incr); - count = (size - (start_phys_aligned - start_phys))/incr; - start = __va(start_phys_aligned); - start_bad = 0; - last_bad = 0; - - for (i = 0; i < count; i++) - start[i] = val; - for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { - if (*start != val) { - if (start_phys_aligned == last_bad + incr) { - last_bad += incr; - } else { - if (start_bad) { - printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved", - val, start_bad, last_bad + incr); - reserve_early(start_bad, last_bad 
- start_bad, "BAD RAM"); - } - start_bad = last_bad = start_phys_aligned; - } - } - } - if (start_bad) { - printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved", - val, start_bad, last_bad + incr); - reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); - } - -} - -/* default is disabled */ -static int memtest_pattern __initdata; - -static int __init parse_memtest(char *arg) -{ - if (arg) - memtest_pattern = simple_strtoul(arg, NULL, 0); - return 0; -} - -early_param("memtest", parse_memtest); - -static void __init early_memtest(unsigned long start, unsigned long end) -{ - u64 t_start, t_size; - unsigned pattern; - - if (!memtest_pattern) - return; - - printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); - for (pattern = 0; pattern < memtest_pattern; pattern++) { - t_start = start; - t_size = 0; - while (t_start < end) { - t_start = find_e820_area_size(t_start, &t_size, 1); - - /* done ? */ - if (t_start >= end) - break; - if (t_start + t_size > end) - t_size = end - t_start; - - printk(KERN_CONT "\n %016llx - %016llx pattern %d", - (unsigned long long)t_start, - (unsigned long long)t_start + t_size, pattern); - - memtest(t_start, t_size, pattern); - - t_start += t_size; - } - } - printk(KERN_CONT "\n"); -} -#else -static void __init early_memtest(unsigned long start, unsigned long end) -{ -} -#endif - static unsigned long __init kernel_physical_mapping_init(unsigned long start, unsigned long end, unsigned long page_size_mask) diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c new file mode 100644 index 00000000000..672e17f8262 --- /dev/null +++ b/arch/x86/mm/memtest.c @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static void __init memtest(unsigned long start_phys, unsigned long size, + unsigned pattern) +{ + unsigned long i; + unsigned long *start; + unsigned long start_bad; + unsigned long last_bad; + unsigned long val; + unsigned long start_phys_aligned; 
+ unsigned long count; + unsigned long incr; + + switch (pattern) { + case 0: + val = 0UL; + break; + case 1: + val = -1UL; + break; + case 2: +#ifdef CONFIG_X86_64 + val = 0x5555555555555555UL; +#else + val = 0x55555555UL; +#endif + break; + case 3: +#ifdef CONFIG_X86_64 + val = 0xaaaaaaaaaaaaaaaaUL; +#else + val = 0xaaaaaaaaUL; +#endif + break; + default: + return; + } + + incr = sizeof(unsigned long); + start_phys_aligned = ALIGN(start_phys, incr); + count = (size - (start_phys_aligned - start_phys))/incr; + start = __va(start_phys_aligned); + start_bad = 0; + last_bad = 0; + + for (i = 0; i < count; i++) + start[i] = val; + for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { + if (*start != val) { + if (start_phys_aligned == last_bad + incr) { + last_bad += incr; + } else { + if (start_bad) { + printk(KERN_CONT "\n %010lx bad mem addr %010lx - %010lx reserved", + val, start_bad, last_bad + incr); + reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); + } + start_bad = last_bad = start_phys_aligned; + } + } + } + if (start_bad) { + printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved", + val, start_bad, last_bad + incr); + reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); + } + +} + +/* default is disabled */ +static int memtest_pattern __initdata; + +static int __init parse_memtest(char *arg) +{ + if (arg) + memtest_pattern = simple_strtoul(arg, NULL, 0); + return 0; +} + +early_param("memtest", parse_memtest); + +void __init early_memtest(unsigned long start, unsigned long end) +{ + u64 t_start, t_size; + unsigned pattern; + + if (!memtest_pattern) + return; + + printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); + for (pattern = 0; pattern < memtest_pattern; pattern++) { + t_start = start; + t_size = 0; + while (t_start < end) { + t_start = find_e820_area_size(t_start, &t_size, 1); + + /* done ? 
*/ + if (t_start >= end) + break; + if (t_start + t_size > end) + t_size = end - t_start; + + printk(KERN_CONT "\n %010llx - %010llx pattern %d", + (unsigned long long)t_start, + (unsigned long long)t_start + t_size, pattern); + + memtest(t_start, t_size, pattern); + + t_start += t_size; + } + } + printk(KERN_CONT "\n"); +} -- cgit v1.2.3 From caadbdce240c43e3e46c82fce6c00eb7f01e1beb Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 15 Jul 2008 00:03:44 -0700 Subject: x86: enable memory tester support on 32-bit only supports memory below max_low_pfn. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - arch/x86/mm/init_32.c | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 96e0c2ebc38..03980cb0429 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -447,7 +447,6 @@ config PARAVIRT_DEBUG config MEMTEST bool "Memtest" - depends on X86_64 help This option adds a kernel parameter 'memtest', which allows memtest to be set. diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9689a5138e6..3eeab6d0065 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -844,6 +844,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, reserve_early(table_start << PAGE_SHIFT, table_end << PAGE_SHIFT, "PGTABLE"); + if (!after_init_bootmem) + early_memtest(start, end); + return end >> PAGE_SHIFT; } -- cgit v1.2.3 From 78cbac65fd77242f3e5d77f4d7a71e8bc869fe4d Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Thu, 10 Jul 2008 21:14:52 +0200 Subject: x86: traps_xx: refactor die() like in x86_64 Make the diff between the traps_32.c and traps_64.c a bit smaller. Change traps_32.c to look more like traps_64.c: - move lock information to file scope - split out oops_begin() and oops_end() from die() - increment nest counter in oops_begin Only whitespace change in traps_64.c No functional changes intended. 
Signed-off-by: Alexander van Heukelum Acked-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 95 +++++++++++++++++++++++++--------------------- arch/x86/kernel/traps_64.c | 2 +- 2 files changed, 52 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 8a768973c4f..51cccde376a 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -383,6 +383,54 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; + +unsigned __kprobes long oops_begin(void) +{ + unsigned long flags; + + oops_enter(); + + if (die_owner != raw_smp_processor_id()) { + console_verbose(); + raw_local_irq_save(flags); + __raw_spin_lock(&die_lock); + die_owner = smp_processor_id(); + die_nest_count = 0; + bust_spinlocks(1); + } else { + raw_local_irq_save(flags); + } + die_nest_count++; + return flags; +} + +void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) +{ + bust_spinlocks(0); + die_owner = -1; + add_taint(TAINT_DIE); + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); + + if (!regs) + return; + + if (kexec_should_crash(current)) + crash_kexec(regs); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + + if (panic_on_oops) + panic("Fatal exception"); + + oops_exit(); + do_exit(signr); +} + int __kprobes __die(const char *str, struct pt_regs *regs, long err) { unsigned short ss; @@ -423,31 +471,9 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) */ void die(const char *str, struct pt_regs *regs, long err) { - static struct { - raw_spinlock_t lock; - u32 lock_owner; - int lock_owner_depth; - } die = { - .lock = __RAW_SPIN_LOCK_UNLOCKED, - .lock_owner = -1, - .lock_owner_depth = 0 - }; - unsigned long flags; - - oops_enter(); - - if (die.lock_owner != raw_smp_processor_id()) 
{ - console_verbose(); - raw_local_irq_save(flags); - __raw_spin_lock(&die.lock); - die.lock_owner = smp_processor_id(); - die.lock_owner_depth = 0; - bust_spinlocks(1); - } else { - raw_local_irq_save(flags); - } + unsigned long flags = oops_begin(); - if (++die.lock_owner_depth < 3) { + if (die_nest_count < 3) { report_bug(regs->ip, regs); if (__die(str, regs, err)) @@ -456,26 +482,7 @@ void die(const char *str, struct pt_regs *regs, long err) printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); } - bust_spinlocks(0); - die.lock_owner = -1; - add_taint(TAINT_DIE); - __raw_spin_unlock(&die.lock); - raw_local_irq_restore(flags); - - if (!regs) - return; - - if (kexec_should_crash(current)) - crash_kexec(regs); - - if (in_interrupt()) - panic("Fatal exception in interrupt"); - - if (panic_on_oops) - panic("Fatal exception"); - - oops_exit(); - do_exit(SIGSEGV); + oops_end(flags, regs, SIGSEGV); } static inline void diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 2696a683778..babdbe673b7 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -518,7 +518,7 @@ unsigned __kprobes long oops_begin(void) } void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) -{ +{ die_owner = -1; bust_spinlocks(0); die_nest_count--; -- cgit v1.2.3 From 7dedcee394a3f61475d08002bd12e8068d044216 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Thu, 10 Jul 2008 21:16:39 +0200 Subject: x86: traps_xx: modify x86_64 to use _log_lvl variants i386 has show_trace_log_lvl and show_stack_log_lvl, allowing traces to be emitted with log-level annotations. This patch introduces them to x86_64, but log_lvl is only ever set to an empty string. Output of traces is unchanged. i386-chunk is whitespace-only. 
Signed-off-by: Alexander van Heukelum Cc: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 2 +- arch/x86/kernel/traps_64.c | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 51cccde376a..c971dce3847 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -256,7 +256,7 @@ static const struct stacktrace_ops print_trace_ops = { static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) + unsigned long *stack, unsigned long bp, char *log_lvl) { dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); printk("%s =======================\n", log_lvl); diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index babdbe673b7..c664e696200 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -355,17 +355,24 @@ static const struct stacktrace_ops print_trace_ops = { .address = print_trace_address, }; -void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) +static void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, char *log_lvl) { printk("\nCall Trace:\n"); - dump_trace(task, regs, stack, bp, &print_trace_ops, NULL); + dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); printk("\n"); } +void show_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp) +{ + show_trace_log_lvl(task, regs, stack, bp, ""); +} + static void -_show_stack(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp) +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, unsigned long bp, char *log_lvl) { unsigned long *stack; int i; @@ -399,12 +406,12 @@ _show_stack(struct task_struct *task, struct 
pt_regs *regs, printk(" %016lx", *stack++); touch_nmi_watchdog(); } - show_trace(task, regs, sp, bp); + show_trace_log_lvl(task, regs, sp, bp, log_lvl); } void show_stack(struct task_struct *task, unsigned long *sp) { - _show_stack(task, NULL, sp, 0); + show_stack_log_lvl(task, NULL, sp, 0, ""); } /* @@ -454,7 +461,8 @@ void show_registers(struct pt_regs *regs) u8 *ip; printk("Stack: "); - _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); + show_stack_log_lvl(NULL, regs, (unsigned long *)sp, + regs->bp, ""); printk("\n"); printk(KERN_EMERG "Code: "); -- cgit v1.2.3 From 3f9b5cc018566ad9562df0648395649aebdbc5e0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 18 Jul 2008 16:30:05 +0200 Subject: x86: re-enable OPTIMIZE_INLINING re-enable OPTIMIZE_INLINING more widely. Jeff Dike fixed the remaining outstanding issue in this commit: | commit 4f81c5350b44bcc501ab6f8a089b16d064b4d2f6 | Author: Jeff Dike | Date: Mon Jul 7 13:36:56 2008 -0400 | | [UML] fix gcc ICEs and unresolved externs [...] | This patch reintroduces unit-at-a-time for gcc >= 4.0, bringing back the | possibility of Uli's crash. If that happens, we'll debug it. it's still default-off and thus opt-in. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ae36bfa814e..ffd5913b35d 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -287,7 +287,6 @@ config CPA_DEBUG config OPTIMIZE_INLINING bool "Allow gcc to uninline functions marked 'inline'" - depends on BROKEN help This option determines if the kernel forces gcc to inline the functions developers have marked 'inline'. Doing so takes away freedom from gcc to @@ -298,5 +297,7 @@ config OPTIMIZE_INLINING become the default in the future, until then this option is there to test gcc for this. + If unsure, say N. 
+ endmenu -- cgit v1.2.3 From 8b2b9c1af065a45ef00c26964420489a53581779 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 15 Jul 2008 17:09:03 +0900 Subject: x86, intel_cacheinfo: fix use-after-free cache_kobject This avoids calling kobject_uevent() with cache_kobject that has already been deallocated in an error path. Signed-off-by: Akinobu Mita Cc: "H. Peter Anvin" Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 2c8afafa18e..ff517f0b8cc 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -780,15 +780,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } kobject_put(per_cpu(cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); - break; + return retval; } kobject_uevent(&(this_object->kobj), KOBJ_ADD); } - if (!retval) - cpu_set(cpu, cache_dev_map); + cpu_set(cpu, cache_dev_map); kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); - return retval; + return 0; } static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) -- cgit v1.2.3 From 47129654226b5bd418afe533ce4e11d6a0b6d6e4 Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Sun, 6 Jul 2008 20:13:49 +0400 Subject: x86 setup.c: cleanup includes x86: remove double includes in setup.c Signed-off-by: Alexander Beregalov Cc: yhlu.kernel@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 531b55b8e81..4a2b8acc1d9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -57,12 +57,8 @@ #include #include #include -#include #include -#include -#include -#include #include #include #include @@ -104,7 +100,6 @@ #include #include -#include #include #include #ifdef CONFIG_X86_64 -- cgit v1.2.3 
From 9781f39fd209cd93ab98b669814191acc67f32fd Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 10 Jul 2008 17:13:19 +0200 Subject: x86: consolidate the definition of the force_mwait variable The force_mwait variable is defined either in arch/x86/kernel/cpu/amd.c or in arch/x86/kernel/setup_64.c, but it is only initialized and used in arch/x86/kernel/process.c. This patch moves the declaration to arch/x86/kernel/process.c. Signed-off-by: Thomas Petazzoni Cc: michael@free-electrons.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 2 -- arch/x86/kernel/process.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 81a07ca65d4..cae9cabc303 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -24,8 +24,6 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); -int force_mwait __cpuinitdata; - static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { if (cpuid_eax(0x80000000) >= 0x80000007) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4d629c62f4f..74f2d196adb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -15,6 +15,7 @@ unsigned long idle_nomwait; EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; +static int force_mwait __cpuinitdata; int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { -- cgit v1.2.3 From 5ff4789d045cdaec7629e027e4f8ff8e34308b81 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 14 Jul 2008 20:11:18 +0200 Subject: AMD IOMMU: set iommu for device from ACPI code too The device<->iommu relationship has to be set from the information in the ACPI table too. This patch adds this logic to the driver.
Signed-off-by: Joerg Roedel Cc: iommu@lists.linux-foundation.org Cc: bhavna.sarathy@amd.com Cc: robert.richter@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_iommu_init.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 9bf1b8111b0..7661b02d720 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -426,11 +426,18 @@ static void set_dev_entry_bit(u16 devid, u8 bit) amd_iommu_dev_table[devid].data[i] |= (1 << _bit); } +/* Writes the specific IOMMU for a device into the rlookup table */ +static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) +{ + amd_iommu_rlookup_table[devid] = iommu; +} + /* * This function takes the device specific flags read from the ACPI * table and sets up the device table entry with that information */ -static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) +static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, + u16 devid, u32 flags, u32 ext_flags) { if (flags & ACPI_DEVFLAG_INITPASS) set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); @@ -446,12 +453,8 @@ static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); if (flags & ACPI_DEVFLAG_LINT1) set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); -} -/* Writes the specific IOMMU for a device into the rlookup table */ -static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) -{ - amd_iommu_rlookup_table[devid] = iommu; + set_iommu_for_device(iommu, devid); } /* @@ -550,11 +553,12 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_ALL: for (dev_i = iommu->first_device; dev_i <= iommu->last_device; ++dev_i) - set_dev_entry_from_acpi(dev_i, e->flags, 0); + set_dev_entry_from_acpi(iommu, dev_i, + e->flags, 0); break; case IVHD_DEV_SELECT: devid = 
e->devid; - set_dev_entry_from_acpi(devid, e->flags, 0); + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); break; case IVHD_DEV_SELECT_RANGE_START: devid_start = e->devid; @@ -565,7 +569,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, case IVHD_DEV_ALIAS: devid = e->devid; devid_to = e->ext >> 8; - set_dev_entry_from_acpi(devid, e->flags, 0); + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); amd_iommu_alias_table[devid] = devid_to; break; case IVHD_DEV_ALIAS_RANGE: @@ -577,7 +581,8 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, break; case IVHD_DEV_EXT_SELECT: devid = e->devid; - set_dev_entry_from_acpi(devid, e->flags, e->ext); + set_dev_entry_from_acpi(iommu, devid, e->flags, + e->ext); break; case IVHD_DEV_EXT_SELECT_RANGE: devid_start = e->devid; @@ -590,7 +595,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, for (dev_i = devid_start; dev_i <= devid; ++dev_i) { if (alias) amd_iommu_alias_table[dev_i] = devid_to; - set_dev_entry_from_acpi( + set_dev_entry_from_acpi(iommu, amd_iommu_alias_table[dev_i], flags, ext_flags); } -- cgit v1.2.3 From 6ac8d51f01d345af5ea4209004a9ea29b2f20891 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Tue, 15 Jul 2008 21:09:13 +0530 Subject: x86: introducing asm-x86/traps.h Declaring x86 traps under one hood. Declaring x86 do_traps before defining them. 
Signed-off-by: Jaswinder Singh Cc: Andi Kleen Cc: David Woodhouse Cc: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 21 +-------------------- arch/x86/kernel/traps_64.c | 22 +--------------------- 2 files changed, 2 insertions(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index c971dce3847..03df8e45e5a 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -58,6 +58,7 @@ #include #include #include +#include #include "mach_traps.h" @@ -77,26 +78,6 @@ char ignore_fpu_irq; gate_desc idt_table[256] __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; -asmlinkage void divide_error(void); -asmlinkage void debug(void); -asmlinkage void nmi(void); -asmlinkage void int3(void); -asmlinkage void overflow(void); -asmlinkage void bounds(void); -asmlinkage void invalid_op(void); -asmlinkage void device_not_available(void); -asmlinkage void coprocessor_segment_overrun(void); -asmlinkage void invalid_TSS(void); -asmlinkage void segment_not_present(void); -asmlinkage void stack_segment(void); -asmlinkage void general_protection(void); -asmlinkage void page_fault(void); -asmlinkage void coprocessor_error(void); -asmlinkage void simd_coprocessor_error(void); -asmlinkage void alignment_check(void); -asmlinkage void spurious_interrupt_bug(void); -asmlinkage void machine_check(void); - int panic_on_unrecovered_nmi; int kstack_depth_to_print = 24; static unsigned int code_bytes = 64; diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index c664e696200..3f18d73f420 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -51,30 +51,10 @@ #include #include #include +#include #include -asmlinkage void divide_error(void); -asmlinkage void debug(void); -asmlinkage void nmi(void); -asmlinkage void int3(void); -asmlinkage void overflow(void); -asmlinkage void bounds(void); -asmlinkage void invalid_op(void); -asmlinkage void 
device_not_available(void); -asmlinkage void double_fault(void); -asmlinkage void coprocessor_segment_overrun(void); -asmlinkage void invalid_TSS(void); -asmlinkage void segment_not_present(void); -asmlinkage void stack_segment(void); -asmlinkage void general_protection(void); -asmlinkage void page_fault(void); -asmlinkage void coprocessor_error(void); -asmlinkage void simd_coprocessor_error(void); -asmlinkage void alignment_check(void); -asmlinkage void spurious_interrupt_bug(void); -asmlinkage void machine_check(void); - int panic_on_unrecovered_nmi; int kstack_depth_to_print = 12; static unsigned int code_bytes = 64; -- cgit v1.2.3 From 1181f8b5f0302580af0958169ef4497c3eb57a61 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 3 Jul 2008 13:12:13 -0700 Subject: x86_32: remove redundant KERN_INFO This printk has a KERN_ facility level in the format string. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index d9237363096..d633d801f85 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -212,7 +212,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused) badframe: if (show_unhandled_signals && printk_ratelimit()) { - printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:" + printk("%s%s[%d] bad frame in sigreturn frame:" "%p ip:%lx sp:%lx oeax:%lx", task_pid_nr(current) > 1 ? 
KERN_INFO : KERN_EMERG, current->comm, task_pid_nr(current), frame, regs->ip, -- cgit v1.2.3 From fa10c51a04a43ced5fd6033f19a74d2c82198b34 Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Wed, 9 Jul 2008 22:28:24 +0400 Subject: arch/x86/kernel/cpu/common_64.c: remove double inclusions x86: remove double inclusions in arch/x86/kernel/cpu/common_64.c Signed-off-by: Alexander Beregalov Cc: yhlu.kernel@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 7b8cc72feb4..2a4475beea4 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -7,12 +7,9 @@ #include #include #include -#include #include #include -#include #include -#include #include #include #include -- cgit v1.2.3 From 812b121d55316333a3480b294523d4e52f9dd366 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 16 Jul 2008 19:21:31 -0700 Subject: x86_64: ia32_signal.c: remove signal number conversion This was old code that was needed for iBCS and x86-64 never supported that. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index dc9b9b9803f..20af4c79579 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -520,7 +520,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, compat_sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; - struct exec_domain *ed = current_thread_info()->exec_domain; void __user *restorer; int err = 0; @@ -543,8 +542,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - err |= __put_user((ed && ed->signal_invmap && sig < 32 - ? 
ed->signal_invmap[sig] : sig), &frame->sig); + err |= __put_user(sig, &frame->sig); err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo); err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc); err |= copy_siginfo_to_user32(&frame->info, info); -- cgit v1.2.3 From f2ba93929fdb91fd806be20e959a50f7db82790e Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 18 Jul 2008 13:35:37 +0100 Subject: x86: check function status in EDD boot code Without checking the return value of get_edd_info() and adding the entry only in the success case, 6 devices show up under /sys/firmware/edd/, no matter how many devices are actually present. Signed-off-by: Jan Beulich Signed-off-by: H. Peter Anvin --- arch/x86/boot/edd.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index 03399d64013..d93cbc6464d 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c @@ -167,9 +167,8 @@ void query_edd(void) * Scan the BIOS-supported hard disks and query EDD * information... */ - get_edd_info(devno, &ei); - - if (boot_params.eddbuf_entries < EDDMAXNR) { + if (!get_edd_info(devno, &ei) + && boot_params.eddbuf_entries < EDDMAXNR) { memcpy(edp, &ei, sizeof ei); edp++; boot_params.eddbuf_entries++; -- cgit v1.2.3 From 369c99205f633d1e4038b15f5dc4a5500a4359c3 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 18 Jul 2008 13:37:53 +0100 Subject: x86: fix two modpost warnings Even though it's only the difference of the two __initdata symbols that's being calculated, modpost still doesn't like this. So rather calculate the size once in an __init function and store it for later use. Signed-off-by: Jan Beulich Signed-off-by: H. 
Peter Anvin --- arch/x86/vdso/vma.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 19a6cfaf5db..257ba4a10ab 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -21,7 +21,8 @@ unsigned int __read_mostly vdso_enabled = 1; extern char vdso_start[], vdso_end[]; extern unsigned short vdso_sync_cpuid; -struct page **vdso_pages; +static struct page **vdso_pages; +static unsigned vdso_size; static inline void *var_ref(void *p, char *name) { @@ -38,6 +39,7 @@ static int __init init_vdso_vars(void) int i; char *vbase; + vdso_size = npages << PAGE_SHIFT; vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL); if (!vdso_pages) goto oom; @@ -101,20 +103,19 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) struct mm_struct *mm = current->mm; unsigned long addr; int ret; - unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE); if (!vdso_enabled) return 0; down_write(&mm->mmap_sem); - addr = vdso_addr(mm->start_stack, len); - addr = get_unmapped_area(NULL, addr, len, 0, 0); + addr = vdso_addr(mm->start_stack, vdso_size); + addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; } - ret = install_special_mapping(mm, addr, len, + ret = install_special_mapping(mm, addr, vdso_size, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| VM_ALWAYSDUMP, -- cgit v1.2.3 From 08e1a13e7d14ba5d6a22bf4b8c6e11128d3bcdfe Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 18 Jul 2008 13:44:16 +0100 Subject: x86: reduce forbid_dac's visibility It's not used anywhere outside its declaring file. Signed-off-by: Jan Beulich Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/pci-dma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8467ec2320f..702714bd151 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -9,8 +9,7 @@ #include #include -int forbid_dac __read_mostly; -EXPORT_SYMBOL(forbid_dac); +static int forbid_dac __read_mostly; const struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); -- cgit v1.2.3 From 08ad8afaa0f7343e9c64eec5dbbb178e390e03a2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 18 Jul 2008 13:45:20 +0100 Subject: x86: reduce force_mwait visibility It's not used anywhere outside its single referencing file. Signed-off-by: Jan Beulich Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 2 -- arch/x86/kernel/process.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 81a07ca65d4..cae9cabc303 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -24,8 +24,6 @@ extern void vide(void); __asm__(".align 4\nvide: ret"); -int force_mwait __cpuinitdata; - static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { if (cpuid_eax(0x80000000) >= 0x80000007) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 158bd6a16f6..9f94bb1c811 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -199,6 +199,7 @@ static void poll_idle(void) * * idle=mwait overrides this decision and forces the usage of mwait. */ +static int __cpuinitdata force_mwait; #define MWAIT_INFO 0x05 #define MWAIT_ECX_EXTENDED_INFO 0x01 -- cgit v1.2.3 From 2ddf9b7b3e6660199269e34cfa27148440ddc3bf Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 18 Jul 2008 13:32:23 +0100 Subject: i386/xen: add proper unwind annotations to xen_sysenter_target Signed-off-by: Jan Beulich Cc: Jeremy Fitzhardinge Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/entry_32.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6bc07f0f120..ad5264c29e9 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1024,6 +1024,7 @@ ENDPROC(kernel_thread_helper) ENTRY(xen_sysenter_target) RING0_INT_FRAME addl $5*4, %esp /* remove xen-provided frame */ + CFI_ADJUST_CFA_OFFSET -5*4 jmp sysenter_past_esp CFI_ENDPROC -- cgit v1.2.3 From ae79cdaacb5599781f8bb49f4bdd5723029669cf Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 18 Jul 2008 16:08:13 -0700 Subject: x86: Add a arch directory for x86 under debugfs Add a directory for x86 arch under debugfs. Can be used to accumulate all x86 specific debugfs files. Signed-off-by: Venkatesh Pallipadi Signed-off-by: H. Peter Anvin --- arch/x86/kernel/kdebugfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index c0320599171..f2d43bc7551 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -12,9 +12,13 @@ #include #include #include +#include #include +struct dentry *arch_debugfs_dir; +EXPORT_SYMBOL(arch_debugfs_dir); + #ifdef CONFIG_DEBUG_BOOT_PARAMS struct setup_data_node { u64 paddr; @@ -209,6 +213,10 @@ static int __init arch_kdebugfs_init(void) { int error = 0; + arch_debugfs_dir = debugfs_create_dir("x86", NULL); + if (!arch_debugfs_dir) + return -ENOMEM; + #ifdef CONFIG_DEBUG_BOOT_PARAMS error = boot_params_kdebugfs_init(); #endif -- cgit v1.2.3 From fec0962e0bed407927b9ff54bb0596a3ab7e4b61 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 18 Jul 2008 16:08:14 -0700 Subject: x86: Add a debugfs interface to dump PAT memtype Add a debugfs interface to list out all the PAT memtype reservations. 
Appears at debugfs x86/pat_memtype_list and output format is type @ start-end We do not hold the lock while printing the entire list. So, the list may not be a consistent copy in case where regions are getting added or deleted at the same time. Signed-off-by: Venkatesh Pallipadi Signed-off-by: H. Peter Anvin --- arch/x86/mm/pat.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index d4585077977..0917a540a55 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -489,3 +491,89 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) free_memtype(addr, addr + size); } + +#if defined(CONFIG_DEBUG_FS) + +/* get Nth element of the linked list */ +static struct memtype *memtype_get_idx(loff_t pos) +{ + struct memtype *list_node, *print_entry; + int i = 1; + + print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); + if (!print_entry) + return NULL; + + spin_lock(&memtype_lock); + list_for_each_entry(list_node, &memtype_list, nd) { + if (pos == i) { + *print_entry = *list_node; + spin_unlock(&memtype_lock); + return print_entry; + } + ++i; + } + spin_unlock(&memtype_lock); + kfree(print_entry); + return NULL; +} + +static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos == 0) { + ++*pos; + seq_printf(seq, "PAT memtype list:\n"); + } + + return memtype_get_idx(*pos); +} + +static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return memtype_get_idx(*pos); +} + +static void memtype_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int memtype_seq_show(struct seq_file *seq, void *v) +{ + struct memtype *print_entry = (struct memtype *)v; + + seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type), + print_entry->start, print_entry->end); + kfree(print_entry); + return 0; +} + +static
struct seq_operations memtype_seq_ops = { + .start = memtype_seq_start, + .next = memtype_seq_next, + .stop = memtype_seq_stop, + .show = memtype_seq_show, +}; + +static int memtype_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &memtype_seq_ops); +} + +static const struct file_operations memtype_fops = { + .open = memtype_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init pat_memtype_list_init(void) +{ + debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, + NULL, &memtype_fops); + return 0; +} + +late_initcall(pat_memtype_list_init); + +#endif /* CONFIG_DEBUG_FS */ -- cgit v1.2.3 From e5849e71adcbb774ce40f09c1bcb48acca3b6da7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 18 Jul 2008 17:28:40 -0700 Subject: x86: remove arch_get_ram_range no user now Signed-off-by: Yinghai Lu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/e820.c | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 28c29180b38..df1b32fa88d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1367,24 +1367,3 @@ void __init setup_memory_map(void) printk(KERN_INFO "BIOS-provided physical RAM map:\n"); e820_print_map(who); } - -#ifdef CONFIG_X86_64 -int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) -{ - int i; - - if (slot < 0 || slot >= e820.nr_map) - return -1; - for (i = slot; i < e820.nr_map; i++) { - if (e820.map[i].type != E820_RAM) - continue; - break; - } - if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) - return -1; - *addr = e820.map[i].addr; - *size = min_t(u64, e820.map[i].size + e820.map[i].addr, - max_pfn << PAGE_SHIFT) - *addr; - return i + 1; -} -#endif -- cgit v1.2.3 From e4f25060b87a627f5cda84b8134911d43c919458 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 19 Jul 2008 00:44:32 -0700 Subject: sparc: Remove Sparc's asm-offsets for 
sclow.S Remove Sparc's asm-offsets for sclow.S as the (E)UID/(E)GID size and offset definitions will cease to be correct if COW credentials are merged. Signed-off-by: David Howells Signed-off-by: David S. Miller --- arch/sparc/kernel/asm-offsets.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c index cd3f7694e9b..b5bb99ed892 100644 --- a/arch/sparc/kernel/asm-offsets.c +++ b/arch/sparc/kernel/asm-offsets.c @@ -18,18 +18,6 @@ int foo(void) { DEFINE(AOFF_task_thread, offsetof(struct task_struct, thread)); BLANK(); - /* XXX This is the stuff for sclow.S, kill it. */ - DEFINE(AOFF_task_pid, offsetof(struct task_struct, pid)); - DEFINE(AOFF_task_uid, offsetof(struct task_struct, uid)); - DEFINE(AOFF_task_gid, offsetof(struct task_struct, gid)); - DEFINE(AOFF_task_euid, offsetof(struct task_struct, euid)); - DEFINE(AOFF_task_egid, offsetof(struct task_struct, egid)); - /* DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); */ - DEFINE(ASIZ_task_uid, sizeof(current->uid)); - DEFINE(ASIZ_task_gid, sizeof(current->gid)); - DEFINE(ASIZ_task_euid, sizeof(current->euid)); - DEFINE(ASIZ_task_egid, sizeof(current->egid)); - BLANK(); DEFINE(AOFF_thread_fork_kpsr, offsetof(struct thread_struct, fork_kpsr)); BLANK(); -- cgit v1.2.3 From d092633bff3b19faffc480fe9810805e7792a029 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 18 Jul 2008 00:26:59 +0200 Subject: Subject: devmem, x86: fix rename of CONFIG_NONPROMISC_DEVMEM From: Arjan van de Ven Date: Sat, 19 Jul 2008 15:47:17 -0700 CONFIG_NONPROMISC_DEVMEM was a rather confusing name - but renaming it to CONFIG_PROMISC_DEVMEM causes problems on architectures that do not support this feature; this patch renames it to CONFIG_STRICT_DEVMEM, so that architectures can opt-in into it. 
( the polarity of the option is still the same as it was originally; it needs to be for now to not break architectures that don't have the infrastructure yet to support this feature) Signed-off-by: Arjan van de Ven Cc: "V.Radhakrishnan" Signed-off-by: Ingo Molnar --- --- arch/x86/Kconfig.debug | 9 +++++---- arch/x86/configs/i386_defconfig | 2 +- arch/x86/configs/x86_64_defconfig | 2 +- arch/x86/mm/pat.c | 6 +++--- 4 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index f0cf5d99079..51c82147795 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -5,14 +5,15 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" -config PROMISC_DEVMEM - bool "Allow unlimited access to /dev/mem" - default y +config STRICT_DEVMEM + bool "Filter access to /dev/mem" help If this option is left on, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental access to this is obviously disastrous, but specific access can - be used by people debugging the kernel. + be used by people debugging the kernel. Note that with PAT support + enabled, even in this case there are restrictions on /dev/mem + use due to the cache aliasing requirements. If this option is switched on, the /dev/mem file only allows userspace access to PCI space and the BIOS code and data regions.
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 9bc34e2033e..4d73f53287b 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -2047,7 +2047,7 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y # CONFIG_SAMPLES is not set # CONFIG_KGDB is not set CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_NONPROMISC_DEVMEM is not set +# CONFIG_STRICT_DEVMEM is not set CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index ae5124e064d..a4045242962 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -2012,7 +2012,7 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y # CONFIG_SAMPLES is not set # CONFIG_KGDB is not set CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_NONPROMISC_DEVMEM is not set +# CONFIG_STRICT_DEVMEM is not set CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index c34dc483839..6bb597f4d70 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -373,8 +373,8 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, return vma_prot; } -#ifndef CONFIG_PROMISC_DEVMEM -/* This check is done in drivers/char/mem.c in case of !PROMISC_DEVMEM*/ +#ifdef CONFIG_STRICT_DEVMEM +/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/ static inline int range_is_allowed(unsigned long pfn, unsigned long size) { return 1; @@ -398,7 +398,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) } return 1; } -#endif /* CONFIG_PROMISC_DEVMEM */ +#endif /* CONFIG_STRICT_DEVMEM */ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) -- cgit v1.2.3 From 5f1f2b3d9dbaee82cd532f28da459adcbf611499 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 18 Jul 2008 16:16:23 -0700 Subject: x86: improve debug printout: add target bootmem range in 
early_res_to_bootmem() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index df1b32fa88d..6c60aeaac15 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -877,7 +877,8 @@ void __init early_res_to_bootmem(u64 start, u64 end) for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) count++; - printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count); + printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", + count, start, end); for (i = 0; i < count; i++) { struct early_res *r = &early_res[i]; printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, -- cgit v1.2.3 From 3c9cb6de1e5ad37d1558fdb0d9d2bed5a7bac0d9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 19 Jul 2008 02:07:25 -0700 Subject: x86: introduce x86_quirks introduce x86_quirks array of boot-time quirk methods. No change in functionality intended. 
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 9 ++------- arch/x86/kernel/mpparse.c | 17 +++++------------ arch/x86/kernel/setup.c | 4 ++++ arch/x86/kernel/visws_quirks.c | 42 ++++++++++++++++++++---------------------- arch/x86/mach-default/setup.c | 24 ++++++++---------------- 5 files changed, 39 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 6c60aeaac15..9af89078f7b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1299,11 +1299,6 @@ void __init e820_reserve_resources(void) } } -/* - * Non-standard memory setup can be specified via this quirk: - */ -char * (*arch_memory_setup_quirk)(void); - char *__init default_machine_specific_memory_setup(void) { char *who = "BIOS-e820"; @@ -1344,8 +1339,8 @@ char *__init default_machine_specific_memory_setup(void) char *__init __attribute__((weak)) machine_specific_memory_setup(void) { - if (arch_memory_setup_quirk) { - char *who = arch_memory_setup_quirk(); + if (x86_quirks->arch_memory_setup) { + char *who = x86_quirks->arch_memory_setup(); if (who) return who; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3b25e49380c..3cbd2df3abe 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_32 @@ -725,12 +726,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) static struct intel_mp_floating *mpf_found; -/* - * Machine specific quirk for finding the SMP config before other setup - * activities destroy the table: - */ -int (*mach_get_smp_config_quirk)(unsigned int early); - /* * Scan the memory blocks for an SMP configuration block. 
*/ @@ -738,8 +733,8 @@ static void __init __get_smp_config(unsigned int early) { struct intel_mp_floating *mpf = mpf_found; - if (mach_get_smp_config_quirk) { - if (mach_get_smp_config_quirk(early)) + if (x86_quirks->mach_get_smp_config) { + if (x86_quirks->mach_get_smp_config(early)) return; } if (acpi_lapic && early) @@ -899,14 +894,12 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, return 0; } -int (*mach_find_smp_config_quirk)(unsigned int reserve); - static void __init __find_smp_config(unsigned int reserve) { unsigned int address; - if (mach_find_smp_config_quirk) { - if (mach_find_smp_config_quirk(reserve)) + if (x86_quirks->mach_find_smp_config) { + if (x86_quirks->mach_find_smp_config(reserve)) return; } /* diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4a2b8acc1d9..bbcc13d0b56 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -574,6 +574,10 @@ static int __init setup_elfcorehdr(char *arg) early_param("elfcorehdr", setup_elfcorehdr); #endif +static struct x86_quirks default_x86_quirks __initdata; + +struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; + /* * Determine if we were loaded by an EFI loader. 
If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index e94bdb6add1..41e01b145c4 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -73,7 +73,7 @@ int is_visws_box(void) return visws_board_type >= 0; } -static int __init visws_time_init_quirk(void) +static int __init visws_time_init(void) { printk(KERN_INFO "Starting Cobalt Timer system clock\n"); @@ -93,7 +93,7 @@ static int __init visws_time_init_quirk(void) return 0; } -static int __init visws_pre_intr_init_quirk(void) +static int __init visws_pre_intr_init(void) { init_VISWS_APIC_irqs(); @@ -114,7 +114,7 @@ EXPORT_SYMBOL(sgivwfb_mem_size); long long mem_size __initdata = 0; -static char * __init visws_memory_setup_quirk(void) +static char * __init visws_memory_setup(void) { long long gfx_mem_size = 8 * MB; @@ -176,7 +176,7 @@ static void visws_machine_power_off(void) outl(PIIX_SPECIAL_STOP, 0xCFC); } -static int __init visws_get_smp_config_quirk(unsigned int early) +static int __init visws_get_smp_config(unsigned int early) { /* * Prevent MP-table parsing by the generic code: @@ -192,7 +192,7 @@ extern unsigned int __cpuinitdata maxcpus; * No problem for Linux. 
*/ -static void __init MP_processor_info (struct mpc_config_processor *m) +static void __init MP_processor_info(struct mpc_config_processor *m) { int ver, logical_apicid; physid_mask_t apic_cpus; @@ -232,7 +232,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m) apic_version[m->mpc_apicid] = ver; } -int __init visws_find_smp_config_quirk(unsigned int reserve) +static int __init visws_find_smp_config(unsigned int reserve) { struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); @@ -258,7 +258,17 @@ int __init visws_find_smp_config_quirk(unsigned int reserve) return 1; } -extern int visws_trap_init_quirk(void); +static int visws_trap_init(void); + +static struct x86_quirks visws_x86_quirks __initdata = { + .arch_time_init = visws_time_init, + .arch_pre_intr_init = visws_pre_intr_init, + .arch_memory_setup = visws_memory_setup, + .arch_intr_init = NULL, + .arch_trap_init = visws_trap_init, + .mach_get_smp_config = visws_get_smp_config, + .mach_find_smp_config = visws_find_smp_config, +}; void __init visws_early_detect(void) { @@ -272,16 +282,10 @@ void __init visws_early_detect(void) /* * Install special quirks for timer, interrupt and memory setup: - */ - arch_time_init_quirk = visws_time_init_quirk; - arch_pre_intr_init_quirk = visws_pre_intr_init_quirk; - arch_memory_setup_quirk = visws_memory_setup_quirk; - - /* * Fall back to generic behavior for traps: + * Override generic MP-table parsing: */ - arch_intr_init_quirk = NULL; - arch_trap_init_quirk = visws_trap_init_quirk; + x86_quirks = &visws_x86_quirks; /* * Install reboot quirks: @@ -294,12 +298,6 @@ void __init visws_early_detect(void) */ no_broadcast = 0; - /* - * Override generic MP-table parsing: - */ - mach_get_smp_config_quirk = visws_get_smp_config_quirk; - mach_find_smp_config_quirk = visws_find_smp_config_quirk; - #ifdef CONFIG_X86_IO_APIC /* * Turn off IO-APIC detection and initialization: @@ -426,7 +424,7 
@@ static __init void cobalt_init(void) co_apic_read(CO_APIC_ID)); } -int __init visws_trap_init_quirk(void) +static int __init visws_trap_init(void) { lithium_init(); cobalt_init(); diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 48278fa7d3d..631dbed9fb9 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -10,14 +10,6 @@ #include #include -/* - * Any quirks to be performed to initialize timers/irqs/etc? - */ -int (*arch_time_init_quirk)(void); -int (*arch_pre_intr_init_quirk)(void); -int (*arch_intr_init_quirk)(void); -int (*arch_trap_init_quirk)(void); - #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) #else @@ -37,8 +29,8 @@ int no_broadcast=DEFAULT_SEND_IPI; **/ void __init pre_intr_init_hook(void) { - if (arch_pre_intr_init_quirk) { - if (arch_pre_intr_init_quirk()) + if (x86_quirks->arch_pre_intr_init) { + if (x86_quirks->arch_pre_intr_init()) return; } init_ISA_irqs(); @@ -64,8 +56,8 @@ static struct irqaction irq2 = { **/ void __init intr_init_hook(void) { - if (arch_intr_init_quirk) { - if (arch_intr_init_quirk()) + if (x86_quirks->arch_intr_init) { + if (x86_quirks->arch_intr_init()) return; } #ifdef CONFIG_X86_LOCAL_APIC @@ -97,8 +89,8 @@ void __init pre_setup_arch_hook(void) **/ void __init trap_init_hook(void) { - if (arch_trap_init_quirk) { - if (arch_trap_init_quirk()) + if (x86_quirks->arch_trap_init) { + if (x86_quirks->arch_trap_init()) return; } } @@ -119,13 +111,13 @@ static struct irqaction irq0 = { **/ void __init time_init_hook(void) { - if (arch_time_init_quirk) { + if (x86_quirks->arch_time_init) { /* * A nonzero return code does not mean failure, it means * that the architecture quirk does not want any * generic (timer) setup to be performed after this: */ - if (arch_time_init_quirk()) + if (x86_quirks->arch_time_init()) return; } -- cgit v1.2.3 From 64898a8bad8c94ad7a4bd5cc86b66edfbb081f4a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 19 Jul 2008 18:01:16 -0700 
Subject: x86: extend and use x86_quirks to clean up NUMAQ code add these new x86_quirks methods: int *mpc_record; int (*mpc_apic_id)(struct mpc_config_processor *m); void (*mpc_oem_bus_info)(struct mpc_config_bus *m, char *name); void (*mpc_oem_pci_bus)(struct mpc_config_bus *m); void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, unsigned short oemsize); ... and move NUMAQ related mps table handling to numaq_32.c. also move the call to smp_read_mpc_oem() to smp_read_mpc() directly. Should not change functionality, albeit it would be nice to get it tested on real NUMAQ as well ... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 191 +++++---------------------------------------- arch/x86/kernel/numaq_32.c | 190 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 200 insertions(+), 181 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3cbd2df3abe..6ae005ccaed 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -49,76 +49,6 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -#ifdef CONFIG_X86_NUMAQ -int found_numaq; -/* - * Have to match translation table entries to main table entries by counter - * hence the mpc_record variable .... can't see a less disgusting way of - * doing this .... - */ -struct mpc_config_translation { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; -}; - - -static int mpc_record; -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] - __cpuinitdata; - -static inline int generate_logical_apicid(int quad, int phys_apicid) -{ - return (quad << 4) + (phys_apicid ? 
phys_apicid << 1 : 1); -} - - -static inline int mpc_apic_id(struct mpc_config_processor *m, - struct mpc_config_translation *translation_record) -{ - int quad = translation_record->trans_quad; - int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); - - printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", - m->mpc_apicid, - (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, - (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, - m->mpc_apicver, quad, logical_apicid); - return logical_apicid; -} - -int mp_bus_id_to_node[MAX_MP_BUSSES]; - -int mp_bus_id_to_local[MAX_MP_BUSSES]; - -static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, - struct mpc_config_translation *translation) -{ - int quad = translation->trans_quad; - int local = translation->trans_local; - - mp_bus_id_to_node[m->mpc_busid] = quad; - mp_bus_id_to_local[m->mpc_busid] = local; - printk(KERN_INFO "Bus #%d is %s (node %d)\n", - m->mpc_busid, name, quad); -} - -int quad_local_to_mp_bus_id [NR_CPUS/4][4]; -static void mpc_oem_pci_bus(struct mpc_config_bus *m, - struct mpc_config_translation *translation) -{ - int quad = translation->trans_quad; - int local = translation->trans_local; - - quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; -} - -#endif - static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { int apicid; @@ -128,14 +58,12 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) disabled_cpus++; return; } -#ifdef CONFIG_X86_NUMAQ - if (found_numaq) - apicid = mpc_apic_id(m, translation_table[mpc_record]); + + if (x86_quirks->mpc_apic_id) + apicid = x86_quirks->mpc_apic_id(m); else apicid = m->mpc_apicid; -#else - apicid = m->mpc_apicid; -#endif + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { bootup_cpu = " (Bootup-CPU)"; boot_cpu_physical_apicid = m->mpc_apicid; @@ -152,12 +80,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m) memcpy(str, m->mpc_bustype, 6); str[6] = 0; -#ifdef CONFIG_X86_NUMAQ - if 
(found_numaq) - mpc_oem_bus_info(m, str, translation_table[mpc_record]); -#else - printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); -#endif + if (x86_quirks->mpc_oem_bus_info) + x86_quirks->mpc_oem_bus_info(m, str); + else + printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); #if MAX_MP_BUSSES < 256 if (m->mpc_busid >= MAX_MP_BUSSES) { @@ -174,10 +100,9 @@ static void __init MP_bus_info(struct mpc_config_bus *m) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; #endif } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { -#ifdef CONFIG_X86_NUMAQ - if (found_numaq) - mpc_oem_pci_bus(m, translation_table[mpc_record]); -#endif + if (x86_quirks->mpc_oem_pci_bus) + x86_quirks->mpc_oem_pci_bus(m); + clear_bit(m->mpc_busid, mp_bus_not_pci); #if defined(CONFIG_EISA) || defined (CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; @@ -317,83 +242,6 @@ static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); } -#ifdef CONFIG_X86_NUMAQ -static void __init MP_translation_info(struct mpc_config_translation *m) -{ - printk(KERN_INFO - "Translation: record %d, type %d, quad %d, global %d, local %d\n", - mpc_record, m->trans_type, m->trans_quad, m->trans_global, - m->trans_local); - - if (mpc_record >= MAX_MPC_ENTRY) - printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); - else - translation_table[mpc_record] = m; /* stash this for later */ - if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) - node_set_online(m->trans_quad); -} - -/* - * Read/parse the MPC oem tables - */ - -static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, - unsigned short oemsize) -{ - int count = sizeof(*oemtable); /* the header size */ - unsigned char *oemptr = ((unsigned char *)oemtable) + count; - - mpc_record = 0; - printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... 
\n", - oemtable); - if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { - printk(KERN_WARNING - "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", - oemtable->oem_signature[0], oemtable->oem_signature[1], - oemtable->oem_signature[2], oemtable->oem_signature[3]); - return; - } - if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { - printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); - return; - } - while (count < oemtable->oem_length) { - switch (*oemptr) { - case MP_TRANSLATION: - { - struct mpc_config_translation *m = - (struct mpc_config_translation *)oemptr; - MP_translation_info(m); - oemptr += sizeof(*m); - count += sizeof(*m); - ++mpc_record; - break; - } - default: - { - printk(KERN_WARNING - "Unrecognised OEM table entry type! - %d\n", - (int)*oemptr); - return; - } - } - } -} - -void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (strncmp(oem, "IBM NUMA", 8)) - printk("Warning! Not a NUMA-Q system!\n"); - else - found_numaq = 1; - - if (mpc->mpc_oemptr) - smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, - mpc->mpc_oemsize); -} -#endif /* CONFIG_X86_NUMAQ */ - /* * Read/parse the MPC */ @@ -458,7 +306,6 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) } else mps_oem_check(mpc, oem, str); #endif - /* save the local APIC address, it might be non-default */ if (!acpi_lapic) mp_lapic_addr = mpc->mpc_lapic; @@ -466,12 +313,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) if (early) return 1; + if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) { + struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr; + x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize); + } + /* * Now process the configuration blocks. 
*/ -#ifdef CONFIG_X86_NUMAQ - mpc_record = 0; -#endif + if (x86_quirks->mpc_record) + *x86_quirks->mpc_record = 0; + while (count < mpc->mpc_length) { switch (*mpt) { case MP_PROCESSOR: @@ -537,9 +389,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) count = mpc->mpc_length; break; } -#ifdef CONFIG_X86_NUMAQ - ++mpc_record; -#endif + if (x86_quirks->mpc_record) + (*x86_quirks->mpc_record)++; } #ifdef CONFIG_X86_GENERICARCH diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index a23e8233b9a..7f4e00d1d89 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -33,6 +33,7 @@ #include #include #include +#include #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) @@ -71,6 +72,181 @@ static void __init smp_dump_qct(void) } } + +void __init numaq_tsc_disable(void) +{ + if (!found_numaq) + return; + + if (num_online_nodes() > 1) { + printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); + setup_clear_cpu_cap(X86_FEATURE_TSC); + } +} + +int found_numaq; +/* + * Have to match translation table entries to main table entries by counter + * hence the mpc_record variable .... can't see a less disgusting way of + * doing this .... + */ +struct mpc_config_translation { + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; +}; + +/* x86_quirks member */ +static int mpc_record; +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] + __cpuinitdata; + +static inline int generate_logical_apicid(int quad, int phys_apicid) +{ + return (quad << 4) + (phys_apicid ? 
phys_apicid << 1 : 1); +} + +/* x86_quirks member */ +static int mpc_apic_id(struct mpc_config_processor *m) +{ + int quad = translation_table[mpc_record]->trans_quad; + int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); + + printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver, quad, logical_apicid); + return logical_apicid; +} + +int mp_bus_id_to_node[MAX_MP_BUSSES]; + +int mp_bus_id_to_local[MAX_MP_BUSSES]; + +/* x86_quirks member */ +static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name) +{ + int quad = translation_table[mpc_record]->trans_quad; + int local = translation_table[mpc_record]->trans_local; + + mp_bus_id_to_node[m->mpc_busid] = quad; + mp_bus_id_to_local[m->mpc_busid] = local; + printk(KERN_INFO "Bus #%d is %s (node %d)\n", + m->mpc_busid, name, quad); +} + +int quad_local_to_mp_bus_id [NR_CPUS/4][4]; + +/* x86_quirks member */ +static void mpc_oem_pci_bus(struct mpc_config_bus *m) +{ + int quad = translation_table[mpc_record]->trans_quad; + int local = translation_table[mpc_record]->trans_local; + + quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; +} + +static void __init MP_translation_info(struct mpc_config_translation *m) +{ + printk(KERN_INFO + "Translation: record %d, type %d, quad %d, global %d, local %d\n", + mpc_record, m->trans_type, m->trans_quad, m->trans_global, + m->trans_local); + + if (mpc_record >= MAX_MPC_ENTRY) + printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); + else + translation_table[mpc_record] = m; /* stash this for later */ + if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) + node_set_online(m->trans_quad); +} + +static int __init mpf_checksum(unsigned char *mp, int len) +{ + int sum = 0; + + while (len--) + sum += *mp++; + + return sum & 0xFF; +} + +/* + * Read/parse the MPC oem tables + */ + +static void __init 
smp_read_mpc_oem(struct mp_config_oemtable *oemtable, + unsigned short oemsize) +{ + int count = sizeof(*oemtable); /* the header size */ + unsigned char *oemptr = ((unsigned char *)oemtable) + count; + + mpc_record = 0; + printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", + oemtable); + if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { + printk(KERN_WARNING + "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", + oemtable->oem_signature[0], oemtable->oem_signature[1], + oemtable->oem_signature[2], oemtable->oem_signature[3]); + return; + } + if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { + printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); + return; + } + while (count < oemtable->oem_length) { + switch (*oemptr) { + case MP_TRANSLATION: + { + struct mpc_config_translation *m = + (struct mpc_config_translation *)oemptr; + MP_translation_info(m); + oemptr += sizeof(*m); + count += sizeof(*m); + ++mpc_record; + break; + } + default: + { + printk(KERN_WARNING + "Unrecognised OEM table entry type! - %d\n", + (int)*oemptr); + return; + } + } + } +} + +static struct x86_quirks numaq_x86_quirks __initdata = { + .arch_time_init = NULL, + .arch_pre_intr_init = NULL, + .arch_memory_setup = NULL, + .arch_intr_init = NULL, + .arch_trap_init = NULL, + .mach_get_smp_config = NULL, + .mach_find_smp_config = NULL, + .mpc_record = &mpc_record, + .mpc_apic_id = mpc_apic_id, + .mpc_oem_bus_info = mpc_oem_bus_info, + .mpc_oem_pci_bus = mpc_oem_pci_bus, + .smp_read_mpc_oem = smp_read_mpc_oem, +}; + +void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) +{ + if (strncmp(oem, "IBM NUMA", 8)) + printk("Warning! 
Not a NUMA-Q system!\n"); + else + found_numaq = 1; +} + static __init void early_check_numaq(void) { /* @@ -82,6 +258,9 @@ static __init void early_check_numaq(void) */ if (smp_found_config) early_get_smp_config(); + + if (found_numaq) + x86_quirks = &numaq_x86_quirks; } int __init get_memcfg_numaq(void) @@ -92,14 +271,3 @@ int __init get_memcfg_numaq(void) smp_dump_qct(); return 1; } - -void __init numaq_tsc_disable(void) -{ - if (!found_numaq) - return; - - if (num_online_nodes() > 1) { - printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); - setup_clear_cpu_cap(X86_FEATURE_TSC); - } -} -- cgit v1.2.3 From 63b5d7af2556a7de6bf72c5dd0b85a32fb4c3767 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 19 Jul 2008 18:02:26 -0700 Subject: x86: add ->pre_time_init to x86_quirks so NUMAQ can use that to call numaq_pre_time_init() This allows us to remove a NUMAQ special from arch/x86/kernel/setup.c. (and paves the way to remove the NUMAQ subarch) Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/numaq_32.c | 7 +++++++ arch/x86/kernel/setup.c | 8 -------- arch/x86/kernel/time_32.c | 1 + arch/x86/mach-default/setup.c | 10 ++++++++++ 4 files changed, 18 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 7f4e00d1d89..b8c45610b20 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -84,6 +84,12 @@ void __init numaq_tsc_disable(void) } } +static int __init numaq_pre_time_init(void) +{ + numaq_tsc_disable(); + return 0; +} + int found_numaq; /* * Have to match translation table entries to main table entries by counter @@ -224,6 +230,7 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, } static struct x86_quirks numaq_x86_quirks __initdata = { + .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, .arch_pre_intr_init = NULL, .arch_memory_setup = NULL, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 
bbcc13d0b56..4064616cfa8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -853,14 +853,6 @@ void __init setup_arch(char **cmdline_p) init_cpu_to_node(); #endif -#ifdef CONFIG_X86_NUMAQ - /* - * need to check online nodes num, call it - * here before time_init/tsc_init - */ - numaq_tsc_disable(); -#endif - init_apic_mappings(); ioapic_init_mappings(); diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 059ca6ee59b..ffe3c664afc 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -129,6 +129,7 @@ void __init hpet_time_init(void) */ void __init time_init(void) { + pre_time_init_hook(); tsc_init(); late_time_init = choose_time_init(); } diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 631dbed9fb9..3d317836be9 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -102,6 +102,16 @@ static struct irqaction irq0 = { .name = "timer" }; +/** + * pre_time_init_hook - do any specific initialisations before. + * + **/ +void __init pre_time_init_hook(void) +{ + if (x86_quirks->arch_pre_time_init) + x86_quirks->arch_pre_time_init(); +} + /** * time_init_hook - do any specific initialisations for the system timer. * -- cgit v1.2.3 From e3a61b0a8c0e342e700a61cd554b01050f333a36 Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Sat, 19 Jul 2008 23:32:54 +0100 Subject: x86: add unknown_nmi_panic kernel parameter It's not possible to enable the unknown_nmi_panic sysctl option until init is run. It's useful to be able to panic the kernel during boot too, this adds a parameter to enable this option. 
Signed-off-by: Simon Arlott Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ec024b3baad..e0b44b7b717 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -448,6 +448,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) #ifdef CONFIG_SYSCTL +static int __init setup_unknown_nmi_panic(char *str) +{ + unknown_nmi_panic = 1; + return 1; +} +__setup("unknown_nmi_panic", setup_unknown_nmi_panic); + static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) { unsigned char reason = get_nmi_reason(); -- cgit v1.2.3 From 8b2cf73cc11cf29a21c51c453a3205f23d888915 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sun, 27 Apr 2008 12:14:13 -0700 Subject: KVM: add statics were possible, function definition in lapic.h Noticed by sparse: arch/x86/kvm/vmx.c:1583:6: warning: symbol 'vmx_disable_intercept_for_msr' was not declared. Should it be static? arch/x86/kvm/x86.c:3406:5: warning: symbol 'kvm_task_switch_16' was not declared. Should it be static? arch/x86/kvm/x86.c:3429:5: warning: symbol 'kvm_task_switch_32' was not declared. Should it be static? arch/x86/kvm/mmu.c:1968:6: warning: symbol 'kvm_mmu_remove_one_alloc_mmu_page' was not declared. Should it be static? arch/x86/kvm/mmu.c:2014:6: warning: symbol 'mmu_destroy_caches' was not declared. Should it be static? arch/x86/kvm/lapic.c:862:5: warning: symbol 'kvm_lapic_get_base' was not declared. Should it be static? arch/x86/kvm/i8254.c:94:5: warning: symbol 'pit_get_gate' was not declared. Should it be static? arch/x86/kvm/i8254.c:196:5: warning: symbol '__pit_timer_fn' was not declared. Should it be static? arch/x86/kvm/i8254.c:561:6: warning: symbol '__inject_pit_timer_intr' was not declared. Should it be static? 
Signed-off-by: Harvey Harrison Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 6 +++--- arch/x86/kvm/lapic.h | 1 + arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 4 ++-- 5 files changed, 8 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 3829aa7b663..735ec9a0b36 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -91,7 +91,7 @@ static void pit_set_gate(struct kvm *kvm, int channel, u32 val) c->gate = val; } -int pit_get_gate(struct kvm *kvm, int channel) +static int pit_get_gate(struct kvm *kvm, int channel) { WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); @@ -193,7 +193,7 @@ static void pit_latch_status(struct kvm *kvm, int channel) } } -int __pit_timer_fn(struct kvm_kpit_state *ps) +static int __pit_timer_fn(struct kvm_kpit_state *ps) { struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0]; struct kvm_kpit_timer *pt = &ps->pit_timer; @@ -575,7 +575,7 @@ void kvm_free_pit(struct kvm *kvm) } } -void __inject_pit_timer_intr(struct kvm *kvm) +static void __inject_pit_timer_intr(struct kvm *kvm) { mutex_lock(&kvm->lock); kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 676c396c9ce..81858881287 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -31,6 +31,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); +u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7e7c3969f7a..8e449dbcc59 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1948,7 +1948,7 @@ void kvm_mmu_zap_all(struct kvm *kvm) kvm_flush_remote_tlbs(kvm); } -void 
kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) +static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) { struct kvm_mmu_page *page; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 10ce6ee4c49..39739305980 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1821,7 +1821,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx) spin_unlock(&vmx_vpid_lock); } -void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) +static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) { void *va; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0faa2546b1c..45dc2b6a9c8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3449,7 +3449,7 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, return 0; } -int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, +static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, struct desc_struct *cseg_desc, struct desc_struct *nseg_desc) { @@ -3472,7 +3472,7 @@ out: return ret; } -int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, +static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, struct desc_struct *cseg_desc, struct desc_struct *nseg_desc) { -- cgit v1.2.3 From c7bf23babc959b186335d2640959a1b8633588de Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 30 Apr 2008 17:55:59 +0200 Subject: KVM: VMX: move APIC_ACCESS trace entry to generic code This patch moves the trace entry for APIC accesses from the VMX code to the generic lapic code. This way APIC accesses from SVM will also be traced. 
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 4 ++++ arch/x86/kvm/vmx.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ebc03f5ae16..f9201fbc61d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -572,6 +572,8 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) { u32 val = 0; + KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler); + if (offset >= LAPIC_MMIO_LENGTH) return 0; @@ -695,6 +697,8 @@ static void apic_mmio_write(struct kvm_io_device *this, offset &= 0xff0; + KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler); + switch (offset) { case APIC_ID: /* Local APIC ID */ apic_set_reg(apic, APIC_ID, val); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 39739305980..8c951d3eab3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2554,8 +2554,6 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) exit_qualification = vmcs_read64(EXIT_QUALIFICATION); offset = exit_qualification & 0xffful; - KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler); - er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); if (er != EMULATE_DONE) { -- cgit v1.2.3 From c47f098d69ed2bd7343e54095ff4aa2533253bee Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 30 Apr 2008 17:56:00 +0200 Subject: KVM: SVM: implement dedicated NMI exit handler With an exit handler for NMI intercepts it's possible to account for them using kvmtrace.
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6b0d5fa5bab..8a2118b09fd 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1081,6 +1081,11 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); } +static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + return 1; +} + static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { return 1; @@ -1365,7 +1370,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, [SVM_EXIT_INTR] = nop_on_interception, - [SVM_EXIT_NMI] = nop_on_interception, + [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, [SVM_EXIT_INIT] = nop_on_interception, [SVM_EXIT_VINTR] = interrupt_window_interception, -- cgit v1.2.3 From a069805579a390f0fa91694f6963bcc4b2cecc6b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 30 Apr 2008 17:56:01 +0200 Subject: KVM: SVM: implement dedicated INTR exit handler With an exit handler for INTR intercepts it's possible to account for them using kvmtrace.
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8a2118b09fd..0eac1a5060a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1086,6 +1086,12 @@ static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } +static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + ++svm->vcpu.stat.irq_exits; + return 1; +} + static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { return 1; @@ -1369,7 +1375,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, - [SVM_EXIT_INTR] = nop_on_interception, + [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, [SVM_EXIT_INIT] = nop_on_interception, -- cgit v1.2.3 From 54e445ca8411ec892f986d9f8c11b8c1806ecde4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 30 Apr 2008 17:56:02 +0200 Subject: KVM: add missing kvmtrace bits This patch adds some kvmtrace bits to the generic x86 code where it is instrumented from SVM. 
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 45dc2b6a9c8..59084a3981c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2020,6 +2020,7 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) int emulate_clts(struct kvm_vcpu *vcpu) { + KVMTRACE_0D(CLTS, vcpu, handler); kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); return X86EMUL_CONTINUE; } @@ -2600,27 +2601,41 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) { + unsigned long value; + kvm_x86_ops->decache_cr4_guest_bits(vcpu); switch (cr) { case 0: - return vcpu->arch.cr0; + value = vcpu->arch.cr0; + break; case 2: - return vcpu->arch.cr2; + value = vcpu->arch.cr2; + break; case 3: - return vcpu->arch.cr3; + value = vcpu->arch.cr3; + break; case 4: - return vcpu->arch.cr4; + value = vcpu->arch.cr4; + break; case 8: - return kvm_get_cr8(vcpu); + value = kvm_get_cr8(vcpu); + break; default: vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); return 0; } + KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value, + (u32)((u64)value >> 32), handler); + + return value; } void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, unsigned long *rflags) { + KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val, + (u32)((u64)val >> 32), handler); + switch (cr) { case 0: kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); -- cgit v1.2.3 From af9ca2d703f4cefbf6441bfe127c4191092ad394 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 30 Apr 2008 17:56:03 +0200 Subject: KVM: SVM: add missing kvmtrace markers This patch adds the missing kvmtrace markers to the svm module of kvm. 
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0eac1a5060a..8953292acfd 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -949,7 +949,9 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) { - return to_svm(vcpu)->db_regs[dr]; + unsigned long val = to_svm(vcpu)->db_regs[dr]; + KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); + return val; } static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, @@ -1004,6 +1006,12 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) fault_address = svm->vmcb->control.exit_info_2; error_code = svm->vmcb->control.exit_info_1; + + if (!npt_enabled) + KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code, + (u32)fault_address, (u32)(fault_address >> 32), + handler); + return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); } @@ -1083,12 +1091,14 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { + KVMTRACE_0D(NMI, &svm->vcpu, handler); return 1; } static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { ++svm->vcpu.stat.irq_exits; + KVMTRACE_0D(INTR, &svm->vcpu, handler); return 1; } @@ -1230,6 +1240,9 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) if (svm_get_msr(&svm->vcpu, ecx, &data)) kvm_inject_gp(&svm->vcpu, 0); else { + KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data, + (u32)(data >> 32), handler); + svm->vmcb->save.rax = data & 0xffffffff; svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; svm->next_rip = svm->vmcb->save.rip + 2; @@ -1315,6 +1328,10 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) u32 ecx = 
svm->vcpu.arch.regs[VCPU_REGS_RCX]; u64 data = (svm->vmcb->save.rax & -1u) | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); + + KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32), + handler); + svm->next_rip = svm->vmcb->save.rip + 2; if (svm_set_msr(&svm->vcpu, ecx, data)) kvm_inject_gp(&svm->vcpu, 0); @@ -1334,6 +1351,8 @@ static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int interrupt_window_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { + KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler); + svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; /* @@ -1408,6 +1427,9 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); u32 exit_code = svm->vmcb->control.exit_code; + KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip, + (u32)((u64)svm->vmcb->save.rip >> 32), entryexit); + if (npt_enabled) { int mmu_reload = 0; if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { @@ -1481,6 +1503,8 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) { struct vmcb_control_area *control; + KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler); + control = &svm->vmcb->control; control->int_vector = irq; control->int_ctl &= ~V_INTR_PRIO_MASK; -- cgit v1.2.3 From d2ebb4103ff349af6dac14955bf93e57487a6694 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 30 Apr 2008 17:56:04 +0200 Subject: KVM: SVM: add tracing support for TDP page faults To distinguish between real page faults and nested page faults they should be traced as different events. This is implemented by this patch. 
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8953292acfd..218949cce1a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1011,6 +1011,10 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code, (u32)fault_address, (u32)(fault_address >> 32), handler); + else + KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code, + (u32)fault_address, (u32)(fault_address >> 32), + handler); return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); } -- cgit v1.2.3 From f697554515b06e8d7264f316b25e6da943407142 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Fri, 2 May 2008 17:02:23 +0200 Subject: KVM: PIT: support mode 3 The in-kernel PIT emulation ignores pending timers if operating under mode 3, which for example Hurd uses. This mode should output a square wave, high for (N+1)/2 counts and low for (N-1)/2 counts. As we only care about the resulting interrupts, the period is N, and mode 3 is the same as mode 2 with regard to interrupts. Signed-off-by: Aurelien Jarno Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 735ec9a0b36..60074dc66bd 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -308,6 +308,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) create_pit_timer(&ps->pit_timer, val, 0); break; case 2: + case 3: create_pit_timer(&ps->pit_timer, val, 1); break; default: -- cgit v1.2.3 From 14ae51b6c068ef7ab52dc2d53fe226e6189f2ab2 Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Mon, 5 May 2008 13:05:16 -0400 Subject: KVM: SVM: Fake MSR_K7 performance counters Attached is a patch that fixes a guest crash when booting older Linux kernels. 
The problem stems from the fact that we are currently emulating MSR_K7_EVNTSEL[0-3], but not emulating MSR_K7_PERFCTR[0-3]. Because of this, setup_k7_watchdog() in the Linux kernel receives a GPF when it attempts to write into MSR_K7_PERFCTR, which causes an oops. The patch fixes it by just "fake" emulating the appropriate MSRs, throwing away the data in the process. This causes the NMI watchdog to not actually work, but it's not such a big deal in a virtualized environment. When we get a write to one of these counters, we printk_ratelimit() a warning. I decided to print it out for all writes, even if the data is 0; it doesn't seem to make sense to me to special case when data == 0. Tested by myself on a RHEL-4 guest, and Joerg Roedel on a Windows XP 64-bit guest. Signed-off-by: Chris Lalancette Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 218949cce1a..992ab711587 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1312,16 +1312,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) case MSR_K7_EVNTSEL1: case MSR_K7_EVNTSEL2: case MSR_K7_EVNTSEL3: + case MSR_K7_PERFCTR0: + case MSR_K7_PERFCTR1: + case MSR_K7_PERFCTR2: + case MSR_K7_PERFCTR3: /* - * only support writing 0 to the performance counters for now - * to make Windows happy. Should be replaced by a real - * performance counter emulation later. 
+ * Just discard all writes to the performance counters; this + * should keep both older linux and windows 64-bit guests + * happy */ - if (data != 0) - goto unhandled; + pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data); + break; default: - unhandled: return kvm_set_msr_common(vcpu, ecx, data); } return 0; -- cgit v1.2.3 From 7682f2d0dd3ff5bd2756eac018a5b4e7e30ef16c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 12 May 2008 19:25:43 +0300 Subject: KVM: VMX: Trivial vmcs_write64() code simplification Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8c951d3eab3..fff8e23433d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -431,10 +431,8 @@ static void vmcs_write32(unsigned long field, u32 value) static void vmcs_write64(unsigned long field, u64 value) { -#ifdef CONFIG_X86_64 - vmcs_writel(field, value); -#else vmcs_writel(field, value); +#ifndef CONFIG_X86_64 asm volatile (""); vmcs_writel(field+1, value >> 32); #endif -- cgit v1.2.3 From 1b7fcd3263e5f12dba43d27b64e1578bec070c28 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 15 May 2008 13:51:35 +0300 Subject: KVM: MMU: Fix false flooding when a pte points to page table The KVM MMU tries to detect when a speculative pte update is not actually used by demand fault, by checking the accessed bit of the shadow pte. If the shadow pte has not been accessed, we deem that page table flooded and remove the shadow page table, allowing further pte updates to proceed without emulation. However, if the pte itself points at a page table and only used for write operations, the accessed bit will never be set since all access will happen through the emulator. This is exactly what happens with kscand on old (2.4.x) HIGHMEM kernels. 
The kernel points a kmap_atomic() pte at a page table, and then proceeds with read-modify-write operations to look at the dirty and accessed bits. We get a false flood trigger on the kmap ptes, which results in the mmu spending all its time setting up and tearing down shadows. Fix by setting the shadow accessed bit on emulated accesses. Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 17 ++++++++++++++++- arch/x86/kvm/mmu.h | 3 ++- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8e449dbcc59..53f1ed852ca 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1122,8 +1122,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, else kvm_release_pfn_clean(pfn); } - if (!ptwrite || !*ptwrite) + if (speculative) { vcpu->arch.last_pte_updated = shadow_pte; + vcpu->arch.last_pte_gfn = gfn; + } } static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) @@ -1671,6 +1673,18 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, vcpu->arch.update_pte.pfn = pfn; } +static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + u64 *spte = vcpu->arch.last_pte_updated; + + if (spte + && vcpu->arch.last_pte_gfn == gfn + && shadow_accessed_mask + && !(*spte & shadow_accessed_mask) + && is_shadow_present_pte(*spte)) + set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); +} + void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes) { @@ -1694,6 +1708,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); spin_lock(&vcpu->kvm->mmu_lock); + kvm_mmu_access_page(vcpu, gfn); kvm_mmu_free_some_pages(vcpu); ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, "pre pte write"); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 1730757bbc7..258e5d56298 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ 
-15,7 +15,8 @@ #define PT_USER_MASK (1ULL << 2) #define PT_PWT_MASK (1ULL << 3) #define PT_PCD_MASK (1ULL << 4) -#define PT_ACCESSED_MASK (1ULL << 5) +#define PT_ACCESSED_SHIFT 5 +#define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) #define PT_DIRTY_MASK (1ULL << 6) #define PT_PAGE_SIZE_MASK (1ULL << 7) #define PT_PAT_MASK (1ULL << 7) -- cgit v1.2.3 From 4ecac3fd6dc2629ad76a658a486f081c44aef10e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 13 May 2008 13:23:38 +0300 Subject: KVM: Handle virtualization instruction #UD faults during reboot KVM turns off hardware virtualization extensions during reboot, in order to disassociate the memory used by the virtualization extensions from the processor, and in order to have the system in a consistent state. Unfortunately virtual machines may still be running while this goes on, and once virtualization extensions are turned off, any virtualization instruction will #UD on execution. Fix by adding an exception handler to virtualization instructions; if we get an exception during reboot, we simply spin waiting for the reset to complete. If it's a true exception, BUG() so we can have our stack trace. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 20 +++++++++++--------- arch/x86/kvm/vmx.c | 25 ++++++++++++++----------- 2 files changed, 25 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 992ab711587..9390a31c06f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -27,6 +27,8 @@ #include +#define __ex(x) __kvm_handle_fault_on_reboot(x) + MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -129,17 +131,17 @@ static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq) static inline void clgi(void) { - asm volatile (SVM_CLGI); + asm volatile (__ex(SVM_CLGI)); } static inline void stgi(void) { - asm volatile (SVM_STGI); + asm volatile (__ex(SVM_STGI)); } static inline void invlpga(unsigned long addr, u32 asid) { - asm volatile (SVM_INVLPGA :: "a"(addr), "c"(asid)); + asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); } static inline unsigned long kvm_read_cr2(void) @@ -1758,17 +1760,17 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* Enter guest mode */ "push %%rax \n\t" "mov %c[vmcb](%[svm]), %%rax \n\t" - SVM_VMLOAD "\n\t" - SVM_VMRUN "\n\t" - SVM_VMSAVE "\n\t" + __ex(SVM_VMLOAD) "\n\t" + __ex(SVM_VMRUN) "\n\t" + __ex(SVM_VMSAVE) "\n\t" "pop %%rax \n\t" #else /* Enter guest mode */ "push %%eax \n\t" "mov %c[vmcb](%[svm]), %%eax \n\t" - SVM_VMLOAD "\n\t" - SVM_VMRUN "\n\t" - SVM_VMSAVE "\n\t" + __ex(SVM_VMLOAD) "\n\t" + __ex(SVM_VMRUN) "\n\t" + __ex(SVM_VMSAVE) "\n\t" "pop %%eax \n\t" #endif diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fff8e23433d..b80b4d14163 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -30,6 +30,8 @@ #include #include +#define __ex(x) __kvm_handle_fault_on_reboot(x) + MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -278,7 +280,7 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) u64 gva; } operand = { vpid, 0, gva }; - asm volatile (ASM_VMX_INVVPID + asm volatile (__ex(ASM_VMX_INVVPID) /* CF==1 
or ZF==1 --> rc = -1 */ "; ja 1f ; ud2 ; 1:" : : "a"(&operand), "c"(ext) : "cc", "memory"); @@ -290,7 +292,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa) u64 eptp, gpa; } operand = {eptp, gpa}; - asm volatile (ASM_VMX_INVEPT + asm volatile (__ex(ASM_VMX_INVEPT) /* CF==1 or ZF==1 --> rc = -1 */ "; ja 1f ; ud2 ; 1:\n" : : "a" (&operand), "c" (ext) : "cc", "memory"); @@ -311,7 +313,7 @@ static void vmcs_clear(struct vmcs *vmcs) u64 phys_addr = __pa(vmcs); u8 error; - asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0" + asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0" : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc", "memory"); if (error) @@ -378,7 +380,7 @@ static unsigned long vmcs_readl(unsigned long field) { unsigned long value; - asm volatile (ASM_VMX_VMREAD_RDX_RAX + asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX) : "=a"(value) : "d"(field) : "cc"); return value; } @@ -413,7 +415,7 @@ static void vmcs_writel(unsigned long field, unsigned long value) { u8 error; - asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0" + asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0" : "=q"(error) : "a"(value), "d"(field) : "cc"); if (unlikely(error)) vmwrite_error(field, value); @@ -621,7 +623,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) u8 error; per_cpu(current_vmcs, cpu) = vmx->vmcs; - asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" + asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc"); if (error) @@ -1030,13 +1032,14 @@ static void hardware_enable(void *garbage) MSR_IA32_FEATURE_CONTROL_LOCKED | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED); write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ - asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) + asm volatile (ASM_VMX_VMXON_RAX + : : "a"(&phys_addr), "m"(phys_addr) : "memory", "cc"); } static void hardware_disable(void *garbage) { - asm volatile (ASM_VMX_VMXOFF : : : "cc"); + asm volatile 
(__ex(ASM_VMX_VMXOFF) : : : "cc"); write_cr4(read_cr4() & ~X86_CR4_VMXE); } @@ -2834,7 +2837,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) "push %%edx; push %%ebp;" "push %%ecx \n\t" #endif - ASM_VMX_VMWRITE_RSP_RDX "\n\t" + __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" /* Check if vmlaunch of vmresume is needed */ "cmpl $0, %c[launched](%0) \n\t" /* Load guest registers. Don't clobber flags. */ @@ -2869,9 +2872,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) #endif /* Enter guest mode */ "jne .Llaunched \n\t" - ASM_VMX_VMLAUNCH "\n\t" + __ex(ASM_VMX_VMLAUNCH) "\n\t" "jmp .Lkvm_vmx_return \n\t" - ".Llaunched: " ASM_VMX_VMRESUME "\n\t" + ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" ".Lkvm_vmx_return: " /* Save guest registers, load host registers, keep flags */ #ifdef CONFIG_X86_64 -- cgit v1.2.3 From 543e42436643d68ad007d0bae2f485caac9c8a02 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 13 May 2008 16:22:47 +0300 Subject: KVM: VMX: Add list of potentially locally cached vcpus VMX hardware can cache the contents of a vcpu's vmcs. This cache needs to be flushed when migrating a vcpu to another cpu, or (which is the case that interests us here) when disabling hardware virtualization on a cpu. The current implementation of decaching iterates over the list of all vcpus, picks the ones that are potentially cached on the cpu that is being offlined, and flushes the cache. The problem is that it uses mutex_trylock() to gain exclusive access to the vcpu, which fires off a (benign) warning about using the mutex in an interrupt context. To avoid this, and to make things generally nicer, add a new per-cpu list of potentially cached vcpus. This makes the decaching code much simpler. The list is vmx-specific since other hardware doesn't have this issue. 
[andrea: fix crash on suspend/resume] Signed-off-by: Andrea Arcangeli Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 24 ++++++++++++++++++++++-- arch/x86/kvm/x86.c | 27 --------------------------- 2 files changed, 22 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b80b4d14163..4d179d10637 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -55,6 +55,7 @@ struct vmcs { struct vcpu_vmx { struct kvm_vcpu vcpu; + struct list_head local_vcpus_link; int launched; u8 fail; u32 idt_vectoring_info; @@ -93,6 +94,7 @@ static int init_rmode(struct kvm *kvm); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); +static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); static struct page *vmx_io_bitmap_a; static struct page *vmx_io_bitmap_b; @@ -331,6 +333,9 @@ static void __vcpu_clear(void *arg) if (per_cpu(current_vmcs, cpu) == vmx->vmcs) per_cpu(current_vmcs, cpu) = NULL; rdtscll(vmx->vcpu.arch.host_tsc); + list_del(&vmx->local_vcpus_link); + vmx->vcpu.cpu = -1; + vmx->launched = 0; } static void vcpu_clear(struct vcpu_vmx *vmx) @@ -338,7 +343,6 @@ static void vcpu_clear(struct vcpu_vmx *vmx) if (vmx->vcpu.cpu == -1) return; smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); - vmx->launched = 0; } static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) @@ -617,6 +621,10 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu_clear(vmx); kvm_migrate_timers(vcpu); vpid_sync_vcpu_all(vmx); + local_irq_disable(); + list_add(&vmx->local_vcpus_link, + &per_cpu(vcpus_on_cpu, cpu)); + local_irq_enable(); } if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { @@ -1022,6 +1030,7 @@ static void hardware_enable(void *garbage) u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); u64 old; + INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); rdmsrl(MSR_IA32_FEATURE_CONTROL, old); if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) @@ 
-1037,8 +1046,19 @@ static void hardware_enable(void *garbage) : "memory", "cc"); } +static void vmclear_local_vcpus(void) +{ + int cpu = raw_smp_processor_id(); + struct vcpu_vmx *vmx, *n; + + list_for_each_entry_safe(vmx, n, &per_cpu(vcpus_on_cpu, cpu), + local_vcpus_link) + __vcpu_clear(vmx); +} + static void hardware_disable(void *garbage) { + vmclear_local_vcpus(); asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); write_cr4(read_cr4() & ~X86_CR4_VMXE); } @@ -2967,7 +2987,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); if (vmx->vmcs) { - on_each_cpu(__vcpu_clear, vmx, 1); + vcpu_clear(vmx); free_vmcs(vmx->vmcs); vmx->vmcs = NULL; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 59084a3981c..8c14ddcaba7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -823,33 +823,6 @@ out: */ void decache_vcpus_on_cpu(int cpu) { - struct kvm *vm; - struct kvm_vcpu *vcpu; - int i; - - spin_lock(&kvm_lock); - list_for_each_entry(vm, &vm_list, vm_list) - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = vm->vcpus[i]; - if (!vcpu) - continue; - /* - * If the vcpu is locked, then it is running on some - * other cpu and therefore it is not cached on the - * cpu in question. - * - * If it's not locked, check the last cpu it executed - * on. - */ - if (mutex_trylock(&vcpu->mutex)) { - if (vcpu->cpu == cpu) { - kvm_x86_ops->vcpu_decache(vcpu); - vcpu->cpu = -1; - } - mutex_unlock(&vcpu->mutex); - } - } - spin_unlock(&kvm_lock); } int kvm_dev_ioctl_check_extension(long ext) -- cgit v1.2.3 From 7cc8883074b040aa8c1ebd3a17463b0ea3a9ef16 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 13 May 2008 16:29:20 +0300 Subject: KVM: Remove decache_vcpus_on_cpu() and related callbacks Obsoleted by the vmx-specific per-cpu list. 
Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 8 -------- arch/powerpc/kvm/powerpc.c | 4 ---- arch/s390/kvm/kvm-s390.c | 4 ---- arch/x86/kvm/svm.c | 5 ----- arch/x86/kvm/vmx.c | 6 ------ arch/x86/kvm/x86.c | 8 -------- 6 files changed, 35 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 68c978be9a5..7c504be5797 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1035,14 +1035,6 @@ static void kvm_free_vmm_area(void) } } -/* - * Make sure that a cpu that is being hot-unplugged does not have any vcpus - * cached on it. Leave it as blank for IA64. - */ -void decache_vcpus_on_cpu(int cpu) -{ -} - static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 777e0f34e0e..0513b359851 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -240,10 +240,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { } -void decache_vcpus_on_cpu(int cpu) -{ -} - int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6558b09ff57..4585c8ac2b0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -79,10 +79,6 @@ void kvm_arch_hardware_disable(void *garbage) { } -void decache_vcpus_on_cpu(int cpu) -{ -} - int kvm_arch_hardware_setup(void) { return 0; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9390a31c06f..238e8f3afaf 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -709,10 +709,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) rdtscll(vcpu->arch.host_tsc); } -static void svm_vcpu_decache(struct kvm_vcpu *vcpu) -{ -} - static void svm_cache_regs(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1933,7 +1929,6 @@ static struct kvm_x86_ops svm_x86_ops = { .prepare_guest_switch = svm_prepare_guest_switch, .vcpu_load = svm_vcpu_load, .vcpu_put = 
svm_vcpu_put, - .vcpu_decache = svm_vcpu_decache, .set_guest_debug = svm_guest_debug, .get_msr = svm_get_msr, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4d179d10637..b99bb37e5de 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -692,11 +692,6 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) update_exception_bitmap(vcpu); } -static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) -{ - vcpu_clear(to_vmx(vcpu)); -} - static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { return vmcs_readl(GUEST_RFLAGS); @@ -3114,7 +3109,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .prepare_guest_switch = vmx_save_host_state, .vcpu_load = vmx_vcpu_load, .vcpu_put = vmx_vcpu_put, - .vcpu_decache = vmx_vcpu_decache, .set_guest_debug = set_guest_debug, .guest_debug_pre = kvm_guest_debug_pre, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8c14ddcaba7..fd03b4465bc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -817,14 +817,6 @@ out: return r; } -/* - * Make sure that a cpu that is being hot-unplugged does not have any vcpus - * cached on it. - */ -void decache_vcpus_on_cpu(int cpu) -{ -} - int kvm_dev_ioctl_check_extension(long ext) { int r; -- cgit v1.2.3 From 50d40d7fb9b09e68a657c68837fcfa067b70cc42 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 25 May 2008 14:38:15 +0300 Subject: KVM: Remove unnecessary ->decache_regs() call Since we aren't modifying any register, there's no need to decache the register state. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fd03b4465bc..5f00c60f0af 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2297,7 +2297,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, kvm_x86_ops->cache_regs(vcpu); memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); - kvm_x86_ops->decache_regs(vcpu); kvm_x86_ops->skip_emulated_instruction(vcpu); -- cgit v1.2.3 From 3419ffc8e45a5344abc87684cbca6cdc5c9c8a01 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 15 May 2008 09:52:48 +0800 Subject: KVM: IOAPIC/LAPIC: Enable NMI support [avi: fix ia64 build breakage] Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/lapic.c | 3 ++- arch/x86/kvm/x86.c | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f9201fbc61d..e48d1939403 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -356,8 +356,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_SMI: printk(KERN_DEBUG "Ignoring guest SMI\n"); break; + case APIC_DM_NMI: - printk(KERN_DEBUG "Ignoring guest NMI\n"); + kvm_inject_nmi(vcpu); break; case APIC_DM_INIT: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5f00c60f0af..19974dde656 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -173,6 +173,12 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); } +void kvm_inject_nmi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.nmi_pending = 1; +} +EXPORT_SYMBOL_GPL(kvm_inject_nmi); + void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) { WARN_ON(vcpu->arch.exception.pending); -- cgit v1.2.3 From f08864b42a45581a64558aa5b6b673c77b97ee5d Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 15 May 2008 
18:23:25 +0800 Subject: KVM: VMX: Enable NMI with in-kernel irqchip Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 124 +++++++++++++++++++++++++++++++++++++++++++++-------- arch/x86/kvm/vmx.h | 12 +++++- arch/x86/kvm/x86.c | 1 + 3 files changed, 118 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b99bb37e5de..1bb99465720 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -264,6 +264,11 @@ static inline int cpu_has_vmx_vpid(void) SECONDARY_EXEC_ENABLE_VPID); } +static inline int cpu_has_virtual_nmis(void) +{ + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; +} + static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) { int i; @@ -1088,7 +1093,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) u32 _vmentry_control = 0; min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = 0; + opt = PIN_BASED_VIRTUAL_NMIS; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, &_pin_based_exec_control) < 0) return -EIO; @@ -2130,6 +2135,13 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); } +static void vmx_inject_nmi(struct kvm_vcpu *vcpu) +{ + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); + vcpu->arch.nmi_pending = 0; +} + static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) { int word_index = __ffs(vcpu->arch.irq_summary); @@ -2653,6 +2665,19 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } +static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + u32 cpu_based_vm_exec_control; + + /* clear pending NMI */ + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + ++vcpu->stat.nmi_window_exits; + + 
return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -2663,6 +2688,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_EXCEPTION_NMI] = handle_exception, [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, + [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, [EXIT_REASON_IO_INSTRUCTION] = handle_io, [EXIT_REASON_CR_ACCESS] = handle_cr, [EXIT_REASON_DR_ACCESS] = handle_dr, @@ -2750,17 +2776,52 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); } +static void enable_nmi_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + if (!cpu_has_virtual_nmis()) + return; + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + +static int vmx_nmi_enabled(struct kvm_vcpu *vcpu) +{ + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + return !(guest_intr & (GUEST_INTR_STATE_NMI | + GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_STI)); +} + +static int vmx_irq_enabled(struct kvm_vcpu *vcpu) +{ + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_STI)) && + (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); +} + +static void enable_intr_window(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.nmi_pending) + enable_nmi_window(vcpu); + else if (kvm_cpu_has_interrupt(vcpu)) + enable_irq_window(vcpu); +} + static void vmx_intr_assist(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 idtv_info_field, intr_info_field; - int has_ext_irq, interrupt_window_open; + u32 idtv_info_field, intr_info_field, exit_intr_info_field; int vector; 
update_tpr_threshold(vcpu); - has_ext_irq = kvm_cpu_has_interrupt(vcpu); intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); + exit_intr_info_field = vmcs_read32(VM_EXIT_INTR_INFO); idtv_info_field = vmx->idt_vectoring_info; if (intr_info_field & INTR_INFO_VALID_MASK) { if (idtv_info_field & INTR_INFO_VALID_MASK) { @@ -2768,8 +2829,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) if (printk_ratelimit()) printk(KERN_ERR "Fault when IDT_Vectoring\n"); } - if (has_ext_irq) - enable_irq_window(vcpu); + enable_intr_window(vcpu); return; } if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) { @@ -2779,30 +2839,56 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK; vmx_inject_irq(vcpu, vect); - if (unlikely(has_ext_irq)) - enable_irq_window(vcpu); + enable_intr_window(vcpu); return; } KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler); - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); + /* + * SDM 3: 25.7.1.2 + * Clear bit "block by NMI" before VM entry if a NMI delivery + * faulted. + */ + if ((idtv_info_field & VECTORING_INFO_TYPE_MASK) + == INTR_TYPE_NMI_INTR && cpu_has_virtual_nmis()) + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + ~GUEST_INTR_STATE_NMI); + + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field + & ~INTR_INFO_RESVD_BITS_MASK); vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK)) vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, vmcs_read32(IDT_VECTORING_ERROR_CODE)); - if (unlikely(has_ext_irq)) - enable_irq_window(vcpu); + enable_intr_window(vcpu); return; } - if (!has_ext_irq) + if (cpu_has_virtual_nmis()) { + /* + * SDM 3: 25.7.1.2 + * Re-set bit "block by NMI" before VM entry if vmexit caused by + * a guest IRET fault. 
+ */ + if ((exit_intr_info_field & INTR_INFO_UNBLOCK_NMI) && + (exit_intr_info_field & INTR_INFO_VECTOR_MASK) != 8) + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) | + GUEST_INTR_STATE_NMI); + else if (vcpu->arch.nmi_pending) { + if (vmx_nmi_enabled(vcpu)) + vmx_inject_nmi(vcpu); + enable_intr_window(vcpu); + return; + } + + } + if (!kvm_cpu_has_interrupt(vcpu)) return; - interrupt_window_open = - ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); - if (interrupt_window_open) { + if (vmx_irq_enabled(vcpu)) { vector = kvm_cpu_get_interrupt(vcpu); vmx_inject_irq(vcpu, vector); kvm_timer_intr_post(vcpu, vector); @@ -2963,7 +3049,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) fixup_rmode_irq(vmx); vcpu->arch.interrupt_window_open = - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0; asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); vmx->launched = 1; @@ -2971,7 +3058,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) intr_info = vmcs_read32(VM_EXIT_INTR_INFO); /* We need to handle NMIs before interrupts are enabled */ - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && + (intr_info & INTR_INFO_VALID_MASK)) { KVMTRACE_0D(NMI, vcpu, handler); asm("int $2"); } diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 79d94c610df..425a13436b3 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -40,6 +40,7 @@ #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 #define CPU_BASED_CR8_STORE_EXITING 0x00100000 #define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 #define CPU_BASED_MOV_DR_EXITING 0x00800000 #define CPU_BASED_UNCOND_IO_EXITING 0x01000000 #define CPU_BASED_USE_IO_BITMAPS 0x02000000 @@ -216,7 +217,7 
@@ enum vmcs_field { #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_PENDING_INTERRUPT 7 - +#define EXIT_REASON_NMI_WINDOW 8 #define EXIT_REASON_TASK_SWITCH 9 #define EXIT_REASON_CPUID 10 #define EXIT_REASON_HLT 12 @@ -251,7 +252,9 @@ enum vmcs_field { #define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ #define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ #define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ +#define INTR_INFO_UNBLOCK_NMI 0x1000 /* 12 */ #define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ +#define INTR_INFO_RESVD_BITS_MASK 0x7ffff000 #define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK #define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK @@ -259,9 +262,16 @@ enum vmcs_field { #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ +#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ #define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ +/* GUEST_INTERRUPTIBILITY_INFO flags. 
*/ +#define GUEST_INTR_STATE_STI 0x00000001 +#define GUEST_INTR_STATE_MOV_SS 0x00000002 +#define GUEST_INTR_STATE_SMI 0x00000004 +#define GUEST_INTR_STATE_NMI 0x00000008 + /* * Exit Qualifications for MOV for Control Register Access */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 19974dde656..05b54976c89 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -72,6 +72,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmio_exits", VCPU_STAT(mmio_exits) }, { "signal_exits", VCPU_STAT(signal_exits) }, { "irq_window", VCPU_STAT(irq_window_exits) }, + { "nmi_window", VCPU_STAT(nmi_window_exits) }, { "halt_exits", VCPU_STAT(halt_exits) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, -- cgit v1.2.3 From 9ba075a664dff836fd6fb93f90fcc827f7683d91 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 26 May 2008 20:06:35 +0300 Subject: KVM: MTRR support Add emulation for the memory type range registers, needed by VMware esx 3.5, and by pci device assignment. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 05b54976c89..5f67a7c54e8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -611,6 +611,38 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); } +static bool msr_mtrr_valid(unsigned msr) +{ + switch (msr) { + case 0x200 ... 
0x200 + 2 * KVM_NR_VAR_MTRR - 1: + case MSR_MTRRfix64K_00000: + case MSR_MTRRfix16K_80000: + case MSR_MTRRfix16K_A0000: + case MSR_MTRRfix4K_C0000: + case MSR_MTRRfix4K_C8000: + case MSR_MTRRfix4K_D0000: + case MSR_MTRRfix4K_D8000: + case MSR_MTRRfix4K_E0000: + case MSR_MTRRfix4K_E8000: + case MSR_MTRRfix4K_F0000: + case MSR_MTRRfix4K_F8000: + case MSR_MTRRdefType: + case MSR_IA32_CR_PAT: + return true; + case 0x2f8: + return true; + } + return false; +} + +static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + if (!msr_mtrr_valid(msr)) + return 1; + + vcpu->arch.mtrr[msr - 0x200] = data; + return 0; +} int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) { @@ -632,8 +664,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: - case 0x200 ... 0x2ff: /* MTRRs */ break; + case 0x200 ... 0x2ff: + return set_msr_mtrr(vcpu, msr, data); case MSR_IA32_APICBASE: kvm_set_apic_base(vcpu, data); break; @@ -691,6 +724,15 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); } +static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +{ + if (!msr_mtrr_valid(msr)) + return 1; + + *pdata = vcpu->arch.mtrr[msr - 0x200]; + return 0; +} + int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { u64 data; @@ -712,11 +754,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_MC0_MISC+16: case MSR_IA32_UCODE_REV: case MSR_IA32_EBL_CR_POWERON: - /* MTRR registers */ - case 0xfe: - case 0x200 ... 0x2ff: data = 0; break; + case MSR_MTRRcap: + data = 0x500 | KVM_NR_VAR_MTRR; + break; + case 0x200 ... 
0x2ff: + return get_msr_mtrr(vcpu, msr, pdata); case 0xcd: /* fsb frequency */ data = 3; break; -- cgit v1.2.3 From 3e6e0aab1ba1e8b354ce01f5659336f9aee69437 Mon Sep 17 00:00:00 2001 From: Guillaume Thouvenin Date: Tue, 27 May 2008 10:18:46 +0200 Subject: KVM: Prefixes segment functions that will be exported with "kvm_" Prefixes functions that will be exported with kvm_. We also prefixed set_segment() even if it still static to be coherent. signed-off-by: Guillaume Thouvenin Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 78 +++++++++++++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5f67a7c54e8..4c94fad7f01 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3100,8 +3100,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } -static void get_segment(struct kvm_vcpu *vcpu, - struct kvm_segment *var, int seg) +void kvm_get_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) { kvm_x86_ops->get_segment(vcpu, var, seg); } @@ -3110,7 +3110,7 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) { struct kvm_segment cs; - get_segment(vcpu, &cs, VCPU_SREG_CS); + kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); *db = cs.db; *l = cs.l; } @@ -3124,15 +3124,15 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, vcpu_load(vcpu); - get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); - get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); - get_segment(vcpu, &sregs->es, VCPU_SREG_ES); - get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); - get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); - get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); + kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); + kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); + kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); + kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); + kvm_get_segment(vcpu, &sregs->gs, 
VCPU_SREG_GS); + kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); - get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); - get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); + kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); + kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); kvm_x86_ops->get_idt(vcpu, &dt); sregs->idt.limit = dt.limit; @@ -3184,7 +3184,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return 0; } -static void set_segment(struct kvm_vcpu *vcpu, +static void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { kvm_x86_ops->set_segment(vcpu, var, seg); @@ -3221,7 +3221,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, if (selector & 1 << 2) { struct kvm_segment kvm_seg; - get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); + kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); if (kvm_seg.unusable) dtable->limit = 0; @@ -3327,7 +3327,7 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) { struct kvm_segment kvm_seg; - get_segment(vcpu, &kvm_seg, seg); + kvm_get_segment(vcpu, &kvm_seg, seg); return kvm_seg.selector; } @@ -3343,8 +3343,8 @@ static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, return 0; } -static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, - int type_bits, int seg) +int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + int type_bits, int seg) { struct kvm_segment kvm_seg; @@ -3357,7 +3357,7 @@ static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, if (!kvm_seg.s) kvm_seg.unusable = 1; - set_segment(vcpu, &kvm_seg, seg); + kvm_set_segment(vcpu, &kvm_seg, seg); return 0; } @@ -3403,25 +3403,25 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi; vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi; - if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) + if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) return 1; - if 
(load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) + if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) return 1; - if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) + if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) return 1; - if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) + if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) return 1; - if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) + if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) return 1; - if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) + if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) return 1; - if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) + if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) return 1; return 0; } @@ -3462,19 +3462,19 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, vcpu->arch.regs[VCPU_REGS_RSI] = tss->si; vcpu->arch.regs[VCPU_REGS_RDI] = tss->di; - if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) + if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) return 1; - if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) + if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) return 1; - if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) + if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) return 1; - if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) + if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) return 1; - if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) + if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) return 1; return 0; } @@ -3532,7 +3532,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) struct desc_struct nseg_desc; int ret = 0; - get_segment(vcpu, &tr_seg, VCPU_SREG_TR); + kvm_get_segment(vcpu, &tr_seg, VCPU_SREG_TR); if (load_guest_segment_descriptor(vcpu, 
tss_selector, &nseg_desc)) goto out; @@ -3591,7 +3591,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); tr_seg.type = 11; - set_segment(vcpu, &tr_seg, VCPU_SREG_TR); + kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); out: kvm_x86_ops->decache_regs(vcpu); return ret; @@ -3658,15 +3658,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, } } - set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); - set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); - set_segment(vcpu, &sregs->es, VCPU_SREG_ES); - set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); - set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); - set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); + kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); + kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); + kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES); + kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); + kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); + kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); - set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); - set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); + kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); + kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); vcpu_put(vcpu); -- cgit v1.2.3 From 89c696383d6eb493351a89d450d8ad7a55cbe1da Mon Sep 17 00:00:00 2001 From: Guillaume Thouvenin Date: Tue, 27 May 2008 10:22:20 +0200 Subject: KVM: x86 emulator: Update c->dst.bytes in decode instruction Update c->dst.bytes in decode instruction instead of instruction itself. It's needed because if c->dst.bytes is equal to 0, the instruction is not emulated. 
Signed-off-by: Guillaume Thouvenin Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 932f216d890..a928aa6cdad 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -1049,6 +1049,7 @@ done_prefixes: break; case DstMem: if ((c->d & ModRM) && c->modrm_mod == 3) { + c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.type = OP_REG; c->dst.val = c->dst.orig_val = c->modrm_val; c->dst.ptr = c->modrm_ptr; -- cgit v1.2.3 From 954cd36f7613ac6d084abe33114dd45a8e0dbe92 Mon Sep 17 00:00:00 2001 From: Guillaume Thouvenin Date: Tue, 27 May 2008 10:19:08 +0200 Subject: KVM: x86 emulator: add support for jmp far 0xea Add support for jmp far (opcode 0xea) instruction. Signed-off-by: Guillaume Thouvenin Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index a928aa6cdad..48b62cc3bd0 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -168,7 +168,8 @@ static u16 opcode_table[256] = { /* 0xE0 - 0xE7 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE8 - 0xEF */ - ImplicitOps | Stack, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, + ImplicitOps | Stack, SrcImm | ImplicitOps, + ImplicitOps, SrcImmByte | ImplicitOps, 0, 0, 0, 0, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, @@ -1661,7 +1662,33 @@ special_insn: break; } case 0xe9: /* jmp rel */ - case 0xeb: /* jmp rel short */ + goto jmp; + case 0xea: /* jmp far */ { + uint32_t eip; + uint16_t sel; + + switch (c->op_bytes) { + case 2: + eip = insn_fetch(u16, 2, c->eip); + break; + case 4: + eip = insn_fetch(u32, 4, c->eip); + break; + default: + DPRINTF("jmp far: Invalid op_bytes\n"); + goto cannot_emulate; + } + sel = insn_fetch(u16, 2, c->eip); + 
if (kvm_load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) { + DPRINTF("jmp far: Failed to load CS descriptor\n"); + goto cannot_emulate; + } + + c->eip = eip; + break; + } + case 0xeb: + jmp: /* jmp rel short */ jmp_rel(c, c->src.val); c->dst.type = OP_NONE; /* Disable writeback. */ break; -- cgit v1.2.3 From 615ac125618dc7b40ecb418e8b353d31ccf0e518 Mon Sep 17 00:00:00 2001 From: Guillaume Thouvenin Date: Tue, 27 May 2008 10:19:16 +0200 Subject: KVM: x86 emulator: adds support to mov r,imm (opcode 0xb8) instruction Add support for the mov r, imm (0xb8) instruction. Signed-off-by: Guillaume Thouvenin Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 48b62cc3bd0..21d7ff6a8ec 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -152,7 +152,8 @@ static u16 opcode_table[256] = { ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, ByteOp | ImplicitOps | String, ImplicitOps | String, /* 0xB0 - 0xBF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + DstReg | SrcImm | Mov, 0, 0, 0, 0, 0, 0, 0, /* 0xC0 - 0xC7 */ ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, 0, ImplicitOps | Stack, 0, 0, @@ -1624,6 +1625,8 @@ special_insn: case 0xae ... 0xaf: /* scas */ DPRINTF("Urk! I don't handle SCAS.\n"); goto cannot_emulate; + case 0xb8: /* mov r, imm */ + goto mov; case 0xc0 ... 0xc1: emulate_grp2(ctxt); break; -- cgit v1.2.3 From 4257198ae2c36e030a0947fef661c8de973778be Mon Sep 17 00:00:00 2001 From: Guillaume Thouvenin Date: Tue, 27 May 2008 14:49:15 +0200 Subject: KVM: x86 emulator: Add support for mov seg, r (0x8e) instruction Add support for mov seg, r (0x8e) instruction. 
[avi: drop the sreg decoding table in favor of 1:1 encoding] Signed-off-by: Guillaume Thouvenin Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 21d7ff6a8ec..b049b6bf9a7 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -138,7 +138,8 @@ static u16 opcode_table[256] = { /* 0x88 - 0x8F */ ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, - 0, ModRM | DstReg, 0, Group | Group1A, + 0, ModRM | DstReg, + DstReg | SrcMem | ModRM | Mov, Group | Group1A, /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, @@ -1520,6 +1521,28 @@ special_insn: case 0x8d: /* lea r16/r32, m */ c->dst.val = c->modrm_ea; break; + case 0x8e: { /* mov seg, r/m16 */ + uint16_t sel; + int type_bits; + int err; + + sel = c->src.val; + if (c->modrm_reg <= 5) { + type_bits = (c->modrm_reg == 1) ? 9 : 1; + err = kvm_load_segment_descriptor(ctxt->vcpu, sel, + type_bits, c->modrm_reg); + } else { + printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n", + c->modrm); + goto cannot_emulate; + } + + if (err < 0) + goto cannot_emulate; + + c->dst.type = OP_NONE; /* Disable writeback. 
*/ + break; + } case 0x8f: /* pop (sole member of Grp1a) */ rc = emulate_grp1a(ctxt, ops); if (rc != 0) -- cgit v1.2.3 From 38d5bc6d50a4368be08b39b02efb9cbbe1dd60d0 Mon Sep 17 00:00:00 2001 From: Guillaume Thouvenin Date: Tue, 27 May 2008 15:13:28 +0200 Subject: KVM: x86 emulator: Add support for mov r, sreg (0x8c) instruction Add support for mov r, sreg (0x8c) instruction Signed-off-by: Guillaume Thouvenin Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index b049b6bf9a7..2a9db4d90ba 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -138,7 +138,7 @@ static u16 opcode_table[256] = { /* 0x88 - 0x8F */ ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, - 0, ModRM | DstReg, + DstMem | SrcReg | ModRM | Mov, ModRM | DstReg, DstReg | SrcMem | ModRM | Mov, Group | Group1A, /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, @@ -1518,6 +1518,19 @@ special_insn: break; case 0x88 ... 0x8b: /* mov */ goto mov; + case 0x8c: { /* mov r/m, sreg */ + struct kvm_segment segreg; + + if (c->modrm_reg <= 5) + kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); + else { + printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", + c->modrm); + goto cannot_emulate; + } + c->dst.val = segreg.selector; + break; + } case 0x8d: /* lea r16/r32, m */ c->dst.val = c->modrm_ea; break; -- cgit v1.2.3 From eab9f71feb1851b5b700ca12ae614b6a0a441021 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 29 May 2008 14:20:16 +0300 Subject: KVM: MMU: Optimize prefetch_page() Instead of reading each pte individually, read 256 bytes worth of ptes and batch process them. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 934c7b61939..4d918220bae 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -460,8 +460,9 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { - int i, offset = 0, r = 0; - pt_element_t pt; + int i, j, offset, r; + pt_element_t pt[256 / sizeof(pt_element_t)]; + gpa_t pte_gpa; if (sp->role.metaphysical || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) { @@ -469,19 +470,20 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, return; } - if (PTTYPE == 32) + pte_gpa = gfn_to_gpa(sp->gfn); + if (PTTYPE == 32) { offset = sp->role.quadrant << PT64_LEVEL_BITS; + pte_gpa += offset * sizeof(pt_element_t); + } - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { - gpa_t pte_gpa = gfn_to_gpa(sp->gfn); - pte_gpa += (i+offset) * sizeof(pt_element_t); - - r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &pt, - sizeof(pt_element_t)); - if (r || is_present_pte(pt)) - sp->spt[i] = shadow_trap_nonpresent_pte; - else - sp->spt[i] = shadow_notrap_nonpresent_pte; + for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) { + r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt); + pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t); + for (j = 0; j < ARRAY_SIZE(pt); ++j) + if (r || is_present_pte(pt[j])) + sp->spt[i+j] = shadow_trap_nonpresent_pte; + else + sp->spt[i+j] = shadow_notrap_nonpresent_pte; } } -- cgit v1.2.3 From 19e43636b5af1c8b9cc8406af674835284abab0c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 29 May 2008 14:26:29 +0300 Subject: KVM: x86 emulator: simplify push imm8 emulation Instead of fetching the data explicitly, use SrcImmByte. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 2a9db4d90ba..4e037ea8fe6 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -121,7 +121,7 @@ static u16 opcode_table[256] = { 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , 0, 0, 0, 0, /* 0x68 - 0x6F */ - 0, 0, ImplicitOps | Mov | Stack, 0, + 0, 0, SrcImmByte | Mov | Stack, 0, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ /* 0x70 - 0x77 */ @@ -1425,8 +1425,6 @@ special_insn: c->dst.val = (s32) c->src.val; break; case 0x6a: /* push imm8 */ - c->src.val = 0L; - c->src.val = insn_fetch(s8, 1, c->eip); emulate_push(ctxt); break; case 0x6c: /* insb */ -- cgit v1.2.3 From 91ed7a0e15c6f6ff57f5cf70feabdba56a999863 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 29 May 2008 14:38:38 +0300 Subject: KVM: x86 emulator: implement 'push imm' (opcode 0x68) Encountered in FC6 boot sequence, now that we don't force ss.rpl = 0 during the protected mode transition. Not really necessary, but nice to have. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 4e037ea8fe6..b90857c7656 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -121,7 +121,7 @@ static u16 opcode_table[256] = { 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , 0, 0, 0, 0, /* 0x68 - 0x6F */ - 0, 0, SrcImmByte | Mov | Stack, 0, + SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ /* 0x70 - 0x77 */ @@ -1424,6 +1424,7 @@ special_insn: goto cannot_emulate; c->dst.val = (s32) c->src.val; break; + case 0x68: /* push imm */ case 0x6a: /* push imm8 */ emulate_push(ctxt); break; -- cgit v1.2.3 From d761a501cf9cd4fa08ff35d252ff08b8c31ce677 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 29 May 2008 14:55:03 +0300 Subject: KVM: MMU: Move nonpaging_prefetch_page() In preparation for next patch. No code change. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 53f1ed852ca..62741b7c422 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -776,6 +776,15 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, BUG(); } +static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp) +{ + int i; + + for (i = 0; i < PT64_ENT_PER_PAGE; ++i) + sp->spt[i] = shadow_trap_nonpresent_pte; +} + static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) { unsigned index; @@ -1213,15 +1222,6 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) } -static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp) -{ - int i; - - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) - sp->spt[i] = shadow_trap_nonpresent_pte; -} - static void mmu_free_roots(struct kvm_vcpu *vcpu) { int i; -- cgit v1.2.3 From 131d82791b628d4aeafd94ddc74a9b68f3d15a83 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 29 May 2008 14:56:28 +0300 Subject: KVM: MMU: Avoid page prefetch on SVM SVM cannot benefit from page prefetching since guest page fault bypass cannot be made to work there. Avoid accessing the guest page table in this case. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 62741b7c422..5ebb2788bd7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -850,7 +850,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, hlist_add_head(&sp->hash_link, bucket); if (!metaphysical) rmap_write_protect(vcpu->kvm, gfn); - vcpu->arch.mmu.prefetch_page(vcpu, sp); + if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) + vcpu->arch.mmu.prefetch_page(vcpu, sp); + else + nonpaging_prefetch_page(vcpu, sp); return sp; } -- cgit v1.2.3 From 92760499d01ef91518119908eb9b8798b6c9bd3f Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 30 May 2008 16:05:53 +0200 Subject: KVM: kvm_io_device: extend in_range() to manage len and write attribute Modify member in_range() of structure kvm_io_device to pass length and the type of the I/O (write or read). This modification allows to use kvm_io_device with coalesced MMIO. 
Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 6 +++--- arch/x86/kvm/i8254.c | 6 ++++-- arch/x86/kvm/i8259.c | 3 ++- arch/x86/kvm/lapic.c | 3 ++- arch/x86/kvm/x86.c | 28 +++++++++++++++++----------- 5 files changed, 28 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 7c504be5797..bb58df7cc41 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -195,11 +195,11 @@ int kvm_dev_ioctl_check_extension(long ext) } static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, - gpa_t addr) + gpa_t addr, int len, int is_write) { struct kvm_io_device *dev; - dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write); return dev; } @@ -231,7 +231,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->exit_reason = KVM_EXIT_MMIO; return 0; mmio: - mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr); + mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir); if (mmio_dev) { if (!p->dir) kvm_iodevice_write(mmio_dev, p->addr, p->size, diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 60074dc66bd..9e3391e9a1b 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -460,7 +460,8 @@ static void pit_ioport_read(struct kvm_io_device *this, mutex_unlock(&pit_state->lock); } -static int pit_in_range(struct kvm_io_device *this, gpa_t addr) +static int pit_in_range(struct kvm_io_device *this, gpa_t addr, + int len, int is_write) { return ((addr >= KVM_PIT_BASE_ADDRESS) && (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); @@ -501,7 +502,8 @@ static void speaker_ioport_read(struct kvm_io_device *this, mutex_unlock(&pit_state->lock); } -static int speaker_in_range(struct kvm_io_device *this, gpa_t addr) +static int speaker_in_range(struct kvm_io_device *this, gpa_t addr, + int len, int is_write) { return (addr == 
KVM_SPEAKER_BASE_ADDRESS); } diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index ab29cf2def4..5857f59ad4a 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -346,7 +346,8 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1) return s->elcr; } -static int picdev_in_range(struct kvm_io_device *this, gpa_t addr) +static int picdev_in_range(struct kvm_io_device *this, gpa_t addr, + int len, int is_write) { switch (addr) { case 0x20: diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e48d1939403..180ba7316da 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -785,7 +785,8 @@ static void apic_mmio_write(struct kvm_io_device *this, } -static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr) +static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr, + int len, int size) { struct kvm_lapic *apic = (struct kvm_lapic *)this->private; int ret = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4c94fad7f01..ab3f5552d69 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1797,13 +1797,14 @@ static void kvm_init_msr_list(void) * Only apic need an MMIO device hook, so shortcut now.. 
*/ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, - gpa_t addr) + gpa_t addr, int len, + int is_write) { struct kvm_io_device *dev; if (vcpu->arch.apic) { dev = &vcpu->arch.apic->dev; - if (dev->in_range(dev, addr)) + if (dev->in_range(dev, addr, len, is_write)) return dev; } return NULL; @@ -1811,13 +1812,15 @@ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, - gpa_t addr) + gpa_t addr, int len, + int is_write) { struct kvm_io_device *dev; - dev = vcpu_find_pervcpu_dev(vcpu, addr); + dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write); if (dev == NULL) - dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, + is_write); return dev; } @@ -1885,7 +1888,7 @@ mmio: * Is this MMIO handled locally? */ mutex_lock(&vcpu->kvm->lock); - mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); + mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0); if (mmio_dev) { kvm_iodevice_read(mmio_dev, gpa, bytes, val); mutex_unlock(&vcpu->kvm->lock); @@ -1940,7 +1943,7 @@ mmio: * Is this MMIO handled locally? 
*/ mutex_lock(&vcpu->kvm->lock); - mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); + mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1); if (mmio_dev) { kvm_iodevice_write(mmio_dev, gpa, bytes, val); mutex_unlock(&vcpu->kvm->lock); @@ -2317,9 +2320,10 @@ static void pio_string_write(struct kvm_io_device *pio_dev, } static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, - gpa_t addr) + gpa_t addr, int len, + int is_write) { - return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr); + return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write); } int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, @@ -2351,7 +2355,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, kvm_x86_ops->skip_emulated_instruction(vcpu); - pio_dev = vcpu_find_pio_dev(vcpu, port); + pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); if (pio_dev) { kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); complete_pio(vcpu); @@ -2433,7 +2437,9 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } } - pio_dev = vcpu_find_pio_dev(vcpu, port); + pio_dev = vcpu_find_pio_dev(vcpu, port, + vcpu->arch.pio.cur_count, + !vcpu->arch.pio.in); if (!vcpu->arch.pio.in) { /* string PIO write */ ret = pio_copy_data(vcpu); -- cgit v1.2.3 From 542472b53ea9e0add0ba23976018210191d84754 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 30 May 2008 16:05:55 +0200 Subject: KVM: Add coalesced MMIO support (x86 part) This patch enables coalesced MMIO for x86 architecture. It defines KVM_MMIO_PAGE_OFFSET and KVM_CAP_COALESCED_MMIO. It enables the compilation of coalesced_mmio.c. 
Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- arch/x86/kvm/Makefile | 3 ++- arch/x86/kvm/x86.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index c97d35c218d..d0e940bb6f4 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -2,7 +2,8 @@ # Makefile for Kernel-based Virtual Machine module # -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ + coalesced_mmio.o) ifeq ($(CONFIG_KVM_TRACE),y) common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ab3f5552d69..d731d4fff1a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -885,6 +885,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MP_STATE: r = 1; break; + case KVM_CAP_COALESCED_MMIO: + r = KVM_COALESCED_MMIO_PAGE_OFFSET; + break; case KVM_CAP_VAPIC: r = !kvm_x86_ops->cpu_has_accelerated_tpr(); break; -- cgit v1.2.3 From 588968b6b7d34e6a88f538d1db9aca47b203623e Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 30 May 2008 16:05:56 +0200 Subject: KVM: Add coalesced MMIO support (powerpc part) This patch enables coalesced MMIO for powerpc architecture. It defines KVM_MMIO_PAGE_OFFSET and KVM_CAP_COALESCED_MMIO. It enables the compilation of coalesced_mmio.c. 
Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- arch/powerpc/kvm/Makefile | 2 +- arch/powerpc/kvm/powerpc.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index d0d358d367e..04e3449e1f4 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -4,7 +4,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) kvm-objs := $(common-objs) powerpc.o emulate.o booke_guest.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0513b359851..b850d249702 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -145,6 +145,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_USER_MEMORY: r = 1; break; + case KVM_CAP_COALESCED_MMIO: + r = KVM_COALESCED_MMIO_PAGE_OFFSET; + break; default: r = 0; break; -- cgit v1.2.3 From 7f39f8ac177db258200053074aa7a3d98656b1cf Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 30 May 2008 16:05:57 +0200 Subject: KVM: Add coalesced MMIO support (ia64 part) This patch enables coalesced MMIO for ia64 architecture. It defines KVM_MMIO_PAGE_OFFSET and KVM_CAP_COALESCED_MMIO. It enables the compilation of coalesced_mmio.c. 
[akpm: fix compile error on ia64] Signed-off-by: Laurent Vivier Signed-off-by: Andrew Morton Signed-off-by: Avi Kivity --- arch/ia64/kvm/Makefile | 3 ++- arch/ia64/kvm/kvm-ia64.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 112791dd254..bf22fb9e6dc 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -43,7 +43,8 @@ $(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ + coalesced_mmio.o) kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index bb58df7cc41..9408b30576d 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -187,6 +187,9 @@ int kvm_dev_ioctl_check_extension(long ext) r = 1; break; + case KVM_CAP_COALESCED_MMIO: + r = KVM_COALESCED_MMIO_PAGE_OFFSET; + break; default: r = 0; } -- cgit v1.2.3 From 622395a9e63bf87a16faecf555ed02375cbae5b7 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 11 Jun 2008 19:52:53 -0300 Subject: KVM: only abort guest entry if timer count goes from 0->1 Only abort guest entry if the timer count went from 0->1, since for 1->2 or larger the bit will either be set already or a timer irq will have been injected. Using atomic_inc_and_test() for it also introduces an SMP barrier to the LAPIC version (thought it was unnecessary because of timer migration, but guest can be scheduled to a different pCPU between exit and kvm_vcpu_block(), so there is the possibility for a race). Noticed by Avi.
Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 11 ++++------- arch/x86/kvm/lapic.c | 4 ++-- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 9e3391e9a1b..c0f7872a912 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -198,14 +198,11 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps) struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0]; struct kvm_kpit_timer *pt = &ps->pit_timer; - atomic_inc(&pt->pending); - smp_mb__after_atomic_inc(); - if (vcpu0) { + if (!atomic_inc_and_test(&pt->pending)) set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); - if (waitqueue_active(&vcpu0->wq)) { - vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; - wake_up_interruptible(&vcpu0->wq); - } + if (vcpu0 && waitqueue_active(&vcpu0->wq)) { + vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; + wake_up_interruptible(&vcpu0->wq); } pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 180ba7316da..73f43de69f6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -945,8 +945,8 @@ static int __apic_timer_fn(struct kvm_lapic *apic) int result = 0; wait_queue_head_t *q = &apic->vcpu->wq; - atomic_inc(&apic->timer.pending); - set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); + if(!atomic_inc_and_test(&apic->timer.pending)) + set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); if (waitqueue_active(q)) { apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; wake_up_interruptible(q); -- cgit v1.2.3 From 25be46080f1a446cb2bda3daadbd22a5682b955e Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 10 Jun 2008 10:46:53 -0300 Subject: KVM: Do not calculate linear rip in emulation failure report If we're not gonna do anything (case in which failure is already reported), we do not need to even bother with calculating the linear rip. 
Signed-off-by: Glauber Costa Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d731d4fff1a..5d21bb69d88 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2081,11 +2081,11 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) unsigned long rip = vcpu->arch.rip; unsigned long rip_linear; - rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); - if (reported) return; + rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); + emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", -- cgit v1.2.3 From f76c710d759250a43976bcfcab6af6ebb94b7dc2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 13 Jun 2008 22:45:42 +0300 Subject: KVM: Use printk_ratelimit() instead of reporting emulation failures just once Emulation failure reports are useful, so allow more than one per the lifetime of the module.
Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5d21bb69d88..d1db5aa5c7f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2076,12 +2076,11 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) { - static int reported; u8 opcodes[4]; unsigned long rip = vcpu->arch.rip; unsigned long rip_linear; - if (reported) + if (!printk_ratelimit()) return; rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); @@ -2090,7 +2089,6 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); - reported = 1; } EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); -- cgit v1.2.3 From b13354f8f092884fa8d79472404de4907b25d579 Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Sun, 15 Jun 2008 19:37:38 +0300 Subject: KVM: x86 emulator: emulate nop and xchg reg, acc (opcodes 0x90 - 0x97) Signed-off-by: Mohammed Gamal Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index b90857c7656..28082913919 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -140,8 +140,9 @@ static u16 opcode_table[256] = { ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, ModRM | DstReg, DstReg | SrcMem | ModRM | Mov, Group | Group1A, - /* 0x90 - 0x9F */ - 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x90 - 0x97 */ + DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, + /* 0x98 - 0x9F */ 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, /* 0xA0 - 0xA7 */ ByteOp | 
DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, @@ -1493,6 +1494,7 @@ special_insn: emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); break; case 0x86 ... 0x87: /* xchg */ + xchg: /* Write back the register source. */ switch (c->dst.bytes) { case 1: @@ -1560,6 +1562,17 @@ special_insn: if (rc != 0) goto done; break; + case 0x90: /* nop / xchg r8,rax */ + if (!(c->rex_prefix & 1)) { /* nop */ + c->dst.type = OP_NONE; + break; + } + case 0x91 ... 0x97: /* xchg reg,rax */ + c->src.type = c->dst.type = OP_REG; + c->src.bytes = c->dst.bytes = c->op_bytes; + c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX]; + c->src.val = *(c->src.ptr); + goto xchg; case 0x9c: /* pushf */ c->src.val = (unsigned long) ctxt->eflags; emulate_push(ctxt); -- cgit v1.2.3 From 8684c0af0b2bab770c257e2a04e1546eed35fa56 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Jun 2008 21:13:41 -0700 Subject: KVM: x86 emulator: handle undecoded rex.b with r/m = 5 in certain cases x86_64 does not decode rex.b in certain cases, where the r/m field = 5. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 28082913919..3721cfddc97 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -750,6 +750,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, switch (base_reg) { case 5: + case 13: if (c->modrm_mod != 0) c->modrm_ea += c->regs[base_reg]; else @@ -767,6 +768,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, } break; case 5: + case 13: if (c->modrm_mod != 0) c->modrm_ea += c->regs[c->modrm_rm]; else if (ctxt->mode == X86EMUL_MODE_PROT64) -- cgit v1.2.3 From dc71d0f1620790ec8e54101ca37e7b31e31208a8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Jun 2008 21:23:17 -0700 Subject: KVM: x86 emulator: simplify sib decoding Instead of using sparse switches, use simpler if/else sequences. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 3721cfddc97..ca7ab2469a4 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -748,24 +748,12 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, base_reg |= sib & 7; scale = sib >> 6; - switch (base_reg) { - case 5: - case 13: - if (c->modrm_mod != 0) - c->modrm_ea += c->regs[base_reg]; - else - c->modrm_ea += - insn_fetch(s32, 4, c->eip); - break; - default: + if ((base_reg & 7) == 5 && c->modrm_mod == 0) + c->modrm_ea += insn_fetch(s32, 4, c->eip); + else c->modrm_ea += c->regs[base_reg]; - } - switch (index_reg) { - case 4: - break; - default: + if (index_reg != 4) c->modrm_ea += c->regs[index_reg] << scale; - } break; case 5: case 13: -- cgit v1.2.3 From 84411d85dacdb6665578608c6a70fc8b819761a8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Jun 2008 21:53:26 -0700 Subject: KVM: x86 emulator: simplify r/m decoding Consolidate the duplicated code when not in any special case. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index ca7ab2469a4..c3a823174f3 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -740,9 +740,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, c->modrm_ea = (u16)c->modrm_ea; } else { /* 32/64-bit ModR/M decode. 
*/ - switch (c->modrm_rm) { - case 4: - case 12: + if ((c->modrm_rm & 7) == 4) { sib = insn_fetch(u8, 1, c->eip); index_reg |= (sib >> 3) & 7; base_reg |= sib & 7; @@ -754,18 +752,11 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, c->modrm_ea += c->regs[base_reg]; if (index_reg != 4) c->modrm_ea += c->regs[index_reg] << scale; - break; - case 5: - case 13: - if (c->modrm_mod != 0) - c->modrm_ea += c->regs[c->modrm_rm]; - else if (ctxt->mode == X86EMUL_MODE_PROT64) + } else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) { + if (ctxt->mode == X86EMUL_MODE_PROT64) rip_relative = 1; - break; - default: + } else c->modrm_ea += c->regs[c->modrm_rm]; - break; - } switch (c->modrm_mod) { case 0: if (c->modrm_rm == 5) -- cgit v1.2.3 From f5b4edcd52e78556800f90d08bfc9126416ac82f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Jun 2008 22:09:11 -0700 Subject: KVM: x86 emulator: simplify rip relative decoding rip relative decoding is relative to the instruction pointer of the next instruction; by moving address adjustment until after decoding is complete, we remove the need to determine the instruction size. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index c3a823174f3..20b604489c3 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -664,7 +664,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, { struct decode_cache *c = &ctxt->decode; u8 sib; - int index_reg = 0, base_reg = 0, scale, rip_relative = 0; + int index_reg = 0, base_reg = 0, scale; int rc = 0; if (c->rex_prefix) { @@ -754,7 +754,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, c->modrm_ea += c->regs[index_reg] << scale; } else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) { if (ctxt->mode == X86EMUL_MODE_PROT64) - rip_relative = 1; + c->rip_relative = 1; } else c->modrm_ea += c->regs[c->modrm_rm]; switch (c->modrm_mod) { @@ -770,22 +770,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, break; } } - if (rip_relative) { - c->modrm_ea += c->eip; - switch (c->d & SrcMask) { - case SrcImmByte: - c->modrm_ea += 1; - break; - case SrcImm: - if (c->d & ByteOp) - c->modrm_ea += 1; - else - if (c->op_bytes == 8) - c->modrm_ea += 4; - else - c->modrm_ea += c->op_bytes; - } - } done: return rc; } @@ -1044,6 +1028,9 @@ done_prefixes: break; } + if (c->rip_relative) + c->modrm_ea += c->eip; + done: return (rc == X86EMUL_UNHANDLEABLE) ? 
-1 : 0; } -- cgit v1.2.3 From 0adc8675d645940139d12477e5e05b8a0a7a1117 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Jun 2008 22:45:54 -0700 Subject: KVM: x86 emulator: avoid segment base adjust for lea Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 20b604489c3..38926b7da64 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -940,7 +940,7 @@ done_prefixes: c->override_base != &ctxt->gs_base) c->override_base = NULL; - if (c->override_base) + if (c->override_base && !(!c->twobyte && c->b == 0x8d)) c->modrm_ea += *c->override_base; if (c->ad_bytes != 8) -- cgit v1.2.3 From 7a5b56dfd3a682a51fc84682290d5147872a8e99 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 22 Jun 2008 16:22:51 +0300 Subject: KVM: x86 emulator: lazily evaluate segment registers Instead of prefetching all segment bases before emulation, read them at the last moment. Since most of them are unneeded, we save some cycles on Intel machines where this is a bit expensive. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 21 ---------- arch/x86/kvm/x86_emulate.c | 96 +++++++++++++++++++++++++++------------------- 2 files changed, 57 insertions(+), 60 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d1db5aa5c7f..f726ba79fd3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2126,27 +2126,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu, ? X86EMUL_MODE_PROT64 : cs_db ? 
X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; - if (vcpu->arch.emulate_ctxt.mode == X86EMUL_MODE_PROT64) { - vcpu->arch.emulate_ctxt.cs_base = 0; - vcpu->arch.emulate_ctxt.ds_base = 0; - vcpu->arch.emulate_ctxt.es_base = 0; - vcpu->arch.emulate_ctxt.ss_base = 0; - } else { - vcpu->arch.emulate_ctxt.cs_base = - get_segment_base(vcpu, VCPU_SREG_CS); - vcpu->arch.emulate_ctxt.ds_base = - get_segment_base(vcpu, VCPU_SREG_DS); - vcpu->arch.emulate_ctxt.es_base = - get_segment_base(vcpu, VCPU_SREG_ES); - vcpu->arch.emulate_ctxt.ss_base = - get_segment_base(vcpu, VCPU_SREG_SS); - } - - vcpu->arch.emulate_ctxt.gs_base = - get_segment_base(vcpu, VCPU_SREG_GS); - vcpu->arch.emulate_ctxt.fs_base = - get_segment_base(vcpu, VCPU_SREG_FS); - r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); /* Reject the instructions other than VMCALL/VMMCALL when diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 38926b7da64..18ca25c2d4a 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -522,6 +522,39 @@ static inline void jmp_rel(struct decode_cache *c, int rel) register_address_increment(c, &c->eip, rel); } +static void set_seg_override(struct decode_cache *c, int seg) +{ + c->has_seg_override = true; + c->seg_override = seg; +} + +static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) +{ + if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) + return 0; + + return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg); +} + +static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, + struct decode_cache *c) +{ + if (!c->has_seg_override) + return 0; + + return seg_base(ctxt, c->seg_override); +} + +static unsigned long es_base(struct x86_emulate_ctxt *ctxt) +{ + return seg_base(ctxt, VCPU_SREG_ES); +} + +static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) +{ + return seg_base(ctxt, VCPU_SREG_SS); +} + static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned 
long linear, u8 *dest) @@ -735,8 +768,8 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, } if (c->modrm_rm == 2 || c->modrm_rm == 3 || (c->modrm_rm == 6 && c->modrm_mod != 0)) - if (!c->override_base) - c->override_base = &ctxt->ss_base; + if (!c->has_seg_override) + set_seg_override(c, VCPU_SREG_SS); c->modrm_ea = (u16)c->modrm_ea; } else { /* 32/64-bit ModR/M decode. */ @@ -807,6 +840,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) memset(c, 0, sizeof(struct decode_cache)); c->eip = ctxt->vcpu->arch.rip; + ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); switch (mode) { @@ -845,23 +879,15 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) /* switch between 2/4 bytes */ c->ad_bytes = def_ad_bytes ^ 6; break; + case 0x26: /* ES override */ case 0x2e: /* CS override */ - c->override_base = &ctxt->cs_base; - break; + case 0x36: /* SS override */ case 0x3e: /* DS override */ - c->override_base = &ctxt->ds_base; - break; - case 0x26: /* ES override */ - c->override_base = &ctxt->es_base; + set_seg_override(c, (c->b >> 3) & 3); break; case 0x64: /* FS override */ - c->override_base = &ctxt->fs_base; - break; case 0x65: /* GS override */ - c->override_base = &ctxt->gs_base; - break; - case 0x36: /* SS override */ - c->override_base = &ctxt->ss_base; + set_seg_override(c, c->b & 7); break; case 0x40 ... 
0x4f: /* REX */ if (mode != X86EMUL_MODE_PROT64) @@ -933,15 +959,11 @@ done_prefixes: if (rc) goto done; - if (!c->override_base) - c->override_base = &ctxt->ds_base; - if (mode == X86EMUL_MODE_PROT64 && - c->override_base != &ctxt->fs_base && - c->override_base != &ctxt->gs_base) - c->override_base = NULL; + if (!c->has_seg_override) + set_seg_override(c, VCPU_SREG_DS); - if (c->override_base && !(!c->twobyte && c->b == 0x8d)) - c->modrm_ea += *c->override_base; + if (!(!c->twobyte && c->b == 0x8d)) + c->modrm_ea += seg_override_base(ctxt, c); if (c->ad_bytes != 8) c->modrm_ea = (u32)c->modrm_ea; @@ -1043,7 +1065,7 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) c->dst.bytes = c->op_bytes; c->dst.val = c->src.val; register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); - c->dst.ptr = (void *) register_address(c, ctxt->ss_base, + c->dst.ptr = (void *) register_address(c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]); } @@ -1053,7 +1075,7 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, struct decode_cache *c = &ctxt->decode; int rc; - rc = ops->read_std(register_address(c, ctxt->ss_base, + rc = ops->read_std(register_address(c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]), &c->dst.val, c->dst.bytes, ctxt->vcpu); if (rc != 0) @@ -1375,11 +1397,11 @@ special_insn: register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); c->dst.ptr = (void *) register_address( - c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]); + c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]); break; case 0x58 ... 0x5f: /* pop reg */ pop_instruction: - if ((rc = ops->read_std(register_address(c, ctxt->ss_base, + if ((rc = ops->read_std(register_address(c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]), c->dst.ptr, c->op_bytes, ctxt->vcpu)) != 0) goto done; @@ -1405,7 +1427,7 @@ special_insn: c->rep_prefix ? 
address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, (ctxt->eflags & EFLG_DF), - register_address(c, ctxt->es_base, + register_address(c, es_base(ctxt), c->regs[VCPU_REGS_RDI]), c->rep_prefix, c->regs[VCPU_REGS_RDX]) == 0) { @@ -1421,9 +1443,8 @@ special_insn: c->rep_prefix ? address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, (ctxt->eflags & EFLG_DF), - register_address(c, c->override_base ? - *c->override_base : - ctxt->ds_base, + register_address(c, + seg_override_base(ctxt, c), c->regs[VCPU_REGS_RSI]), c->rep_prefix, c->regs[VCPU_REGS_RDX]) == 0) { @@ -1559,11 +1580,10 @@ special_insn: c->dst.type = OP_MEM; c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.ptr = (unsigned long *)register_address(c, - ctxt->es_base, + es_base(ctxt), c->regs[VCPU_REGS_RDI]); if ((rc = ops->read_emulated(register_address(c, - c->override_base ? *c->override_base : - ctxt->ds_base, + seg_override_base(ctxt, c), c->regs[VCPU_REGS_RSI]), &c->dst.val, c->dst.bytes, ctxt->vcpu)) != 0) @@ -1579,8 +1599,7 @@ special_insn: c->src.type = OP_NONE; /* Disable writeback. */ c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->src.ptr = (unsigned long *)register_address(c, - c->override_base ? *c->override_base : - ctxt->ds_base, + seg_override_base(ctxt, c), c->regs[VCPU_REGS_RSI]); if ((rc = ops->read_emulated((unsigned long)c->src.ptr, &c->src.val, @@ -1591,7 +1610,7 @@ special_insn: c->dst.type = OP_NONE; /* Disable writeback. */ c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.ptr = (unsigned long *)register_address(c, - ctxt->es_base, + es_base(ctxt), c->regs[VCPU_REGS_RDI]); if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, @@ -1615,7 +1634,7 @@ special_insn: c->dst.type = OP_MEM; c->dst.bytes = (c->d & ByteOp) ? 
1 : c->op_bytes; c->dst.ptr = (unsigned long *)register_address(c, - ctxt->es_base, + es_base(ctxt), c->regs[VCPU_REGS_RDI]); c->dst.val = c->regs[VCPU_REGS_RAX]; register_address_increment(c, &c->regs[VCPU_REGS_RDI], @@ -1627,8 +1646,7 @@ special_insn: c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; if ((rc = ops->read_emulated(register_address(c, - c->override_base ? *c->override_base : - ctxt->ds_base, + seg_override_base(ctxt, c), c->regs[VCPU_REGS_RSI]), &c->dst.val, c->dst.bytes, -- cgit v1.2.3 From 6ada8cca79cb971f5da7d1756f4f9292e3ef1e03 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 22 Jun 2008 16:45:24 +0300 Subject: KVM: MMU: When debug is enabled, make it a run-time parameter Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5ebb2788bd7..5994645dcee 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -66,7 +66,8 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {} #endif #if defined(MMU_DEBUG) || defined(AUDIT) -static int dbg = 1; +static int dbg = 0; +module_param(dbg, bool, 0644); #endif #ifndef MMU_DEBUG -- cgit v1.2.3 From db475c39eca0f2e44953d96e768d7ce808ab85bd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 22 Jun 2008 16:46:22 +0300 Subject: KVM: MMU: Fix printk format Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5994645dcee..1fd8e3b58cc 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1116,7 +1116,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, mark_page_dirty(vcpu->kvm, gfn); pgprintk("%s: setting spte %llx\n", __func__, spte); - pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n", + pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr 
%p\n", (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB", (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte); set_shadow_pte(shadow_pte, spte); -- cgit v1.2.3 From 65267ea1b3e768dc54b63cd7fad520d89c27d350 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Wed, 18 Jun 2008 14:43:38 +0800 Subject: KVM: VMX: Fix a wrong usage of vmcs_config The function ept_update_paging_mode_cr0() writes to CPU_BASED_VM_EXEC_CONTROL based on vmcs_config.cpu_based_exec_ctrl. That's wrong because the variable may not be consistent with the content in the CPU_BASED_VM_EXEC_CONTROL MSR. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1bb99465720..6a3a4038f3b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1441,7 +1441,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, if (!(cr0 & X86_CR0_PG)) { /* From paging/starting to nonpaging */ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, - vmcs_config.cpu_based_exec_ctrl | + vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) | (CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING)); vcpu->arch.cr0 = cr0; @@ -1451,7 +1451,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, } else if (!is_paging(vcpu)) { /* From nonpaging to paging */ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, - vmcs_config.cpu_based_exec_ctrl & + vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING)); vcpu->arch.cr0 = cr0; -- cgit v1.2.3 From efa67e0d1f51842393606034051d805ab9948abd Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Fri, 20 Jun 2008 09:51:30 +0200 Subject: KVM: VMX: Fake emulate Intel perfctr MSRs Older linux guests (in this case, 2.6.9) can attempt to access the performance counter MSRs without a fixup section, and injecting a GPF kills the guest. Work around by allowing the guest to write those MSRs.
Tested by me on RHEL-4 i386 and x86_64 guests, as well as F-9 guests. Signed-off-by: Chris Lalancette Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6a3a4038f3b..d493a97e788 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -920,6 +920,18 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) break; case MSR_IA32_TIME_STAMP_COUNTER: guest_write_tsc(data); + break; + case MSR_P6_PERFCTR0: + case MSR_P6_PERFCTR1: + case MSR_P6_EVNTSEL0: + case MSR_P6_EVNTSEL1: + /* + * Just discard all writes to the performance counters; this + * should keep both older linux and windows 64-bit guests + * happy + */ + pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); + break; default: vmx_load_host_state(vmx); -- cgit v1.2.3 From f8b78fa3d406f3a2dc038e2b47749013a9295994 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 23 Jun 2008 12:04:25 -0300 Subject: KVM: move slots_lock acquisition down to vapic_exit There is no need to grab slots_lock if the vapic_page will not be touched.
Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f726ba79fd3..55906e4c467 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2787,8 +2787,10 @@ static void vapic_exit(struct kvm_vcpu *vcpu) if (!apic || !apic->vapic_addr) return; + down_read(&vcpu->kvm->slots_lock); kvm_release_page_dirty(apic->vapic_page); mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); + up_read(&vcpu->kvm->slots_lock); } static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -2944,9 +2946,7 @@ out: post_kvm_run_save(vcpu, kvm_run); - down_read(&vcpu->kvm->slots_lock); vapic_exit(vcpu); - up_read(&vcpu->kvm->slots_lock); return r; } -- cgit v1.2.3 From dfdded7c41e5b68c79a9f8a942d41f56bc265ba4 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Fri, 27 Jun 2008 15:05:34 +0200 Subject: KVM: Fix memory leak on guest exit This patch fixes a memory leak: we want to free the physmem when destroying the vm. Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/kvm-s390.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4585c8ac2b0..b802ce6f675 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -194,6 +194,7 @@ out_nokvm: void kvm_arch_destroy_vm(struct kvm *kvm) { debug_unregister(kvm->arch.dbf); + kvm_free_physmem(kvm); free_page((unsigned long)(kvm->arch.sca)); kfree(kvm); module_put(THIS_MODULE); -- cgit v1.2.3 From 4da29e909ea8087de09e27476f91f51a070cabe8 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 27 Jun 2008 15:05:38 +0200 Subject: KVM: s390: Set guest storage limit and offset to sane values Some machines do not accept 16EB as guest storage limit. Let's change the default for the guest storage limit to a sane value.
We also should set the guest_origin to what userspace thinks it is. This allows guests starting at an address != 0. Signed-off-by: Christian Borntraeger Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/kvm-s390.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b802ce6f675..cdab57c5bc7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -247,11 +247,16 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gbea = 1; } +/* The current code can have up to 256 pages for virtio */ +#define VIRTIODESCSPACE (256ul * 4096ul) + int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); - vcpu->arch.sie_block->gmslm = 0xffffffffffUL; - vcpu->arch.sie_block->gmsor = 0x000000000000; + vcpu->arch.sie_block->gmslm = vcpu->kvm->arch.guest_memsize + + vcpu->kvm->arch.guest_origin + + VIRTIODESCSPACE - 1ul; + vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin; vcpu->arch.sie_block->ecb = 2; vcpu->arch.sie_block->eca = 0xC1002001U; setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, -- cgit v1.2.3 From 180c12fb22bd17c7187ae1bce023d24a42b2980c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 27 Jun 2008 15:05:40 +0200 Subject: KVM: s390: rename private structures While doing some tests with our lcrash implementation I have seen a naming conflict with prefix_info in kvm_host.h vs. addrconf.h. To avoid future conflicts, let's rename private definitions in asm/kvm_host.h by adding the kvm_s390 prefix.
Signed-off-by: Christian Borntraeger Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/interrupt.c | 32 ++++++++++++++++---------------- arch/s390/kvm/kvm-s390.c | 3 ++- arch/s390/kvm/priv.c | 2 +- arch/s390/kvm/sigp.c | 20 ++++++++++---------- 4 files changed, 29 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 84a7fed4cd4..11230b0db95 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -31,7 +31,7 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) } static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, - struct interrupt_info *inti) + struct kvm_s390_interrupt_info *inti) { switch (inti->type) { case KVM_S390_INT_EMERGENCY: @@ -91,7 +91,7 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) } static void __set_intercept_indicator(struct kvm_vcpu *vcpu, - struct interrupt_info *inti) + struct kvm_s390_interrupt_info *inti) { switch (inti->type) { case KVM_S390_INT_EMERGENCY: @@ -111,7 +111,7 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, } static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, - struct interrupt_info *inti) + struct kvm_s390_interrupt_info *inti) { const unsigned short table[] = { 2, 4, 4, 6 }; int rc, exception = 0; @@ -290,9 +290,9 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) { - struct local_interrupt *li = &vcpu->arch.local_int; - struct float_interrupt *fi = vcpu->arch.local_int.float_int; - struct interrupt_info *inti; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; + struct kvm_s390_interrupt_info *inti; int rc = 0; if (atomic_read(&li->active)) { @@ -408,9 +408,9 @@ void kvm_s390_idle_wakeup(unsigned long data) void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { - struct local_interrupt *li = 
&vcpu->arch.local_int; - struct float_interrupt *fi = vcpu->arch.local_int.float_int; - struct interrupt_info *n, *inti = NULL; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; + struct kvm_s390_interrupt_info *n, *inti = NULL; int deliver; __reset_intercept_indicators(vcpu); @@ -465,8 +465,8 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) { - struct local_interrupt *li = &vcpu->arch.local_int; - struct interrupt_info *inti; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_interrupt_info *inti; inti = kzalloc(sizeof(*inti), GFP_KERNEL); if (!inti) @@ -487,9 +487,9 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int) { - struct local_interrupt *li; - struct float_interrupt *fi; - struct interrupt_info *inti; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *inti; int sigcpu; inti = kzalloc(sizeof(*inti), GFP_KERNEL); @@ -544,8 +544,8 @@ int kvm_s390_inject_vm(struct kvm *kvm, int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt *s390int) { - struct local_interrupt *li; - struct interrupt_info *inti; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; inti = kzalloc(sizeof(*inti), GFP_KERNEL); if (!inti) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index cdab57c5bc7..399acf3f64d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -275,7 +275,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, if (!vcpu) goto out_nomem; - vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL); + vcpu->arch.sie_block = (struct kvm_s390_sie_block *) + get_zeroed_page(GFP_KERNEL); if (!vcpu->arch.sie_block) goto out_free_cpu; 
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c02286c6a93..2e2d2ffb6a0 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -199,7 +199,7 @@ out: static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) { - struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; int cpus = 0; int n; diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 0a236acfb5f..5a556114eaa 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -45,7 +45,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) { - struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; int rc; if (cpu_addr >= KVM_MAX_VCPUS) @@ -71,9 +71,9 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) { - struct float_interrupt *fi = &vcpu->kvm->arch.float_int; - struct local_interrupt *li; - struct interrupt_info *inti; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; int rc; if (cpu_addr >= KVM_MAX_VCPUS) @@ -108,9 +108,9 @@ unlock: static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store) { - struct float_interrupt *fi = &vcpu->kvm->arch.float_int; - struct local_interrupt *li; - struct interrupt_info *inti; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; int rc; if (cpu_addr >= KVM_MAX_VCPUS) @@ -169,9 +169,9 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, u64 *reg) { - struct float_interrupt *fi = &vcpu->kvm->arch.float_int; - struct local_interrupt *li; - struct interrupt_info *inti; + struct 
kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; int rc; u8 tmp; -- cgit v1.2.3 From 0da1db75a2feca54564add30828bab658982481c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 2 Jul 2008 16:02:11 +0200 Subject: KVM: SVM: fix suspend/resume support On suspend the svm_hardware_disable function is called which frees all svm_data variables. On resume they are not re-allocated. This patch moves the deallocation of svm_data from the hardware_disable function to the hardware_unsetup function, which is not called on suspend. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 238e8f3afaf..858e2970223 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -272,19 +272,11 @@ static int has_svm(void) static void svm_hardware_disable(void *garbage) { - struct svm_cpu_data *svm_data - = per_cpu(svm_data, raw_smp_processor_id()); - - if (svm_data) { - uint64_t efer; + uint64_t efer; - wrmsrl(MSR_VM_HSAVE_PA, 0); - rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); - per_cpu(svm_data, raw_smp_processor_id()) = NULL; - __free_page(svm_data->save_area); - kfree(svm_data); - } + wrmsrl(MSR_VM_HSAVE_PA, 0); + rdmsrl(MSR_EFER, efer); + wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); } static void svm_hardware_enable(void *garbage) @@ -323,6 +315,19 @@ static void svm_hardware_enable(void *garbage) page_to_pfn(svm_data->save_area) << PAGE_SHIFT); } +static void svm_cpu_uninit(int cpu) +{ + struct svm_cpu_data *svm_data + = per_cpu(svm_data, raw_smp_processor_id()); + + if (!svm_data) + return; + + per_cpu(svm_data, raw_smp_processor_id()) = NULL; + __free_page(svm_data->save_area); + kfree(svm_data); +} + static int svm_cpu_init(int cpu) { struct svm_cpu_data *svm_data; @@
-460,6 +465,11 @@ err: static __exit void svm_hardware_unsetup(void) { + int cpu; + + for_each_online_cpu(cpu) + svm_cpu_uninit(cpu); + __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); iopm_base = 0; } -- cgit v1.2.3 From 7e37c2998a5a0b00134f6227167694b710f57ac0 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 1 Jul 2008 01:19:19 +0300 Subject: x86: KVM guest: make kvm_smp_prepare_boot_cpu() static This patch makes the needlessly global kvm_smp_prepare_boot_cpu() static. Signed-off-by: Adrian Bunk Signed-off-by: Avi Kivity --- arch/x86/kernel/kvmclock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 87edf1ceb1d..d02def06ca9 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -113,7 +113,7 @@ static void kvm_setup_secondary_clock(void) #endif #ifdef CONFIG_SMP -void __init kvm_smp_prepare_boot_cpu(void) +static void __init kvm_smp_prepare_boot_cpu(void) { WARN_ON(kvm_register_clock("primary cpu clock")); native_smp_prepare_boot_cpu(); -- cgit v1.2.3 From 5a4c92880493945678315a6df810f7a21f55b985 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 3 Jul 2008 18:33:02 -0300 Subject: KVM: mmu_shrink: kvm_mmu_zap_page requires slots_lock to be held kvm_mmu_zap_page() needs slots lock held (rmap_remove->gfn_to_memslot, for example). Since kvm_lock spinlock is held in mmu_shrink(), do a non-blocking down_read_trylock(). Untested. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1fd8e3b58cc..ff7cf632175 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1987,6 +1987,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) list_for_each_entry(kvm, &vm_list, vm_list) { int npages; + if (!down_read_trylock(&kvm->slots_lock)) + continue; spin_lock(&kvm->mmu_lock); npages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; @@ -1999,6 +2001,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) nr_to_scan--; spin_unlock(&kvm->mmu_lock); + up_read(&kvm->slots_lock); } if (kvm_freed) list_move_tail(&kvm_freed->vm_list, &vm_list); -- cgit v1.2.3 From 4e1096d27f3d095735c1c69c7b0a26a06a0d454e Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Sun, 6 Jul 2008 19:16:51 +0800 Subject: KVM: VMX: Add ept_sync_context in flush_tlb Fix a potential issue caused by kvm_mmu_slot_remove_write_access(). The old behavior doesn't sync the EPT TLB with the modified EPT entry, which results in inconsistent content of EPT TLB and EPT table.
Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d493a97e788..fff3b490976 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -91,6 +91,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) } static int init_rmode(struct kvm *kvm); +static u64 construct_eptp(unsigned long root_hpa); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -1422,6 +1423,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu) static void vmx_flush_tlb(struct kvm_vcpu *vcpu) { vpid_sync_vcpu_all(to_vmx(vcpu)); + if (vm_need_ept()) + ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); } static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From ac9f6dc0db0b5582ebf8bb720d7c41c3d2159013 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 6 Jul 2008 15:48:31 +0300 Subject: KVM: Apply the kernel sigmask to vcpus blocked due to being uninitialized Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 55906e4c467..89fc8565ede 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2958,15 +2958,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load(vcpu); + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { kvm_vcpu_block(vcpu); - vcpu_put(vcpu); - return -EAGAIN; + r = -EAGAIN; + goto out; } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - /* re-sync apic's tpr */ if (!irqchip_in_kernel(vcpu->kvm)) kvm_set_cr8(vcpu, kvm_run->cr8); -- cgit v1.2.3 From 19fdfa0d133ae216e9d1c69a8333fe63fcf8e584 Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Sun, 6 Jul 
2008 16:51:26 +0300 Subject: KVM: x86 emulator: Fix HLT instruction This patch fixes issue encountered with HLT instruction under FreeDOS's HIMEM XMS Driver. The HLT instruction jumped directly to the done label and skips updating the EIP value, therefore causing the guest to spin endlessly on the same instruction. The patch changes the instruction so that it writes back the updated EIP value. Signed-off-by: Mohammed Gamal Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 18ca25c2d4a..8bc63f62fbb 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -1731,7 +1731,7 @@ special_insn: break; case 0xf4: /* hlt */ ctxt->vcpu->arch.halt_request = 1; - goto done; + break; case 0xf5: /* cmc */ /* complement carry flag from eflags reg */ ctxt->eflags ^= EFLG_CF; -- cgit v1.2.3 From c65bbfa1d693d375da51f9c8aa9fb26f09fa19ed Mon Sep 17 00:00:00 2001 From: Ben-Ami Yassour Date: Sun, 6 Jul 2008 17:15:07 +0300 Subject: KVM: check injected pic irq within valid pic irqs Check that an injected pic irq is between 0 and 15. 
Signed-off-by: Ben-Ami Yassour Signed-off-by: Avi Kivity --- arch/x86/kvm/i8259.c | 6 ++++-- arch/x86/kvm/irq.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 5857f59ad4a..c31164e8aa4 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -130,8 +130,10 @@ void kvm_pic_set_irq(void *opaque, int irq, int level) { struct kvm_pic *s = opaque; - pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); - pic_update_irq(s); + if (irq >= 0 && irq < PIC_NUM_PINS) { + pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); + pic_update_irq(s); + } } /* diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2a15be2275c..7ca47cbb48b 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -30,6 +30,8 @@ #include "ioapic.h" #include "lapic.h" +#define PIC_NUM_PINS 16 + struct kvm; struct kvm_vcpu; -- cgit v1.2.3 From d6e88aec07aa8f6c7e4024f5734ec659fd7c5a40 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 10 Jul 2008 16:53:33 +0300 Subject: KVM: Prefix some x86 low level function with kvm_, to avoid namespace issues Fixes compilation with CONFIG_VMI enabled. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 12 ++++++------ arch/x86/kvm/vmx.c | 24 ++++++++++++------------ arch/x86/kvm/x86.c | 18 +++++++++--------- 3 files changed, 27 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 858e2970223..b756e876dce 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1710,9 +1710,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) sync_lapic_to_cr8(vcpu); save_host_msrs(vcpu); - fs_selector = read_fs(); - gs_selector = read_gs(); - ldt_selector = read_ldt(); + fs_selector = kvm_read_fs(); + gs_selector = kvm_read_gs(); + ldt_selector = kvm_read_ldt(); svm->host_cr2 = kvm_read_cr2(); svm->host_dr6 = read_dr6(); svm->host_dr7 = read_dr7(); @@ -1845,9 +1845,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) write_dr7(svm->host_dr7); kvm_write_cr2(svm->host_cr2); - load_fs(fs_selector); - load_gs(gs_selector); - load_ldt(ldt_selector); + kvm_load_fs(fs_selector); + kvm_load_gs(gs_selector); + kvm_load_ldt(ldt_selector); load_host_msrs(vcpu); reload_tss(vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fff3b490976..0cac6370171 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -484,7 +484,7 @@ static void reload_tss(void) struct descriptor_table gdt; struct desc_struct *descs; - get_gdt(&gdt); + kvm_get_gdt(&gdt); descs = (void *)gdt.base; descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ load_TR_desc(); @@ -540,9 +540,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) * Set host fs and gs selectors. Unfortunately, 22.2.3 does not * allow segment selectors with cpl > 0 or ti == 1. 
*/ - vmx->host_state.ldt_sel = read_ldt(); + vmx->host_state.ldt_sel = kvm_read_ldt(); vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; - vmx->host_state.fs_sel = read_fs(); + vmx->host_state.fs_sel = kvm_read_fs(); if (!(vmx->host_state.fs_sel & 7)) { vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); vmx->host_state.fs_reload_needed = 0; @@ -550,7 +550,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) vmcs_write16(HOST_FS_SELECTOR, 0); vmx->host_state.fs_reload_needed = 1; } - vmx->host_state.gs_sel = read_gs(); + vmx->host_state.gs_sel = kvm_read_gs(); if (!(vmx->host_state.gs_sel & 7)) vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); else { @@ -586,15 +586,15 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) ++vmx->vcpu.stat.host_state_reload; vmx->host_state.loaded = 0; if (vmx->host_state.fs_reload_needed) - load_fs(vmx->host_state.fs_sel); + kvm_load_fs(vmx->host_state.fs_sel); if (vmx->host_state.gs_ldt_reload_needed) { - load_ldt(vmx->host_state.ldt_sel); + kvm_load_ldt(vmx->host_state.ldt_sel); /* * If we have to reload gs, we must take care to * preserve our gs base. */ local_irq_save(flags); - load_gs(vmx->host_state.gs_sel); + kvm_load_gs(vmx->host_state.gs_sel); #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); #endif @@ -654,8 +654,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * Linux uses per-cpu TSS and GDT, so set these when switching * processors. 
*/ - vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */ - get_gdt(&dt); + vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ + kvm_get_gdt(&dt); vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); @@ -1943,8 +1943,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ - vmcs_write16(HOST_FS_SELECTOR, read_fs()); /* 22.2.4 */ - vmcs_write16(HOST_GS_SELECTOR, read_gs()); /* 22.2.4 */ + vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */ + vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */ vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ #ifdef CONFIG_X86_64 rdmsrl(MSR_FS_BASE, a); @@ -1958,7 +1958,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ - get_idt(&dt); + kvm_get_idt(&dt); vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 89fc8565ede..b131f3c0cf6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3767,14 +3767,14 @@ void fx_init(struct kvm_vcpu *vcpu) * allocate ram with GFP_KERNEL. 
*/ if (!used_math()) - fx_save(&vcpu->arch.host_fx_image); + kvm_fx_save(&vcpu->arch.host_fx_image); /* Initialize guest FPU by resetting ours and saving into guest's */ preempt_disable(); - fx_save(&vcpu->arch.host_fx_image); - fx_finit(); - fx_save(&vcpu->arch.guest_fx_image); - fx_restore(&vcpu->arch.host_fx_image); + kvm_fx_save(&vcpu->arch.host_fx_image); + kvm_fx_finit(); + kvm_fx_save(&vcpu->arch.guest_fx_image); + kvm_fx_restore(&vcpu->arch.host_fx_image); preempt_enable(); vcpu->arch.cr0 |= X86_CR0_ET; @@ -3791,8 +3791,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) return; vcpu->guest_fpu_loaded = 1; - fx_save(&vcpu->arch.host_fx_image); - fx_restore(&vcpu->arch.guest_fx_image); + kvm_fx_save(&vcpu->arch.host_fx_image); + kvm_fx_restore(&vcpu->arch.guest_fx_image); } EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); @@ -3802,8 +3802,8 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) return; vcpu->guest_fpu_loaded = 0; - fx_save(&vcpu->arch.guest_fx_image); - fx_restore(&vcpu->arch.host_fx_image); + kvm_fx_save(&vcpu->arch.guest_fx_image); + kvm_fx_restore(&vcpu->arch.host_fx_image); ++vcpu->stat.fpu_reload; } EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); -- cgit v1.2.3 From 34d4cb8fca1f2a31be152b74797e6cd160ec9de6 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 10 Jul 2008 20:49:31 -0300 Subject: KVM: MMU: nuke shadowed pgtable pages and ptes on memslot destruction Flush the shadow mmu before removing regions to avoid stale entries. 
Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 3 +++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 5 +++++ 4 files changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 9408b30576d..2672f4d278a 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1455,6 +1455,9 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ +} long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b850d249702..53826a5f6c0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -170,6 +170,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ +} + struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 399acf3f64d..1782cbcd282 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -675,6 +675,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ +} + gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { return gfn; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b131f3c0cf6..9f1cdb011cf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4032,6 +4032,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, return 0; } +void kvm_arch_flush_shadow(struct kvm *kvm) +{ + kvm_mmu_zap_all(kvm); +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE -- cgit v1.2.3 From 376c53c2b30d4a1955240f59f4ecd959aa118f92 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 10 Jul 2008 20:54:29 -0300 Subject: KVM: MMU: improve invalid shadow root page 
handling Harden kvm_mmu_zap_page() against invalid root pages that had been shadowed from memslots that are gone. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ff7cf632175..7f57da66382 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -930,14 +930,17 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) } kvm_mmu_page_unlink_children(kvm, sp); if (!sp->root_count) { - if (!sp->role.metaphysical) + if (!sp->role.metaphysical && !sp->role.invalid) unaccount_shadowed(kvm, sp->gfn); hlist_del(&sp->hash_link); kvm_mmu_free_page(kvm, sp); } else { + int invalid = sp->role.invalid; list_move(&sp->link, &kvm->arch.active_mmu_pages); sp->role.invalid = 1; kvm_reload_remote_mmus(kvm); + if (!sp->role.metaphysical && !invalid) + unaccount_shadowed(kvm, sp->gfn); } kvm_mmu_reset_last_pte_updated(kvm); } -- cgit v1.2.3 From 2a7c5b8b550b1fb1db9eb490420132e637f5dcb4 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 10 Jul 2008 17:08:15 -0300 Subject: KVM: x86 emulator: emulate clflush If the guest issues a clflush on an MMIO address, the instruction can trap into the hypervisor. Currently, we do not decode clflush properly, causing the guest to hang. This patch fixes this by emulating clflush (opcode 0f ae).
Signed-off-by: Glauber Costa Signed-off-by: Avi Kivity --- arch/x86/kvm/x86_emulate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 8bc63f62fbb..f2f90468f8b 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -219,7 +219,7 @@ static u16 twobyte_table[256] = { /* 0xA0 - 0xA7 */ 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, /* 0xB0 - 0xB7 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, DstMem | SrcReg | ModRM | BitOp, @@ -1947,6 +1947,8 @@ twobyte_insn: c->src.val &= (c->dst.bytes << 3) - 1; emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); break; + case 0xae: /* clflush */ + break; case 0xb0 ... 0xb1: /* cmpxchg */ /* * Save real source value, then compare EAX against -- cgit v1.2.3 From 722c05f2192070bac0208b2c16ce13929b32d92f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 13 Jul 2008 11:33:54 +0300 Subject: KVM: MMU: Fix potential race setting upper shadow ptes on nonpae hosts The direct mapped shadow code (used for real mode and two dimensional paging) sets upper-level ptes using direct assignment rather than calling set_shadow_pte(). A nonpae host will split this into two writes, which opens up a race if another vcpu accesses the same memory area. Fix by calling set_shadow_pte() instead of assigning directly. Noticed by Izik Eidus. 
Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7f57da66382..b0e4ddca6c1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1189,9 +1189,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, return -ENOMEM; } - table[index] = __pa(new_table->spt) - | PT_PRESENT_MASK | PT_WRITABLE_MASK - | shadow_user_mask | shadow_x_mask; + set_shadow_pte(&table[index], + __pa(new_table->spt) + | PT_PRESENT_MASK | PT_WRITABLE_MASK + | shadow_user_mask | shadow_x_mask); } table_addr = table[index] & PT64_BASE_ADDR_MASK; } -- cgit v1.2.3 From 3450004a8cec8bab246372a1cabb9c2483b1e6c3 Mon Sep 17 00:00:00 2001 From: Dmitri Vorobiev Date: Tue, 15 Jul 2008 19:57:30 +0300 Subject: [MIPS] PCI: Make the pcibios_max_latency variable static The pcibios_max_latency variable is needlessly defined global, and this patch makes it static. Build-tested using malta_defconfig. Signed-off-by: Dmitri Vorobiev Signed-off-by: Ralf Baechle --- arch/mips/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index d7d6cb063d2..77bd5b68dc4 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -204,7 +204,7 @@ static int pcibios_enable_resources(struct pci_dev *dev, int mask) * If we set up a device for bus mastering, we need to check the latency * timer as certain crappy BIOSes forget to set it properly. 
*/ -unsigned int pcibios_max_latency = 255; +static unsigned int pcibios_max_latency = 255; void pcibios_set_master(struct pci_dev *dev) { -- cgit v1.2.3 From f028b8605613ade67fda554e30d367911d6c7222 Mon Sep 17 00:00:00 2001 From: Dmitri Vorobiev Date: Tue, 15 Jul 2008 19:57:31 +0300 Subject: [MIPS] Fix missing prototypes in asm/fpu.h While building the Malta defconfig, sparse spat the following warnings: >>>>>>>>>>>>>>>>>> arch/mips/math-emu/kernel_linkage.c:31:6: warning: symbol 'fpu_emulator_init_fpu' was not declared. Should it be static? arch/mips/math-emu/kernel_linkage.c:54:5: warning: symbol 'fpu_emulator_save_context' was not declared. Should it be static? arch/mips/math-emu/kernel_linkage.c:68:5: warning: symbol 'fpu_emulator_restore_context' was not declared. Should it be static? >>>>>>>>>>>>>>>>>> This patch fixes these errors by adding the proper prototypes to the include/asm-mips/fpu.h header, and actually using this header in the sparse-spotted source file. Build-tested with Malta defconfig. Signed-off-by: Dmitri Vorobiev Signed-off-by: Ralf Baechle --- arch/mips/math-emu/kernel_linkage.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/mips/math-emu/kernel_linkage.c b/arch/mips/math-emu/kernel_linkage.c index ed49ef01ac5..52e6c58c8de 100644 --- a/arch/mips/math-emu/kernel_linkage.c +++ b/arch/mips/math-emu/kernel_linkage.c @@ -24,6 +24,7 @@ #include #include +#include #include #define SIGNALLING_NAN 0x7ff800007ff80000LL -- cgit v1.2.3 From 36e5c21de51e83bfa17c1e7334050edd2eda3d47 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 16 Jul 2008 14:06:15 +0200 Subject: [MIPS] IP22, IP28: Fix merge bug Instead of one SGI_HAS_HAL2 for IP22 and one for IP28, IP28 got two of them... Let's give IP22 some ALSA sound, too. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Ralf Baechle [MIPS] IP22, IP28: Fix merge bug Instead of one SGI_HAS_HAL2 for IP22 and one for IP28, IP28 got two of them... 
Let's give IP22 some ALSA sound, too. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index d21df5f1b1f..30edc395dce 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -330,6 +330,7 @@ config SGI_IP22 select SGI_HAS_DS1286 select SGI_HAS_I8042 select SGI_HAS_INDYDOG + select SGI_HAS_HAL2 select SGI_HAS_SEEQ select SGI_HAS_WD93 select SGI_HAS_ZILOG @@ -386,7 +387,6 @@ config SGI_IP28 select SGI_HAS_I8042 select SGI_HAS_INDYDOG select SGI_HAS_HAL2 - select SGI_HAS_HAL2 select SGI_HAS_SEEQ select SGI_HAS_WD93 select SGI_HAS_ZILOG -- cgit v1.2.3 From 5a334fa9240411121f5dda9605fc7fd98429e8c5 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 16 Jul 2008 15:18:54 +0200 Subject: [MIPS] IP22: Use common SGI button driver Use the Indy/O2 button driver. Signed-off-by: Thomas Bogendoerfer Signed-off-by: Ralf Baechle --- arch/mips/sgi-ip22/ip22-platform.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/sgi-ip22/ip22-platform.c b/arch/mips/sgi-ip22/ip22-platform.c index fc6df96305e..60141235ec4 100644 --- a/arch/mips/sgi-ip22/ip22-platform.c +++ b/arch/mips/sgi-ip22/ip22-platform.c @@ -188,8 +188,7 @@ static int __init sgi_button_devinit(void) if (ip22_is_fullhouse()) return 0; /* full house has no volume buttons */ - return IS_ERR(platform_device_register_simple("sgiindybtns", - -1, NULL, 0)); + return IS_ERR(platform_device_register_simple("sgibtns", -1, NULL, 0)); } device_initcall(sgi_button_devinit); -- cgit v1.2.3 From 36a0a3cd45b49ceff78ac28efef1cbeec413d8c2 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 16 Jul 2008 15:18:58 +0200 Subject: [MIPS] IP32: Use common SGI button driver Use the Indy/O2 button driver. 
Signed-off-by: Thomas Bogendoerfer Signed-off-by: Ralf Baechle --- arch/mips/sgi-ip32/ip32-platform.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/mips/sgi-ip32/ip32-platform.c b/arch/mips/sgi-ip32/ip32-platform.c index 2ee401ba0b2..3d63721e0e8 100644 --- a/arch/mips/sgi-ip32/ip32-platform.c +++ b/arch/mips/sgi-ip32/ip32-platform.c @@ -85,18 +85,7 @@ device_initcall(sgio2audio_devinit); static __init int sgio2btns_devinit(void) { - struct platform_device *pd; - int ret; - - pd = platform_device_alloc("sgio2btns", -1); - if (!pd) - return -ENOMEM; - - ret = platform_device_add(pd); - if (ret) - platform_device_put(pd); - - return ret; + return IS_ERR(platform_device_register_simple("sgibtns", -1, NULL, 0)); } device_initcall(sgio2btns_devinit); -- cgit v1.2.3 From 73b4390fb23456964201abda79f1210fe337d01a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 16 Jul 2008 16:12:25 +0100 Subject: [MIPS] Routerboard 532: Support for base system Signed-off-by: Phil Sutter Signed-off-by: Florian Fainelli Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 20 +- arch/mips/Makefile | 7 + arch/mips/configs/rb532_defconfig | 1314 +++++++++++++++++++++++++++++++++++++ arch/mips/pci/Makefile | 1 + arch/mips/pci/fixup-rc32434.c | 69 ++ arch/mips/pci/ops-rc32434.c | 207 ++++++ arch/mips/pci/pci-rc32434.c | 221 +++++++ arch/mips/rb532/Makefile | 7 + arch/mips/rb532/devices.c | 331 ++++++++++ arch/mips/rb532/gpio.c | 220 +++++++ arch/mips/rb532/irq.c | 209 ++++++ arch/mips/rb532/prom.c | 158 +++++ arch/mips/rb532/serial.c | 53 ++ arch/mips/rb532/setup.c | 79 +++ arch/mips/rb532/time.c | 67 ++ 15 files changed, 2962 insertions(+), 1 deletion(-) create mode 100644 arch/mips/configs/rb532_defconfig create mode 100644 arch/mips/pci/fixup-rc32434.c create mode 100644 arch/mips/pci/ops-rc32434.c create mode 100644 arch/mips/pci/pci-rc32434.c create mode 100644 arch/mips/rb532/Makefile create mode 100644 arch/mips/rb532/devices.c 
create mode 100644 arch/mips/rb532/gpio.c create mode 100644 arch/mips/rb532/irq.c create mode 100644 arch/mips/rb532/prom.c create mode 100644 arch/mips/rb532/serial.c create mode 100644 arch/mips/rb532/setup.c create mode 100644 arch/mips/rb532/time.c (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 30edc395dce..b9c754f4070 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -558,6 +558,24 @@ config MACH_TX39XX config MACH_TX49XX bool "Toshiba TX49 series based machines" +config MIKROTIK_RB532 + bool "Mikrotik RB532 boards" + select CEVT_R4K + select CSRC_R4K + select DMA_NONCOHERENT + select GENERIC_HARDIRQS_NO__DO_IRQ + select HW_HAS_PCI + select IRQ_CPU + select SYS_HAS_CPU_MIPS32_R1 + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_LITTLE_ENDIAN + select SWAP_IO_SPACE + select BOOT_RAW + select GENERIC_GPIO + help + Support the Mikrotik(tm) RouterBoard 532 series, + based on the IDT RC32434 SoC. + config WR_PPMC bool "Wind River PPMC board" select CEVT_R4K @@ -899,7 +917,7 @@ config BOOT_ELF32 config MIPS_L1_CACHE_SHIFT int - default "4" if MACH_DECSTATION + default "4" if MACH_DECSTATION || MIKROTIK_RB532 default "7" if SGI_IP22 || SGI_IP27 || SGI_IP28 || SNI_RM default "4" if PMC_MSP4200_EVAL default "5" diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 356453322b4..9aab51caf16 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -559,6 +559,13 @@ load-$(CONFIG_MACH_TX49XX) += 0xffffffff80100000 # core-$(CONFIG_TOSHIBA_JMR3927) += arch/mips/txx9/jmr3927/ +# +# Routerboard 532 board +# +core-$(CONFIG_MIKROTIK_RB532) += arch/mips/rb532/ +cflags-$(CONFIG_MIKROTIK_RB532) += -Iinclude/asm-mips/mach-rc32434 +load-$(CONFIG_MIKROTIK_RB532) += 0xffffffff80101000 + # # Toshiba RBTX4927 board or # Toshiba RBTX4937 board diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig new file mode 100644 index 00000000000..f28dc32974e --- /dev/null +++ b/arch/mips/configs/rb532_defconfig @@ 
-0,0 +1,1314 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.25 +# Mon Apr 28 12:24:17 2008 +# +CONFIG_MIPS=y + +# +# Machine selection +# +# CONFIG_MACH_ALCHEMY is not set +# CONFIG_BASLER_EXCITE is not set +# CONFIG_BCM47XX is not set +# CONFIG_MIPS_COBALT is not set +# CONFIG_MACH_DECSTATION is not set +# CONFIG_MACH_JAZZ is not set +# CONFIG_LASAT is not set +# CONFIG_LEMOTE_FULONG is not set +# CONFIG_MIPS_ATLAS is not set +# CONFIG_MIPS_MALTA is not set +# CONFIG_MIPS_SEAD is not set +# CONFIG_MIPS_SIM is not set +# CONFIG_MARKEINS is not set +# CONFIG_MACH_VR41XX is not set +# CONFIG_PNX8550_JBS is not set +# CONFIG_PNX8550_STB810 is not set +# CONFIG_PMC_MSP is not set +# CONFIG_PMC_YOSEMITE is not set +# CONFIG_SGI_IP22 is not set +# CONFIG_SGI_IP27 is not set +# CONFIG_SGI_IP28 is not set +# CONFIG_SGI_IP32 is not set +# CONFIG_SIBYTE_CRHINE is not set +# CONFIG_SIBYTE_CARMEL is not set +# CONFIG_SIBYTE_CRHONE is not set +# CONFIG_SIBYTE_RHONE is not set +# CONFIG_SIBYTE_SWARM is not set +# CONFIG_SIBYTE_LITTLESUR is not set +# CONFIG_SIBYTE_SENTOSA is not set +# CONFIG_SIBYTE_BIGSUR is not set +# CONFIG_SNI_RM is not set +# CONFIG_TOSHIBA_JMR3927 is not set +CONFIG_MIKROTIK_RB532=y +# CONFIG_TOSHIBA_RBTX4927 is not set +# CONFIG_TOSHIBA_RBTX4938 is not set +# CONFIG_WR_PPMC is not set +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_ARCH_SUPPORTS_OPROFILE=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CMOS_UPDATE=y +CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_BOOT_RAW=y +CONFIG_CEVT_R4K=y +CONFIG_CSRC_R4K=y +CONFIG_DMA_NONCOHERENT=y +CONFIG_DMA_NEED_PCI_MAP_STATE=y +# CONFIG_HOTPLUG_CPU is not set +# CONFIG_NO_IOPORT is not set +CONFIG_GENERIC_GPIO=y +# CONFIG_CPU_BIG_ENDIAN is not set 
+CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_SYS_SUPPORTS_LITTLE_ENDIAN=y +CONFIG_IRQ_CPU=y +CONFIG_SWAP_IO_SPACE=y +CONFIG_MIPS_L1_CACHE_SHIFT=4 + +# +# CPU selection +# +# CONFIG_CPU_LOONGSON2 is not set +CONFIG_CPU_MIPS32_R1=y +# CONFIG_CPU_MIPS32_R2 is not set +# CONFIG_CPU_MIPS64_R1 is not set +# CONFIG_CPU_MIPS64_R2 is not set +# CONFIG_CPU_R3000 is not set +# CONFIG_CPU_TX39XX is not set +# CONFIG_CPU_VR41XX is not set +# CONFIG_CPU_R4300 is not set +# CONFIG_CPU_R4X00 is not set +# CONFIG_CPU_TX49XX is not set +# CONFIG_CPU_R5000 is not set +# CONFIG_CPU_R5432 is not set +# CONFIG_CPU_R6000 is not set +# CONFIG_CPU_NEVADA is not set +# CONFIG_CPU_R8000 is not set +# CONFIG_CPU_R10000 is not set +# CONFIG_CPU_RM7000 is not set +# CONFIG_CPU_RM9000 is not set +# CONFIG_CPU_SB1 is not set +CONFIG_SYS_HAS_CPU_MIPS32_R1=y +CONFIG_CPU_MIPS32=y +CONFIG_CPU_MIPSR1=y +CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y +CONFIG_CPU_SUPPORTS_32BIT_KERNEL=y + +# +# Kernel type +# +CONFIG_32BIT=y +# CONFIG_64BIT is not set +CONFIG_PAGE_SIZE_4KB=y +# CONFIG_PAGE_SIZE_8KB is not set +# CONFIG_PAGE_SIZE_16KB is not set +# CONFIG_PAGE_SIZE_64KB is not set +CONFIG_CPU_HAS_PREFETCH=y +CONFIG_MIPS_MT_DISABLED=y +# CONFIG_MIPS_MT_SMP is not set +# CONFIG_MIPS_MT_SMTC is not set +CONFIG_CPU_HAS_LLSC=y +CONFIG_CPU_HAS_SYNC=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_CPU_SUPPORTS_HIGHMEM=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +# CONFIG_RESOURCES_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_VIRT_TO_BUS=y +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +# CONFIG_HZ_48 is not set +CONFIG_HZ_100=y +# CONFIG_HZ_128 is not 
set +# CONFIG_HZ_250 is not set +# CONFIG_HZ_256 is not set +# CONFIG_HZ_1000 is not set +# CONFIG_HZ_1024 is not set +CONFIG_SYS_SUPPORTS_ARBIT_HZ=y +CONFIG_HZ=100 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +# CONFIG_KEXEC is not set +# CONFIG_SECCOMP is not set +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +# CONFIG_POSIX_MQUEUE is not set +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_CGROUPS is not set +CONFIG_GROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +# CONFIG_RT_GROUP_SCHED is not set +CONFIG_USER_SCHED=y +# CONFIG_CGROUP_SCHED is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +# CONFIG_RELAY is not set +# CONFIG_NAMESPACES is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +CONFIG_EMBEDDED=y +CONFIG_SYSCTL_SYSCALL=y +# CONFIG_KALLSYMS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +# CONFIG_ELF_CORE is not set +CONFIG_COMPAT_BRK=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +# CONFIG_VM_EVENT_COUNTERS is not set +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_HAVE_KPROBES is not set +# CONFIG_HAVE_KRETPROBES is not set +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y 
+CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +# CONFIG_KMOD is not set +CONFIG_BLOCK=y +# CONFIG_LBD is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set +# CONFIG_BLK_DEV_BSG is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +# CONFIG_IOSCHED_AS is not set +CONFIG_IOSCHED_DEADLINE=y +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_DEFAULT_AS is not set +CONFIG_DEFAULT_DEADLINE=y +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="deadline" +CONFIG_CLASSIC_RCU=y + +# +# Bus options (PCI, PCMCIA, EISA, ISA, TC) +# +CONFIG_HW_HAS_PCI=y +CONFIG_PCI=y +CONFIG_PCI_DOMAINS=y +# CONFIG_ARCH_SUPPORTS_MSI is not set +CONFIG_PCI_LEGACY=y +CONFIG_MMU=y +# CONFIG_PCCARD is not set +# CONFIG_HOTPLUG_PCI is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_TRAD_SIGNALS=y + +# +# Power management options +# +CONFIG_ARCH_SUSPEND_POSSIBLE=y +# CONFIG_PM is not set + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +CONFIG_ARPD=y +CONFIG_SYN_COOKIES=y +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG=m +CONFIG_INET_TCP_DIAG=m 
+CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BIC=m +CONFIG_TCP_CONG_CUBIC=m +CONFIG_TCP_CONG_WESTWOOD=m +CONFIG_TCP_CONG_HTCP=m +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_VEGAS=y +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +# CONFIG_DEFAULT_BIC is not set +# CONFIG_DEFAULT_CUBIC is not set +# CONFIG_DEFAULT_HTCP is not set +CONFIG_DEFAULT_VEGAS=y +# CONFIG_DEFAULT_WESTWOOD is not set +# CONFIG_DEFAULT_RENO is not set +CONFIG_DEFAULT_TCP_CONG="vegas" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IP_VS is not set +# CONFIG_IPV6 is not set +# CONFIG_NETWORK_SECMARK is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_NETFILTER_ADVANCED=y +# CONFIG_BRIDGE_NETFILTER is not set + +# +# Core Netfilter Configuration +# +# CONFIG_NETFILTER_NETLINK_QUEUE is not set +# CONFIG_NETFILTER_NETLINK_LOG is not set +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CT_ACCT=y +CONFIG_NF_CONNTRACK_MARK=y +# CONFIG_NF_CONNTRACK_EVENTS is not set +# CONFIG_NF_CT_PROTO_DCCP is not set +# CONFIG_NF_CT_PROTO_SCTP is not set +# CONFIG_NF_CT_PROTO_UDPLITE is not set +# CONFIG_NF_CONNTRACK_AMANDA is not set +CONFIG_NF_CONNTRACK_FTP=m +# CONFIG_NF_CONNTRACK_H323 is not set +CONFIG_NF_CONNTRACK_IRC=m +# CONFIG_NF_CONNTRACK_NETBIOS_NS is not set +# CONFIG_NF_CONNTRACK_PPTP is not set +# CONFIG_NF_CONNTRACK_SANE is not set +# CONFIG_NF_CONNTRACK_SIP is not set +CONFIG_NF_CONNTRACK_TFTP=m +# CONFIG_NF_CT_NETLINK is not set +CONFIG_NETFILTER_XTABLES=y +# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set +# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set +# CONFIG_NETFILTER_XT_TARGET_DSCP is not set +# CONFIG_NETFILTER_XT_TARGET_MARK is not set +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +# CONFIG_NETFILTER_XT_TARGET_NOTRACK is not set +# CONFIG_NETFILTER_XT_TARGET_RATEEST is not set +CONFIG_NETFILTER_XT_TARGET_TRACE=m +# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set 
+# CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +# CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +# CONFIG_NETFILTER_XT_MATCH_CONNMARK is not set +# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set +CONFIG_NETFILTER_XT_MATCH_DCCP=m +# CONFIG_NETFILTER_XT_MATCH_DSCP is not set +# CONFIG_NETFILTER_XT_MATCH_ESP is not set +# CONFIG_NETFILTER_XT_MATCH_HELPER is not set +# CONFIG_NETFILTER_XT_MATCH_IPRANGE is not set +# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +# CONFIG_NETFILTER_XT_MATCH_MAC is not set +# CONFIG_NETFILTER_XT_MATCH_MARK is not set +# CONFIG_NETFILTER_XT_MATCH_OWNER is not set +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y +# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set +# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set +# CONFIG_NETFILTER_XT_MATCH_RATEEST is not set +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_SCTP=m +CONFIG_NETFILTER_XT_MATCH_STATE=y +# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set +# CONFIG_NETFILTER_XT_MATCH_STRING is not set +# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set +# CONFIG_NETFILTER_XT_MATCH_TIME is not set +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m + +# +# IP: Netfilter Configuration +# +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_CONNTRACK_PROC_COMPAT=y +# CONFIG_IP_NF_QUEUE is not set +CONFIG_IP_NF_IPTABLES=y +# CONFIG_IP_NF_MATCH_RECENT is not set +# CONFIG_IP_NF_MATCH_ECN is not set +# CONFIG_IP_NF_MATCH_AH is not set +# CONFIG_IP_NF_MATCH_TTL is not set +CONFIG_IP_NF_MATCH_ADDRTYPE=m +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +# CONFIG_IP_NF_TARGET_LOG is not set +# CONFIG_IP_NF_TARGET_ULOG is not set +CONFIG_NF_NAT=y +CONFIG_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +# CONFIG_IP_NF_TARGET_REDIRECT is not set +# CONFIG_IP_NF_TARGET_NETMAP is not set +# CONFIG_NF_NAT_SNMP_BASIC is not set +CONFIG_NF_NAT_FTP=m +CONFIG_NF_NAT_IRC=m +CONFIG_NF_NAT_TFTP=m +# 
CONFIG_NF_NAT_AMANDA is not set +# CONFIG_NF_NAT_PPTP is not set +# CONFIG_NF_NAT_H323 is not set +# CONFIG_NF_NAT_SIP is not set +CONFIG_IP_NF_MANGLE=y +# CONFIG_IP_NF_TARGET_ECN is not set +# CONFIG_IP_NF_TARGET_TTL is not set +# CONFIG_IP_NF_TARGET_CLUSTERIP is not set +CONFIG_IP_NF_RAW=m +# CONFIG_IP_NF_ARPTABLES is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +CONFIG_BRIDGE=y +CONFIG_VLAN_8021Q=y +# CONFIG_DECNET is not set +CONFIG_LLC=y +CONFIG_LLC2=m +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +CONFIG_NET_SCH_CBQ=m +# CONFIG_NET_SCH_HTB is not set +# CONFIG_NET_SCH_HFSC is not set +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RR=m +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +CONFIG_NET_SCH_NETEM=m +# CONFIG_NET_SCH_INGRESS is not set + +# +# Classification +# +CONFIG_NET_CLS=y +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y +CONFIG_CLS_U32_MARK=y +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +# CONFIG_NET_CLS_FLOW is not set +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_STACK=32 +CONFIG_NET_EMATCH_CMP=m +CONFIG_NET_EMATCH_NBYTE=m +CONFIG_NET_EMATCH_U32=m +CONFIG_NET_EMATCH_META=m +CONFIG_NET_EMATCH_TEXT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=y +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +# CONFIG_NET_ACT_NAT is not set +CONFIG_NET_ACT_PEDIT=m +# CONFIG_NET_ACT_SIMP is not set +CONFIG_NET_CLS_IND=y +CONFIG_NET_SCH_FIFO=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +CONFIG_HAMRADIO=y + +# +# 
Packet Radio protocols +# +# CONFIG_AX25 is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y + +# +# Wireless +# +# CONFIG_CFG80211 is not set +CONFIG_WIRELESS_EXT=y +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_CONCAT is not set +CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_REDBOOT_PARTS is not set +# CONFIG_MTD_CMDLINE_PARTS is not set +# CONFIG_MTD_AR7_PARTS is not set + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=y +CONFIG_MTD_BLKDEVS=y +CONFIG_MTD_BLOCK=y +# CONFIG_FTL is not set +# CONFIG_NFTL is not set +# CONFIG_INFTL is not set +# CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set +# CONFIG_MTD_OOPS is not set + +# +# RAM/ROM/Flash chip drivers +# +# CONFIG_MTD_CFI is not set +# CONFIG_MTD_JEDECPROBE is not set +CONFIG_MTD_MAP_BANK_WIDTH_1=y +CONFIG_MTD_MAP_BANK_WIDTH_2=y +CONFIG_MTD_MAP_BANK_WIDTH_4=y +# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set +CONFIG_MTD_CFI_I1=y +CONFIG_MTD_CFI_I2=y +# CONFIG_MTD_CFI_I4 is not set +# CONFIG_MTD_CFI_I8 is not set +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +# CONFIG_MTD_ABSENT is not set + +# +# Mapping drivers for chip access +# +# CONFIG_MTD_COMPLEX_MAPPINGS is not set +# CONFIG_MTD_INTEL_VR_NOR is not set +# CONFIG_MTD_PLATRAM is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_PMC551 is not set +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_PHRAM is not set +# CONFIG_MTD_MTDRAM is not set +CONFIG_MTD_BLOCK2MTD=y + +# +# Disk-On-Chip Device 
Drivers +# +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOC2001PLUS is not set +CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_VERIFY_WRITE=y +# CONFIG_MTD_NAND_ECC_SMC is not set +# CONFIG_MTD_NAND_MUSEUM_IDS is not set +CONFIG_MTD_NAND_IDS=y +# CONFIG_MTD_NAND_DISKONCHIP is not set +# CONFIG_MTD_NAND_CAFE is not set +# CONFIG_MTD_NAND_NANDSIM is not set +CONFIG_MTD_NAND_PLATFORM=y +# CONFIG_MTD_ONENAND is not set + +# +# UBI - Unsorted block images +# +# CONFIG_MTD_UBI is not set +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +CONFIG_MISC_DEVICES=y +# CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +# CONFIG_ENCLOSURE_SERVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +# CONFIG_BLK_DEV_SD is not set +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +# CONFIG_BLK_DEV_SR is not set +# CONFIG_CHR_DEV_SG is not set +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_WAIT_SCAN=m + +# +# SCSI Transports +# +# CONFIG_SCSI_SPI_ATTRS is not set +# CONFIG_SCSI_FC_ATTRS is not set +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set +CONFIG_SCSI_LOWLEVEL=y +# CONFIG_ISCSI_TCP is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +# CONFIG_SCSI_AIC79XX is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_MVSAS is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_NSP32 is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set +CONFIG_ATA=y +# CONFIG_ATA_NONSTANDARD is not set +# CONFIG_SATA_PMP is not set +# CONFIG_SATA_AHCI is not set +# CONFIG_SATA_SIL24 is not set +CONFIG_ATA_SFF=y +# CONFIG_SATA_SVW is not set +# CONFIG_ATA_PIIX is not set +# CONFIG_SATA_MV is not set +# CONFIG_SATA_NV is not set +# CONFIG_PDC_ADMA is not set +# CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_PROMISE is not set +# CONFIG_SATA_SX4 is not 
set +# CONFIG_SATA_SIL is not set +# CONFIG_SATA_SIS is not set +# CONFIG_SATA_ULI is not set +# CONFIG_SATA_VIA is not set +# CONFIG_SATA_VITESSE is not set +# CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +# CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD640_PCI is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_IT821X is not set +# CONFIG_PATA_IT8213 is not set +# CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MARVELL is not set +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +# CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_NS87415 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_RADISYS is not set +CONFIG_PATA_RB532=y +# CONFIG_PATA_RZ1000 is not set +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_SIL680 is not set +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set +# CONFIG_PATA_PLATFORM is not set +# CONFIG_MD is not set +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_FIREWIRE is not set +# CONFIG_IEEE1394 is not set +# CONFIG_I2O is not set +CONFIG_NETDEVICES=y +# CONFIG_NETDEVICES_MULTIQUEUE is not set +CONFIG_IFB=m +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_VETH is not set +# CONFIG_ARCNET is not set +# CONFIG_PHYLIB is not set 
+CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_AX88796 is not set +CONFIG_KORINA=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_DM9000 is not set +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_TC35815 is not set +# CONFIG_EEPRO100 is not set +# CONFIG_E100 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_R6040 is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +CONFIG_VIA_RHINE=y +# CONFIG_VIA_RHINE_MMIO is not set +CONFIG_VIA_RHINE_NAPI=y +# CONFIG_SC92031 is not set +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set +# CONFIG_TR is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +CONFIG_WLAN_80211=y +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_LIBERTAS is not set +# CONFIG_HERMES is not set +CONFIG_ATMEL=m +# CONFIG_PCI_ATMEL is not set +# CONFIG_PRISM54 is not set +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_HOSTAP is not set +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +CONFIG_PPP=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=m +# CONFIG_PPP_SYNC_TTY is not set +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_BSDCOMP=m +# CONFIG_PPP_MPPE is not set +CONFIG_PPPOE=m +CONFIG_PPPOL2TP=m +# CONFIG_SLIP is not set +CONFIG_SLHC=m +# CONFIG_NET_FC is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set 
+# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +# CONFIG_KEYBOARD_ATKBD is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_GPIO is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_SERIAL_NONSTANDARD is not set +# CONFIG_NOZOMI is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +# CONFIG_SERIAL_8250_PCI is not set +CONFIG_SERIAL_8250_NR_UARTS=2 +CONFIG_SERIAL_8250_RUNTIME_UARTS=2 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_IPMI_HANDLER is not set +CONFIG_HW_RANDOM=y +# CONFIG_RTC is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y +# CONFIG_I2C is not set + +# +# SPI support +# +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +# CONFIG_SOFT_WATCHDOG is not 
set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# Sonics Silicon Backplane +# +CONFIG_SSB_POSSIBLE=y +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set + +# +# Multimedia devices +# +CONFIG_VIDEO_DEV=m +CONFIG_VIDEO_V4L2_COMMON=m +CONFIG_VIDEO_ALLOW_V4L1=y +CONFIG_VIDEO_V4L1_COMPAT=y +CONFIG_VIDEO_V4L2=m +CONFIG_VIDEO_V4L1=m +CONFIG_VIDEO_CAPTURE_DRIVERS=y +# CONFIG_VIDEO_ADV_DEBUG is not set +# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set + +# +# Encoders/decoders and other helper chips +# + +# +# Audio decoders +# + +# +# Video decoders +# + +# +# Video and audio decoders +# + +# +# MPEG video encoders +# +# CONFIG_VIDEO_CX2341X is not set + +# +# Video encoders +# + +# +# Video improvement chips +# +# CONFIG_VIDEO_VIVI is not set +# CONFIG_VIDEO_CPIA is not set +# CONFIG_VIDEO_STRADIS is not set +# CONFIG_SOC_CAMERA is not set +# CONFIG_RADIO_ADAPTERS is not set +# CONFIG_DVB_CORE is not set +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_DRM is not set +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +# CONFIG_FB is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set + +# +# Sound +# +# CONFIG_SOUND is not set +CONFIG_HID_SUPPORT=y +# CONFIG_HID is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +# CONFIG_USB is not set +# CONFIG_USB_OTG_WHITELIST is not set +# CONFIG_USB_OTG_BLACKLIST_HUB is not set + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y + +# +# LED drivers +# +# CONFIG_LEDS_GPIO is not set + +# +# LED Triggers +# +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +# 
CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set +# CONFIG_INFINIBAND is not set +CONFIG_RTC_LIB=y +# CONFIG_RTC_CLASS is not set +# CONFIG_UIO is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT3_FS is not set +# CONFIG_EXT4DEV_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +# CONFIG_XFS_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_CONFIGFS_FS=y + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +CONFIG_JFFS2_FS=y +CONFIG_JFFS2_FS_DEBUG=0 +CONFIG_JFFS2_FS_WRITEBUFFER=y +# CONFIG_JFFS2_FS_WBUF_VERIFY is not set +CONFIG_JFFS2_SUMMARY=y +# CONFIG_JFFS2_FS_XATTR is not set +CONFIG_JFFS2_COMPRESSION_OPTIONS=y +CONFIG_JFFS2_ZLIB=y +# CONFIG_JFFS2_LZO is not set +CONFIG_JFFS2_RTIME=y +# CONFIG_JFFS2_RUBIN is not set +# CONFIG_JFFS2_CMODE_NONE is not set +CONFIG_JFFS2_CMODE_PRIORITY=y +# CONFIG_JFFS2_CMODE_SIZE is not set +# CONFIG_JFFS2_CMODE_FAVOURLZO is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_SYSV_FS is not set +# 
CONFIG_UFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +# CONFIG_NFS_FS is not set +# CONFIG_NFSD is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +# CONFIG_OSF_PARTITION is not set +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y +CONFIG_MSDOS_PARTITION=y +CONFIG_BSD_DISKLABEL=y +# CONFIG_MINIX_SUBPARTITION is not set +# CONFIG_SOLARIS_X86_PARTITION is not set +# CONFIG_UNIXWARE_DISKLABEL is not set +# CONFIG_LDM_PARTITION is not set +# CONFIG_SGI_PARTITION is not set +# CONFIG_ULTRIX_PARTITION is not set +# CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set +# CONFIG_EFI_PARTITION is not set +# CONFIG_SYSV68_PARTITION is not set +# CONFIG_NLS is not set +# CONFIG_DLM is not set + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +# CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set +# CONFIG_DEBUG_KERNEL is not set +# CONFIG_SAMPLES is not set +CONFIG_CMDLINE="" + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +CONFIG_CRYPTO_ALGAPI=m +CONFIG_CRYPTO_AEAD=m +CONFIG_CRYPTO_BLKCIPHER=m +# CONFIG_CRYPTO_MANAGER is not set +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +CONFIG_CRYPTO_TEST=m + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +# 
CONFIG_CRYPTO_CBC is not set +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set +# CONFIG_CRYPTO_HW is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +# CONFIG_GENERIC_FIND_FIRST_BIT is not set +CONFIG_CRC_CCITT=m +CONFIG_CRC16=m +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index 57e34cafa49..15e01aec37f 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -49,3 +49,4 @@ obj-$(CONFIG_TOSHIBA_RBTX4938) += fixup-rbtx4938.o obj-$(CONFIG_VICTOR_MPC30X) += 
fixup-mpc30x.o obj-$(CONFIG_ZAO_CAPCELLA) += fixup-capcella.o obj-$(CONFIG_WR_PPMC) += fixup-wrppmc.o +obj-$(CONFIG_MIKROTIK_RB532) += pci-rc32434.o ops-rc32434.o fixup-rc32434.o diff --git a/arch/mips/pci/fixup-rc32434.c b/arch/mips/pci/fixup-rc32434.c new file mode 100644 index 00000000000..75b90dcb7a0 --- /dev/null +++ b/arch/mips/pci/fixup-rc32434.c @@ -0,0 +1,69 @@ +/* + * Copyright 2001 MontaVista Software Inc. + * Author: MontaVista Software, Inc. + * stevel@mvista.com or source@mvista.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include + +#include + +static int __devinitdata irq_map[2][12] = { + {0, 0, 2, 3, 2, 3, 0, 0, 0, 0, 0, 1}, + {0, 0, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3} +}; + +int __devinit pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +{ + int irq = 0; + + if (dev->bus->number < 2 && PCI_SLOT(dev->devfn) < 12) + irq = irq_map[dev->bus->number][PCI_SLOT(dev->devfn)]; + + return irq + GROUP4_IRQ_BASE + 4; +} + +static void rc32434_pci_early_fixup(struct pci_dev *dev) +{ + if (PCI_SLOT(dev->devfn) == 6 && dev->bus->number == 0) { + /* disable prefetched memory range */ + pci_write_config_word(dev, PCI_PREF_MEMORY_LIMIT, 0); + pci_write_config_word(dev, PCI_PREF_MEMORY_BASE, 0x10); + + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, 4); + } +} + +/* + * The fixup applies to both the IDT and VIA devices present on the board + */ +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, rc32434_pci_early_fixup); + +/* Do platform specific device initialization at pci_enable_device() time */ +int pcibios_plat_dev_init(struct pci_dev *dev) +{ + return 0; +} diff --git a/arch/mips/pci/ops-rc32434.c b/arch/mips/pci/ops-rc32434.c new file mode 100644 index 00000000000..d1f8fa210ca --- /dev/null +++ b/arch/mips/pci/ops-rc32434.c @@ -0,0 +1,207 @@ +/* + * BRIEF MODULE DESCRIPTION + * pci_ops for IDT EB434 board + * + * Copyright 2004 IDT Inc. (rischelp@idt.com) + * Copyright 2006 Felix Fietkau + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#define PCI_ACCESS_READ 0 +#define PCI_ACCESS_WRITE 1 + + +#define PCI_CFG_SET(bus, slot, func, off) \ + (rc32434_pci->pcicfga = (0x80000000 | \ + ((bus) << 16) | ((slot)<<11) | \ + ((func)<<8) | (off))) + +static inline int config_access(unsigned char access_type, + struct pci_bus *bus, unsigned int devfn, + unsigned char where, u32 *data) +{ + unsigned int slot = PCI_SLOT(devfn); + u8 func = PCI_FUNC(devfn); + + /* Setup address */ + PCI_CFG_SET(bus->number, slot, func, where); + rc32434_sync(); + + if (access_type == PCI_ACCESS_WRITE) + rc32434_pci->pcicfgd = *data; + else + *data = rc32434_pci->pcicfgd; + + rc32434_sync(); + + return 0; +} + + +/* + * We can't address 8 and 16 bit words directly. Instead we have to + * read/write a 32bit word and mask/modify the data we actually want. 
+ */
+static int read_config_byte(struct pci_bus *bus, unsigned int devfn,
+			    int where, u8 *val)
+{
+	u32 data;
+	int ret;
+
+	ret = config_access(PCI_ACCESS_READ, bus, devfn, where, &data);
+	/* pick the addressed byte lane out of the 32-bit word */
+	*val = (data >> ((where & 3) << 3)) & 0xff;
+	return ret;
+}
+
+static int read_config_word(struct pci_bus *bus, unsigned int devfn,
+			    int where, u16 *val)
+{
+	u32 data;
+	int ret;
+
+	ret = config_access(PCI_ACCESS_READ, bus, devfn, where, &data);
+	/* pick the addressed 16-bit lane out of the 32-bit word */
+	*val = (data >> ((where & 3) << 3)) & 0xffff;
+	return ret;
+}
+
+/*
+ * Read a full 32-bit configuration register into *val.
+ *
+ * Reads of PCI_VENDOR_ID are retried, sleeping 2, 4 and then 8 ms, for as
+ * long as the value read back looks like "no device here".  After the last
+ * retry 0 is returned with whatever was read last left in *val.
+ */
+static int read_config_dword(struct pci_bus *bus, unsigned int devfn,
+			     int where, u32 *val)
+{
+	int ret;
+	int delay = 1;
+
+	/*
+	 * Don't scan too far, else there will be errors with plugged in
+	 * daughterboard (rb564).
+	 */
+	if (bus->number == 0 && PCI_SLOT(devfn) > 21)
+		return 0;
+
+retry:
+	ret = config_access(PCI_ACCESS_READ, bus, devfn, where, val);
+
+	/*
+	 * Certain devices react delayed at device scan time, this
+	 * gives them time to settle.
+	 *
+	 * The test has to look at the data that was read (*val): ret is
+	 * only the status of the access, so comparing it against the
+	 * vendor ID patterns made every vendor ID read run through all
+	 * the retries regardless of what the device answered.
+	 */
+	if (where == PCI_VENDOR_ID) {
+		if (*val == 0xffffffff || *val == 0x00000000 ||
+		    *val == 0x0000ffff || *val == 0xffff0000) {
+			if (delay > 4)
+				return 0;
+			delay *= 2;
+			msleep(delay);
+			goto retry;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Write one byte: read the containing 32-bit word, replace the addressed
+ * byte lane and write the word back.
+ */
+static int
+write_config_byte(struct pci_bus *bus, unsigned int devfn, int where,
+		  u8 val)
+{
+	unsigned int shift = (where & 3) << 3;
+	u32 data = 0;
+
+	if (config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
+		return -1;
+
+	/* unsigned operands: the lane may reach bit 31 */
+	data = (data & ~((u32)0xff << shift)) | ((u32)val << shift);
+
+	if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
+		return -1;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+
+/*
+ * Write one 16-bit word: read the containing 32-bit word, replace the
+ * addressed lane and write the word back.
+ */
+static int
+write_config_word(struct pci_bus *bus, unsigned int devfn, int where,
+		  u16 val)
+{
+	unsigned int shift = (where & 3) << 3;
+	u32 data = 0;
+
+	if (config_access(PCI_ACCESS_READ, bus, devfn, where, &data))
+		return -1;
+
+	/* unsigned operands: the lane may reach bit 31 */
+	data = (data & ~((u32)0xffff << shift)) | ((u32)val << shift);
+
+	if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &data))
+		return -1;
+
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+
+static 
int +write_config_dword(struct pci_bus *bus, unsigned int devfn, int where, + u32 val) +{ + if (config_access(PCI_ACCESS_WRITE, bus, devfn, where, &val)) + return -1; + + return PCIBIOS_SUCCESSFUL; +} + +static int pci_config_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + switch (size) { + case 1: + return read_config_byte(bus, devfn, where, (u8 *) val); + case 2: + return read_config_word(bus, devfn, where, (u16 *) val); + default: + return read_config_dword(bus, devfn, where, val); + } +} + +static int pci_config_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + switch (size) { + case 1: + return write_config_byte(bus, devfn, where, (u8) val); + case 2: + return write_config_word(bus, devfn, where, (u16) val); + default: + return write_config_dword(bus, devfn, where, val); + } +} + +struct pci_ops rc32434_pci_ops = { + .read = pci_config_read, + .write = pci_config_write, +}; diff --git a/arch/mips/pci/pci-rc32434.c b/arch/mips/pci/pci-rc32434.c new file mode 100644 index 00000000000..1c2821e2f49 --- /dev/null +++ b/arch/mips/pci/pci-rc32434.c @@ -0,0 +1,221 @@ +/* + * BRIEF MODULE DESCRIPTION + * PCI initialization for IDT EB434 board + * + * Copyright 2004 IDT Inc. (rischelp@idt.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include + +#include +#include + +#define PCI_ACCESS_READ 0 +#define PCI_ACCESS_WRITE 1 + +/* define an unsigned array for the PCI registers */ +static unsigned int korina_cnfg_regs[25] = { + KORINA_CNFG1, KORINA_CNFG2, KORINA_CNFG3, KORINA_CNFG4, + KORINA_CNFG5, KORINA_CNFG6, KORINA_CNFG7, KORINA_CNFG8, + KORINA_CNFG9, KORINA_CNFG10, KORINA_CNFG11, KORINA_CNFG12, + KORINA_CNFG13, KORINA_CNFG14, KORINA_CNFG15, KORINA_CNFG16, + KORINA_CNFG17, KORINA_CNFG18, KORINA_CNFG19, KORINA_CNFG20, + KORINA_CNFG21, KORINA_CNFG22, KORINA_CNFG23, KORINA_CNFG24 +}; +static struct resource rc32434_res_pci_mem1; +static struct resource rc32434_res_pci_mem2; + +static struct resource rc32434_res_pci_mem1 = { + .name = "PCI MEM1", + .start = 0x50000000, + .end = 0x5FFFFFFF, + .flags = IORESOURCE_MEM, + .parent = &rc32434_res_pci_mem1, + .sibling = NULL, + .child = &rc32434_res_pci_mem2 +}; + +static struct resource rc32434_res_pci_mem2 = { + .name = "PCI Mem2", + .start = 0x60000000, + .end = 0x6FFFFFFF, + .flags = IORESOURCE_MEM, + .parent = &rc32434_res_pci_mem1, + .sibling = NULL, + .child = NULL +}; + +static struct resource rc32434_res_pci_io1 = { + .name = "PCI I/O1", + .start = 0x18800000, + .end = 0x188FFFFF, + .flags = IORESOURCE_IO, 
+}; + +extern struct pci_ops rc32434_pci_ops; + +#define PCI_MEM1_START PCI_ADDR_START +#define PCI_MEM1_END (PCI_ADDR_START + CPUTOPCI_MEM_WIN - 1) +#define PCI_MEM2_START (PCI_ADDR_START + CPUTOPCI_MEM_WIN) +#define PCI_MEM2_END (PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN) - 1) +#define PCI_IO1_START (PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN)) +#define PCI_IO1_END \ + (PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN) + CPUTOPCI_IO_WIN - 1) +#define PCI_IO2_START \ + (PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN) + CPUTOPCI_IO_WIN) +#define PCI_IO2_END \ + (PCI_ADDR_START + (2 * CPUTOPCI_MEM_WIN) + (2 * CPUTOPCI_IO_WIN) - 1) + +struct pci_controller rc32434_controller2; + +struct pci_controller rc32434_controller = { + .pci_ops = &rc32434_pci_ops, + .mem_resource = &rc32434_res_pci_mem1, + .io_resource = &rc32434_res_pci_io1, + .mem_offset = 0, + .io_offset = 0, + +}; + +#ifdef __MIPSEB__ +#define PCI_ENDIAN_FLAG PCILBAC_sb_m +#else +#define PCI_ENDIAN_FLAG 0 +#endif + +static int __init rc32434_pcibridge_init(void) +{ + unsigned int pcicvalue, pcicdata = 0; + unsigned int dummyread, pcicntlval; + int loopCount; + unsigned int pci_config_addr; + + pcicvalue = rc32434_pci->pcic; + pcicvalue = (pcicvalue >> PCIM_SHFT) & PCIM_BIT_LEN; + if (!((pcicvalue == PCIM_H_EA) || + (pcicvalue == PCIM_H_IA_FIX) || + (pcicvalue == PCIM_H_IA_RR))) { + pr_err(KERN_ERR "PCI init error!!!\n"); + /* Not in Host Mode, return ERROR */ + return -1; + } + /* Enables the Idle Grant mode, Arbiter Parking */ + pcicdata |= (PCI_CTL_IGM | PCI_CTL_EAP | PCI_CTL_EN); + rc32434_pci->pcic = pcicdata; /* Enable the PCI bus Interface */ + /* Zero out the PCI status & PCI Status Mask */ + for (;;) { + pcicdata = rc32434_pci->pcis; + if (!(pcicdata & PCI_STAT_RIP)) + break; + } + + rc32434_pci->pcis = 0; + rc32434_pci->pcism = 0xFFFFFFFF; + /* Zero out the PCI decoupled registers */ + rc32434_pci->pcidac = 0; /* + * disable PCI decoupled accesses at + * initialization + */ + rc32434_pci->pcidas = 0; /* clear the status 
*/ + rc32434_pci->pcidasm = 0x0000007F; /* Mask all the interrupts */ + /* Mask PCI Messaging Interrupts */ + rc32434_pci_msg->pciiic = 0; + rc32434_pci_msg->pciiim = 0xFFFFFFFF; + rc32434_pci_msg->pciioic = 0; + rc32434_pci_msg->pciioim = 0; + + + /* Setup PCILB0 as Memory Window */ + rc32434_pci->pcilba[0].address = (unsigned int) (PCI_ADDR_START); + + /* setup the PCI map address as same as the local address */ + + rc32434_pci->pcilba[0].mapping = (unsigned int) (PCI_ADDR_START); + + + /* Setup PCILBA1 as MEM */ + rc32434_pci->pcilba[0].control = + (((SIZE_256MB & 0x1f) << PCI_LBAC_SIZE_BIT) | PCI_ENDIAN_FLAG); + dummyread = rc32434_pci->pcilba[0].control; /* flush the CPU write Buffers */ + rc32434_pci->pcilba[1].address = 0x60000000; + rc32434_pci->pcilba[1].mapping = 0x60000000; + + /* setup PCILBA2 as IO Window */ + rc32434_pci->pcilba[1].control = + (((SIZE_256MB & 0x1f) << PCI_LBAC_SIZE_BIT) | PCI_ENDIAN_FLAG); + dummyread = rc32434_pci->pcilba[1].control; /* flush the CPU write Buffers */ + rc32434_pci->pcilba[2].address = 0x18C00000; + rc32434_pci->pcilba[2].mapping = 0x18FFFFFF; + + /* setup PCILBA2 as IO Window */ + rc32434_pci->pcilba[2].control = + (((SIZE_4MB & 0x1f) << PCI_LBAC_SIZE_BIT) | PCI_ENDIAN_FLAG); + dummyread = rc32434_pci->pcilba[2].control; /* flush the CPU write Buffers */ + + /* Setup PCILBA3 as IO Window */ + rc32434_pci->pcilba[3].address = 0x18800000; + rc32434_pci->pcilba[3].mapping = 0x18800000; + rc32434_pci->pcilba[3].control = + ((((SIZE_1MB & 0x1ff) << PCI_LBAC_SIZE_BIT) | PCI_LBAC_MSI) | + PCI_ENDIAN_FLAG); + dummyread = rc32434_pci->pcilba[3].control; /* flush the CPU write Buffers */ + + pci_config_addr = (unsigned int) (0x80000004); + for (loopCount = 0; loopCount < 24; loopCount++) { + rc32434_pci->pcicfga = pci_config_addr; + dummyread = rc32434_pci->pcicfga; + rc32434_pci->pcicfgd = korina_cnfg_regs[loopCount]; + dummyread = rc32434_pci->pcicfgd; + pci_config_addr += 4; + } + rc32434_pci->pcitc = + (unsigned int) 
((PCITC_RTIMER_VAL & 0xff) << PCI_TC_RTIMER_BIT) | + ((PCITC_DTIMER_VAL & 0xff) << PCI_TC_DTIMER_BIT); + + pcicntlval = rc32434_pci->pcic; + pcicntlval &= ~PCI_CTL_TNR; + rc32434_pci->pcic = pcicntlval; + pcicntlval = rc32434_pci->pcic; + + return 0; +} + +static int __init rc32434_pci_init(void) +{ + pr_info("PCI: Initializing PCI\n"); + + ioport_resource.start = rc32434_res_pci_io1.start; + ioport_resource.end = rc32434_res_pci_io1.end; + + rc32434_pcibridge_init(); + + register_pci_controller(&rc32434_controller); + rc32434_sync(); + + return 0; +} + +arch_initcall(rc32434_pci_init); diff --git a/arch/mips/rb532/Makefile b/arch/mips/rb532/Makefile new file mode 100644 index 00000000000..8f0b6b6a162 --- /dev/null +++ b/arch/mips/rb532/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the RB532 board specific parts of the kernel +# + +obj-y += irq.o time.o setup.o serial.o prom.o gpio.o devices.o + +EXTRA_CFLAGS += -Werror diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c new file mode 100644 index 00000000000..44fb0a62877 --- /dev/null +++ b/arch/mips/rb532/devices.c @@ -0,0 +1,331 @@ +/* + * RouterBoard 500 Platform devices + * + * Copyright (C) 2006 Felix Fietkau + * Copyright (C) 2007 Florian Fainelli + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#define ETH0_DMA_RX_IRQ (GROUP1_IRQ_BASE + 0) +#define ETH0_DMA_TX_IRQ (GROUP1_IRQ_BASE + 1) +#define ETH0_RX_OVR_IRQ (GROUP3_IRQ_BASE + 9) +#define ETH0_TX_UND_IRQ (GROUP3_IRQ_BASE + 10) + +#define ETH0_RX_DMA_ADDR (DMA0_BASE_ADDR + 0 * DMA_CHAN_OFFSET) +#define ETH0_TX_DMA_ADDR (DMA0_BASE_ADDR + 1 * DMA_CHAN_OFFSET) + +/* NAND definitions */ +#define GPIO_RDY (1 << 0x08) +#define GPIO_WPX (1 << 0x09) +#define GPIO_ALE (1 << 0x0a) +#define GPIO_CLE (1 << 0x0b) + +extern char *board_type; + +static struct resource korina_dev0_res[] = { + { + .name = "korina_regs", + .start = ETH0_BASE_ADDR, + .end = ETH0_BASE_ADDR + sizeof(struct eth_regs), + .flags = IORESOURCE_MEM, + }, { + .name = "korina_rx", + .start = ETH0_DMA_RX_IRQ, + .end = ETH0_DMA_RX_IRQ, + .flags = IORESOURCE_IRQ + }, { + .name = "korina_tx", + .start = ETH0_DMA_TX_IRQ, + .end = ETH0_DMA_TX_IRQ, + .flags = IORESOURCE_IRQ + }, { + .name = "korina_ovr", + .start = ETH0_RX_OVR_IRQ, + .end = ETH0_RX_OVR_IRQ, + .flags = IORESOURCE_IRQ + }, { + .name = "korina_und", + .start = ETH0_TX_UND_IRQ, + .end = ETH0_TX_UND_IRQ, + .flags = IORESOURCE_IRQ + }, { + .name = "korina_dma_rx", + .start = ETH0_RX_DMA_ADDR, + .end = ETH0_RX_DMA_ADDR + DMA_CHAN_OFFSET - 1, + .flags = IORESOURCE_MEM, + }, { + .name = "korina_dma_tx", + .start = ETH0_TX_DMA_ADDR, + .end = ETH0_TX_DMA_ADDR + DMA_CHAN_OFFSET - 1, + .flags = IORESOURCE_MEM, + } +}; + +static struct korina_device korina_dev0_data = { + .name = "korina0", + .mac = {0xde, 0xca, 0xff, 0xc0, 0xff, 0xee} +}; + +static struct platform_device korina_dev0 = { + .id = 0, + .name = "korina", + .dev.platform_data = &korina_dev0_data, + .resource = korina_dev0_res, + .num_resources = ARRAY_SIZE(korina_dev0_res), +}; + +#define CF_GPIO_NUM 13 + +static struct resource cf_slot0_res[] = { + { 
+ .name = "cf_membase", + .flags = IORESOURCE_MEM + }, { + .name = "cf_irq", + .start = (8 + 4 * 32 + CF_GPIO_NUM), /* 149 */ + .end = (8 + 4 * 32 + CF_GPIO_NUM), + .flags = IORESOURCE_IRQ + } +}; + +static struct cf_device cf_slot0_data = { + .gpio_pin = 13 +}; + +static struct platform_device cf_slot0 = { + .id = 0, + .name = "pata-rb532-cf", + .dev.platform_data = &cf_slot0_data, + .resource = cf_slot0_res, + .num_resources = ARRAY_SIZE(cf_slot0_res), +}; + +/* Resources and device for NAND */ +static int rb532_dev_ready(struct mtd_info *mtd) +{ + return readl(IDT434_REG_BASE + GPIOD) & GPIO_RDY; +} + +static void rb532_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl) +{ + struct nand_chip *chip = mtd->priv; + unsigned char orbits, nandbits; + + if (ctrl & NAND_CTRL_CHANGE) { + orbits = (ctrl & NAND_CLE) << 1; + orbits |= (ctrl & NAND_ALE) >> 1; + + nandbits = (~ctrl & NAND_CLE) << 1; + nandbits |= (~ctrl & NAND_ALE) >> 1; + + set_latch_u5(orbits, nandbits); + } + if (cmd != NAND_CMD_NONE) + writeb(cmd, chip->IO_ADDR_W); +} + +static struct resource nand_slot0_res[] = { + [0] = { + .name = "nand_membase", + .flags = IORESOURCE_MEM + } +}; + +static struct platform_nand_data rb532_nand_data = { + .ctrl.dev_ready = rb532_dev_ready, + .ctrl.cmd_ctrl = rb532_cmd_ctrl, +}; + +static struct platform_device nand_slot0 = { + .name = "gen_nand", + .id = -1, + .resource = nand_slot0_res, + .num_resources = ARRAY_SIZE(nand_slot0_res), + .dev.platform_data = &rb532_nand_data, +}; + +static struct mtd_partition rb532_partition_info[] = { + { + .name = "Routerboard NAND boot", + .offset = 0, + .size = 4 * 1024 * 1024, + }, { + .name = "rootfs", + .offset = MTDPART_OFS_NXTBLK, + .size = MTDPART_SIZ_FULL, + } +}; + +static struct platform_device rb532_led = { + .name = "rb532-led", + .id = 0, +}; + +static struct gpio_keys_button rb532_gpio_btn[] = { + { + .gpio = 1, + .code = BTN_0, + .desc = "S1", + .active_low = 1, + } +}; + +static struct gpio_keys_platform_data 
rb532_gpio_btn_data = { + .buttons = rb532_gpio_btn, + .nbuttons = ARRAY_SIZE(rb532_gpio_btn), +}; + +static struct platform_device rb532_button = { + .name = "gpio-keys", + .id = -1, + .dev = { + .platform_data = &rb532_gpio_btn_data, + } +}; + +static struct resource rb532_wdt_res[] = { + { + .name = "rb532_wdt_res", + .start = INTEG0_BASE_ADDR, + .end = INTEG0_BASE_ADDR + sizeof(struct integ), + .flags = IORESOURCE_MEM, + } +}; + +static struct platform_device rb532_wdt = { + .name = "rc32434_wdt", + .id = -1, + .resource = rb532_wdt_res, + .num_resources = ARRAY_SIZE(rb532_wdt_res), +}; + +static struct platform_device *rb532_devs[] = { + &korina_dev0, + &nand_slot0, + &cf_slot0, + &rb532_led, + &rb532_button, + &rb532_wdt +}; + +static void __init parse_mac_addr(char *macstr) +{ + int i, j; + unsigned char result, value; + + for (i = 0; i < 6; i++) { + result = 0; + + if (i != 5 && *(macstr + 2) != ':') + return; + + for (j = 0; j < 2; j++) { + if (isxdigit(*macstr) + && (value = + isdigit(*macstr) ? 
*macstr -
+				'0' : toupper(*macstr) - 'A' + 10) < 16) {
+				result = result * 16 + value;
+				macstr++;
+			} else
+				return;
+		}
+
+		macstr++;
+		korina_dev0_data.mac[i] = result;
+	}
+}
+
+
+/* DEVICE CONTROLLER 1 */
+#define CFG_DC_DEV1 ((void *)0xb8010010)
+#define CFG_DC_DEV2 ((void *)0xb8010020)
+#define CFG_DC_DEVBASE 0x0
+#define CFG_DC_DEVMASK 0x4
+#define CFG_DC_DEVC 0x8
+#define CFG_DC_DEVTC 0xC
+
+/* NAND definitions */
+#define NAND_CHIP_DELAY 25
+
+/*
+ * Put the U5 latch into its initial state (the RB532A uses a different
+ * LO_WPX setting than the other boards) and fill in the chip description
+ * handed to the generic NAND platform driver.
+ */
+static void __init rb532_nand_setup(void)
+{
+	switch (mips_machtype) {
+	case MACH_MIKROTIK_RB532A:
+		set_latch_u5(LO_FOFF | LO_CEX,
+				LO_ULED | LO_ALE | LO_CLE | LO_WPX);
+		break;
+	default:
+		set_latch_u5(LO_WPX | LO_FOFF | LO_CEX,
+				LO_ULED | LO_ALE | LO_CLE);
+		break;
+	}
+
+	/* Setup NAND specific settings */
+	rb532_nand_data.chip.nr_chips = 1;
+	rb532_nand_data.chip.nr_partitions = ARRAY_SIZE(rb532_partition_info);
+	rb532_nand_data.chip.partitions = rb532_partition_info;
+	rb532_nand_data.chip.chip_delay = NAND_CHIP_DELAY;
+	rb532_nand_data.chip.options = NAND_NO_AUTOINCR;
+}
+
+
+/*
+ * Fill in the CF and NAND memory resources from the device controller
+ * registers and register the board's platform devices.
+ *
+ * Returns the result of platform_add_devices().
+ */
+static int __init plat_setup_devices(void)
+{
+	unsigned int i, ndevs = 0;
+	int have_cf;
+
+	/* Look for the CF card reader */
+	have_cf = readl(CFG_DC_DEV1 + CFG_DC_DEVMASK) != 0;
+	if (have_cf) {
+		cf_slot0_res[0].start =
+			readl(CFG_DC_DEV1 + CFG_DC_DEVBASE);
+		cf_slot0_res[0].end = cf_slot0_res[0].start + 0x1000;
+	}
+
+	/* Read the NAND resources from the device controller */
+	nand_slot0_res[0].start = readl(CFG_DC_DEV2 + CFG_DC_DEVBASE);
+	nand_slot0_res[0].end = nand_slot0_res[0].start + 0x1000;
+
+	/* Initialise the NAND device */
+	rb532_nand_setup();
+
+	/*
+	 * Without a CF reader only the CF slot must be left out.  The old
+	 * code cleared rb532_devs[1], which is the NAND device, and then
+	 * handed an array containing a NULL entry to
+	 * platform_add_devices().  Compact the array instead, looking the
+	 * CF slot up by address so the table order does not matter.
+	 */
+	for (i = 0; i < ARRAY_SIZE(rb532_devs); i++) {
+		if (!have_cf && rb532_devs[i] == &cf_slot0)
+			continue;
+		rb532_devs[ndevs++] = rb532_devs[i];
+	}
+
+	return platform_add_devices(rb532_devs, ndevs);
+}
+
+/* "kmac=" kernel parameter: log the string and parse it as the korina MAC */
+static int __init setup_kmac(char *s)
+{
+	printk(KERN_INFO "korina mac = %s\n", s);
+	parse_mac_addr(s);
+	return 0;
+}
+
+__setup("kmac=", setup_kmac);
+
+arch_initcall(plat_setup_devices);
diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c
new file mode 100644
index 00000000000..b2fe82dba0a
--- /dev/null
+++ 
b/arch/mips/rb532/gpio.c @@ -0,0 +1,220 @@ +/* + * Miscellaneous functions for IDT EB434 board + * + * Copyright 2004 IDT Inc. (rischelp@idt.com) + * Copyright 2006 Phil Sutter + * Copyright 2007 Florian Fainelli + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+struct rb532_gpio_reg __iomem *rb532_gpio_reg0;
+EXPORT_SYMBOL(rb532_gpio_reg0);
+
+struct mpmc_device dev3;
+
+static struct resource rb532_gpio_reg0_res[] = {
+	{
+		.name = "gpio_reg0",
+		.start = (u32)(IDT434_REG_BASE + GPIOBASE),
+		.end = (u32)(IDT434_REG_BASE + GPIOBASE + sizeof(struct rb532_gpio_reg)),
+		.flags = IORESOURCE_MEM,
+	}
+};
+
+static struct resource rb532_dev3_ctl_res[] = {
+	{
+		.name = "dev3_ctl",
+		.start = (u32)(IDT434_REG_BASE + DEV3BASE),
+		.end = (u32)(IDT434_REG_BASE + DEV3BASE + sizeof(struct dev_reg)),
+		.flags = IORESOURCE_MEM,
+	}
+};
+
+/*
+ * Read-modify-write a bit field of the register at
+ * IDT434_REG_BASE + reg_offs: the len bits starting at bit position bit
+ * are replaced by the low len bits of val.  Runs under dev3.lock with
+ * interrupts disabled.
+ */
+void set_434_reg(unsigned reg_offs, unsigned bit, unsigned len, unsigned val)
+{
+	unsigned long flags;	/* spin_lock_irqsave() wants unsigned long */
+	unsigned data;
+	unsigned i = 0;
+
+	spin_lock_irqsave(&dev3.lock, flags);
+
+	data = *(volatile unsigned *) (IDT434_REG_BASE + reg_offs);
+	for (i = 0; i != len; ++i) {
+		/* 1U: the field may include bit 31 */
+		if (val & (1U << i))
+			data |= (1U << (i + bit));
+		else
+			data &= ~(1U << (i + bit));
+	}
+	writel(data, (IDT434_REG_BASE + reg_offs));
+
+	spin_unlock_irqrestore(&dev3.lock, flags);
+}
+EXPORT_SYMBOL(set_434_reg);
+
+/* Return the register at IDT434_REG_BASE + reg_offs. */
+unsigned get_434_reg(unsigned reg_offs)
+{
+	return readl(IDT434_REG_BASE + reg_offs);
+}
+EXPORT_SYMBOL(get_434_reg);
+
+/*
+ * Update the cached U5 latch state (set the bits in or_mask, then clear
+ * the bits in nand_mask) and write the new state out to the latch.
+ */
+void set_latch_u5(unsigned char or_mask, unsigned char nand_mask)
+{
+	unsigned long flags;	/* spin_lock_irqsave() wants unsigned long */
+
+	spin_lock_irqsave(&dev3.lock, flags);
+
+	dev3.state = (dev3.state | or_mask) & ~nand_mask;
+	/*
+	 * Write through the mapping stored in dev3.base.  The old
+	 * "&dev3.base" was the address of the pointer variable itself,
+	 * so the latch was never written.
+	 * NOTE(review): the access width (writel) is kept as it was;
+	 * confirm it suits the latch.
+	 */
+	writel(dev3.state, dev3.base);
+
+	spin_unlock_irqrestore(&dev3.lock, flags);
+}
+EXPORT_SYMBOL(set_latch_u5);
+
+/* Return the cached U5 latch state (no hardware access, no locking). */
+unsigned char get_latch_u5(void)
+{
+	return dev3.state;
+}
+EXPORT_SYMBOL(get_latch_u5);
+
+/* Return the gpiod register masked with the bit for gpio (0 or non-zero). */
+int rb532_gpio_get_value(unsigned gpio)
+{
+	return readl(&rb532_gpio_reg0->gpiod) & (1 << gpio);
+}
+EXPORT_SYMBOL(rb532_gpio_get_value);
+
+void rb532_gpio_set_value(unsigned gpio, int value)
+{
+	unsigned tmp;
+
+	tmp = readl(&rb532_gpio_reg0->gpiod) & ~(1 << gpio);
+	if (value)
+		tmp |= 1 << gpio;
+
+
writel(tmp, (void *)&rb532_gpio_reg0->gpiod); +} +EXPORT_SYMBOL(rb532_gpio_set_value); + +int rb532_gpio_direction_input(unsigned gpio) +{ + writel(readl(&rb532_gpio_reg0->gpiocfg) & ~(1 << gpio), + (void *)&rb532_gpio_reg0->gpiocfg); + + return 0; +} +EXPORT_SYMBOL(rb532_gpio_direction_input); + +int rb532_gpio_direction_output(unsigned gpio, int value) +{ + gpio_set_value(gpio, value); + writel(readl(&rb532_gpio_reg0->gpiocfg) | (1 << gpio), + (void *)&rb532_gpio_reg0->gpiocfg); + + return 0; +} +EXPORT_SYMBOL(rb532_gpio_direction_output); + +void rb532_gpio_set_int_level(unsigned gpio, int value) +{ + unsigned tmp; + + tmp = readl(&rb532_gpio_reg0->gpioilevel) & ~(1 << gpio); + if (value) + tmp |= 1 << gpio; + writel(tmp, (void *)&rb532_gpio_reg0->gpioilevel); +} +EXPORT_SYMBOL(rb532_gpio_set_int_level); + +int rb532_gpio_get_int_level(unsigned gpio) +{ + return readl(&rb532_gpio_reg0->gpioilevel) & (1 << gpio); +} +EXPORT_SYMBOL(rb532_gpio_get_int_level); + +void rb532_gpio_set_int_status(unsigned gpio, int value) +{ + unsigned tmp; + + tmp = readl(&rb532_gpio_reg0->gpioistat); + if (value) + tmp |= 1 << gpio; + writel(tmp, (void *)&rb532_gpio_reg0->gpioistat); +} +EXPORT_SYMBOL(rb532_gpio_set_int_status); + +int rb532_gpio_get_int_status(unsigned gpio) +{ + return readl(&rb532_gpio_reg0->gpioistat) & (1 << gpio); +} +EXPORT_SYMBOL(rb532_gpio_get_int_status); + +void rb532_gpio_set_func(unsigned gpio, int value) +{ + unsigned tmp; + + tmp = readl(&rb532_gpio_reg0->gpiofunc); + if (value) + tmp |= 1 << gpio; + writel(tmp, (void *)&rb532_gpio_reg0->gpiofunc); +} +EXPORT_SYMBOL(rb532_gpio_set_func); + +int rb532_gpio_get_func(unsigned gpio) +{ + return readl(&rb532_gpio_reg0->gpiofunc) & (1 << gpio); +} +EXPORT_SYMBOL(rb532_gpio_get_func); + +int __init rb532_gpio_init(void) +{ + rb532_gpio_reg0 = ioremap_nocache(rb532_gpio_reg0_res[0].start, + rb532_gpio_reg0_res[0].end - + rb532_gpio_reg0_res[0].start); + + if (!rb532_gpio_reg0) { + printk(KERN_ERR "rb532: 
cannot remap GPIO register 0\n"); + return -ENXIO; + } + + dev3.base = ioremap_nocache(rb532_dev3_ctl_res[0].start, + rb532_dev3_ctl_res[0].end - + rb532_dev3_ctl_res[0].start); + + if (!dev3.base) { + printk(KERN_ERR "rb532: cannot remap device controller 3\n"); + return -ENXIO; + } + + return 0; +} +arch_initcall(rb532_gpio_init); diff --git a/arch/mips/rb532/irq.c b/arch/mips/rb532/irq.c new file mode 100644 index 00000000000..c0d0f950caf --- /dev/null +++ b/arch/mips/rb532/irq.c @@ -0,0 +1,209 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Copyright 2002 MontaVista Software Inc. + * Author: MontaVista Software, Inc. 
+ * stevel@mvista.com or source@mvista.com + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +struct intr_group { + u32 mask; /* mask of valid bits in pending/mask registers */ + volatile u32 *base_addr; +}; + +#define RC32434_NR_IRQS (GROUP4_IRQ_BASE + 32) + +#if (NR_IRQS < RC32434_NR_IRQS) +#error Too little irqs defined. Did you override ? +#endif + +static const struct intr_group intr_group[NUM_INTR_GROUPS] = { + { + .mask = 0x0000efff, + .base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 0 * IC_GROUP_OFFSET)}, + { + .mask = 0x00001fff, + .base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 1 * IC_GROUP_OFFSET)}, + { + .mask = 0x00000007, + .base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 2 * IC_GROUP_OFFSET)}, + { + .mask = 0x0003ffff, + .base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 3 * IC_GROUP_OFFSET)}, + { + .mask = 0xffffffff, + .base_addr = (u32 *) KSEG1ADDR(IC_GROUP0_PEND + 4 * IC_GROUP_OFFSET)} +}; + +#define READ_PEND(base) (*(base)) +#define READ_MASK(base) (*(base + 2)) +#define WRITE_MASK(base, val) (*(base + 2) = (val)) + +static inline int irq_to_group(unsigned int irq_nr) +{ + return (irq_nr - GROUP0_IRQ_BASE) >> 5; +} + +static inline int group_to_ip(unsigned int group) +{ + return group + 2; +} + +static inline void enable_local_irq(unsigned int ip) +{ + int ipnum = 0x100 << ip; + + set_c0_status(ipnum); +} + +static inline void disable_local_irq(unsigned int ip) +{ + int ipnum = 0x100 << ip; + + clear_c0_status(ipnum); +} + +static inline void ack_local_irq(unsigned int ip) +{ + int ipnum = 0x100 << ip; + + clear_c0_cause(ipnum); +} + +static void rb532_enable_irq(unsigned int irq_nr) +{ + int ip = irq_nr - GROUP0_IRQ_BASE; + unsigned int group, intr_bit; + volatile unsigned int *addr; + + if (ip < 0) + enable_local_irq(irq_nr); + else { + group = ip >> 5; + + ip &= (1 << 5) - 1; + intr_bit 
= 1 << ip; + + enable_local_irq(group_to_ip(group)); + + addr = intr_group[group].base_addr; + WRITE_MASK(addr, READ_MASK(addr) & ~intr_bit); + } +} + +static void rb532_disable_irq(unsigned int irq_nr) +{ + int ip = irq_nr - GROUP0_IRQ_BASE; + unsigned int group, intr_bit, mask; + volatile unsigned int *addr; + + if (ip < 0) { + disable_local_irq(irq_nr); + } else { + group = ip >> 5; + + ip &= (1 << 5) - 1; + intr_bit = 1 << ip; + addr = intr_group[group].base_addr; + mask = READ_MASK(addr); + mask |= intr_bit; + WRITE_MASK(addr, mask); + + /* + * if there are no more interrupts enabled in this + * group, disable corresponding IP + */ + if (mask == intr_group[group].mask) + disable_local_irq(group_to_ip(group)); + } +} + +static void rb532_mask_and_ack_irq(unsigned int irq_nr) +{ + rb532_disable_irq(irq_nr); + ack_local_irq(group_to_ip(irq_to_group(irq_nr))); +} + +static struct irq_chip rc32434_irq_type = { + .name = "RB532", + .ack = rb532_disable_irq, + .mask = rb532_disable_irq, + .mask_ack = rb532_mask_and_ack_irq, + .unmask = rb532_enable_irq, +}; + +void __init arch_init_irq(void) +{ + int i; + + pr_info("Initializing IRQ's: %d out of %d\n", RC32434_NR_IRQS, NR_IRQS); + + for (i = 0; i < RC32434_NR_IRQS; i++) + set_irq_chip_and_handler(i, &rc32434_irq_type, + handle_level_irq); +} + +/* Main Interrupt dispatcher */ +asmlinkage void plat_irq_dispatch(void) +{ + unsigned int ip, pend, group; + volatile unsigned int *addr; + unsigned int cp0_cause = read_c0_cause() & read_c0_status(); + + if (cp0_cause & CAUSEF_IP7) { + do_IRQ(7); + } else { + ip = (cp0_cause & 0x7c00); + if (ip) { + group = 21 + (fls(ip) - 32); + + addr = intr_group[group].base_addr; + + pend = READ_PEND(addr); + pend &= ~READ_MASK(addr); /* only unmasked interrupts */ + pend = 39 + (fls(pend) - 32); + do_IRQ((group << 5) + pend); + } + } +} diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c new file mode 100644 index 00000000000..1bc0af8febf --- /dev/null +++ 
b/arch/mips/rb532/prom.c @@ -0,0 +1,158 @@ +/* + * RouterBoard 500 specific prom routines + * + * Copyright (C) 2003, Peter Sadik + * Copyright (C) 2005-2006, P.Christeas + * Copyright (C) 2007, Gabor Juhos + * Felix Fietkau + * Florian Fainelli + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the + * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +extern void __init setup_serial_port(void); + +unsigned int idt_cpu_freq = 132000000; +EXPORT_SYMBOL(idt_cpu_freq); +unsigned int gpio_bootup_state; +EXPORT_SYMBOL(gpio_bootup_state); + +static struct resource ddr_reg[] = { + { + .name = "ddr-reg", + .start = DDR0_PHYS_ADDR, + .end = DDR0_PHYS_ADDR + sizeof(struct ddr_ram), + .flags = IORESOURCE_MEM, + } +}; + +void __init prom_free_prom_memory(void) +{ + /* No prom memory to free */ +} + +static inline int match_tag(char *arg, const char *tag) +{ + return strncmp(arg, tag, strlen(tag)) == 0; +} + +static inline unsigned long tag2ul(char *arg, const char *tag) +{ + char *num; + + num = arg + strlen(tag); + return simple_strtoul(num, 0, 10); +} + +void __init prom_setup_cmdline(void) +{ + char cmd_line[CL_SIZE]; + char *cp, *board; + int prom_argc; + char **prom_argv, **prom_envp; + int i; + + prom_argc = fw_arg0; + prom_argv = 
(char **) fw_arg1; + prom_envp = (char **) fw_arg2; + + cp = cmd_line; + /* Note: it is common that parameters start + * at argv[1] and not argv[0], + * however, our elf loader starts at [0] */ + for (i = 0; i < prom_argc; i++) { + if (match_tag(prom_argv[i], FREQ_TAG)) { + idt_cpu_freq = tag2ul(prom_argv[i], FREQ_TAG); + continue; + } +#ifdef IGNORE_CMDLINE_MEM + /* parses out the "mem=xx" arg */ + if (match_tag(prom_argv[i], MEM_TAG)) + continue; +#endif + if (i > 0) + *(cp++) = ' '; + if (match_tag(prom_argv[i], BOARD_TAG)) { + board = prom_argv[i] + strlen(BOARD_TAG); + + if (match_tag(board, BOARD_RB532A)) + mips_machtype = MACH_MIKROTIK_RB532A; + else + mips_machtype = MACH_MIKROTIK_RB532; + } + + if (match_tag(prom_argv[i], GPIO_TAG)) + gpio_bootup_state = tag2ul(prom_argv[i], GPIO_TAG); + + strcpy(cp, prom_argv[i]); + cp += strlen(prom_argv[i]); + } + *(cp++) = ' '; + + i = strlen(arcs_cmdline); + if (i > 0) { + *(cp++) = ' '; + strcpy(cp, arcs_cmdline); + cp += strlen(arcs_cmdline); + } + if (gpio_bootup_state & 0x02) + strcpy(cp, GPIO_INIT_NOBUTTON); + else + strcpy(cp, GPIO_INIT_BUTTON); + + cmd_line[CL_SIZE-1] = '\0'; + + strcpy(arcs_cmdline, cmd_line); +} + +void __init prom_init(void) +{ + struct ddr_ram __iomem *ddr; + phys_t memsize; + phys_t ddrbase; + + ddr = ioremap_nocache(ddr_reg[0].start, + ddr_reg[0].end - ddr_reg[0].start); + + if (!ddr) { + printk(KERN_ERR "Unable to remap DDR register\n"); + return; + } + + ddrbase = (phys_t)&ddr->ddrbase; + memsize = (phys_t)&ddr->ddrmask; + memsize = 0 - memsize; + + prom_setup_cmdline(); + + /* give all RAM to boot allocator, + * except for the first 0x400 and the last 0x200 bytes */ + add_memory_region(ddrbase + 0x400, memsize - 0x600, BOOT_MEM_RAM); +} diff --git a/arch/mips/rb532/serial.c b/arch/mips/rb532/serial.c new file mode 100644 index 00000000000..1a05b5ddee0 --- /dev/null +++ b/arch/mips/rb532/serial.c @@ -0,0 +1,53 @@ +/* + * BRIEF MODULE DESCRIPTION + * Serial port initialisation. 
+ * + * Copyright 2004 IDT Inc. (rischelp@idt.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include + +#include +#include + +extern unsigned int idt_cpu_freq; + +static struct uart_port rb532_uart = { + .type = PORT_16550A, + .line = 0, + .irq = RC32434_UART0_IRQ, + .iotype = UPIO_MEM, + .membase = (char *)KSEG1ADDR(RC32434_UART0_BASE), + .regshift = 2 +}; + +int __init setup_serial_port(void) +{ + rb532_uart.uartclk = idt_cpu_freq; + + return early_serial_setup(&rb532_uart); +} +arch_initcall(setup_serial_port); diff --git a/arch/mips/rb532/setup.c b/arch/mips/rb532/setup.c new file mode 100644 index 00000000000..7aafa95ac20 --- /dev/null +++ b/arch/mips/rb532/setup.c @@ -0,0 +1,79 @@ +/* + * setup.c - boot time setup code + */ + +#include + +#include +#include +#include +#include + +#include +#include + +struct pci_reg __iomem *pci_reg; +EXPORT_SYMBOL(pci_reg); + +static struct resource pci0_res[] = { + { + .name = "pci_reg0", + .start = PCI0_BASE_ADDR, + .end = PCI0_BASE_ADDR + sizeof(struct pci_reg), + .flags = IORESOURCE_MEM, + } +}; + +static void rb_machine_restart(char *command) +{ + /* just jump to the reset vector */ + writel(0x80000001, (void *)KSEG1ADDR(RC32434_REG_BASE + RC32434_RST)); + ((void (*)(void)) KSEG1ADDR(0x1FC00000u))(); +} + +static void rb_machine_halt(void) +{ + for (;;) + continue; +} + +void __init plat_mem_setup(void) +{ + u32 val; + + _machine_restart = rb_machine_restart; + _machine_halt = rb_machine_halt; + pm_power_off = rb_machine_halt; + + set_io_port_base(KSEG1); + + pci_reg = ioremap_nocache(pci0_res[0].start, + pci0_res[0].end - pci0_res[0].start); + if (!pci_reg) { + printk(KERN_ERR "Could not remap PCI registers\n"); + return; + } + + val = __raw_readl(&pci_reg->pcic); + val &= 0xFFFFFF7; + __raw_writel(val, (void *)&pci_reg->pcic); + +#ifdef CONFIG_PCI + /* Enable PCI interrupts in EPLD Mask register */ + *epld_mask = 0x0; + *(epld_mask + 1) = 0x0; +#endif + write_c0_wired(0); +} + +const char *get_system_type(void) +{ + switch (mips_machtype) { + case MACH_MIKROTIK_RB532A: + 
return "Mikrotik RB532A"; + break; + default: + return "Mikrotik RB532"; + break; + } +} diff --git a/arch/mips/rb532/time.c b/arch/mips/rb532/time.c new file mode 100644 index 00000000000..db74edf8cef --- /dev/null +++ b/arch/mips/rb532/time.c @@ -0,0 +1,67 @@ +/* + * Carsten Langgaard, carstenl@mips.com + * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Setting up the clock on the MIPS boards. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +extern unsigned int idt_cpu_freq; + +/* + * Figure out the r4k offset, the amount to increment the compare + * register for each time tick. There is no RTC available. + * + * The RC32434 counts at half the CPU *core* speed. + */ +static unsigned long __init cal_r4koff(void) +{ + mips_hpt_frequency = idt_cpu_freq * IDT_CLOCK_MULT / 2; + + return mips_hpt_frequency / HZ; +} + +void __init plat_time_init(void) +{ + unsigned int est_freq, flags; + unsigned long r4k_offset; + + local_irq_save(flags); + + printk(KERN_INFO "calculating r4koff... 
"); + r4k_offset = cal_r4koff(); + printk("%08lx(%d)\n", r4k_offset, (int) r4k_offset); + + est_freq = 2 * r4k_offset * HZ; + est_freq += 5000; /* round */ + est_freq -= est_freq % 10000; + printk(KERN_INFO "CPU frequency %d.%02d MHz\n", est_freq / 1000000, + (est_freq % 1000000) * 100 / 1000000); + local_irq_restore(flags); +} -- cgit v1.2.3 From 8d795f2a5cf73338a467ac82bdeb73225e987c45 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Fri, 18 Jul 2008 00:43:48 +0900 Subject: [MIPS] TXx9: Miscellaneous build fixes * Fix build if only RBTX4927 or RBTX4938 was selected. * Move gpio helpers to generic part. * Select SOC_TX4938 for RBTX4927/37 board. * Fix parent of rbtx4938_fpga_resource. Signed-off-by: Atsushi Nemoto Signed-off-by: Ralf Baechle --- arch/mips/txx9/Kconfig | 2 ++ arch/mips/txx9/generic/setup.c | 20 ++++++++++++++++++++ arch/mips/txx9/rbtx4938/setup.c | 14 +------------- 3 files changed, 23 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/mips/txx9/Kconfig b/arch/mips/txx9/Kconfig index b92a134ef12..6de4c5aa92b 100644 --- a/arch/mips/txx9/Kconfig +++ b/arch/mips/txx9/Kconfig @@ -7,6 +7,8 @@ config TOSHIBA_RBTX4927 bool "Toshiba RBTX49[23]7 board" depends on MACH_TX49XX select SOC_TX4927 + # TX4937 is subset of TX4938 + select SOC_TX4938 help This Toshiba board is based on the TX4927 processor. 
Say Y here to support this machine type diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 5afc5d5cab0..8caef07701b 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -94,6 +94,22 @@ void clk_put(struct clk *clk) } EXPORT_SYMBOL(clk_put); +/* GPIO support */ + +#ifdef CONFIG_GENERIC_GPIO +int gpio_to_irq(unsigned gpio) +{ + return -EINVAL; +} +EXPORT_SYMBOL(gpio_to_irq); + +int irq_to_gpio(unsigned irq) +{ + return -EINVAL; +} +EXPORT_SYMBOL(irq_to_gpio); +#endif + extern struct txx9_board_vec jmr3927_vec; extern struct txx9_board_vec rbtx4927_vec; extern struct txx9_board_vec rbtx4937_vec; @@ -126,15 +142,19 @@ void __init prom_init(void) #endif #ifdef CONFIG_CPU_TX49XX switch (TX4938_REV_PCODE()) { +#ifdef CONFIG_TOSHIBA_RBTX4927 case 0x4927: txx9_board_vec = &rbtx4927_vec; break; case 0x4937: txx9_board_vec = &rbtx4937_vec; break; +#endif +#ifdef CONFIG_TOSHIBA_RBTX4938 case 0x4938: txx9_board_vec = &rbtx4938_vec; break; +#endif } #endif diff --git a/arch/mips/txx9/rbtx4938/setup.c b/arch/mips/txx9/rbtx4938/setup.c index aaa987ae0f8..c2da92396b7 100644 --- a/arch/mips/txx9/rbtx4938/setup.c +++ b/arch/mips/txx9/rbtx4938/setup.c @@ -457,7 +457,7 @@ static void __init rbtx4938_mem_setup(void) rbtx4938_fpga_resource.start = CPHYSADDR(RBTX4938_FPGA_REG_ADDR); rbtx4938_fpga_resource.end = CPHYSADDR(RBTX4938_FPGA_REG_ADDR) + 0xffff; rbtx4938_fpga_resource.flags = IORESOURCE_MEM | IORESOURCE_BUSY; - if (request_resource(&iomem_resource, &rbtx4938_fpga_resource)) + if (request_resource(&txx9_ce_res[2], &rbtx4938_fpga_resource)) printk("request resource for fpga failed\n"); _machine_restart = rbtx4938_machine_restart; @@ -488,18 +488,6 @@ static int __init rbtx4938_ne_init(void) return IS_ERR(dev) ? 
PTR_ERR(dev) : 0; } -/* GPIO support */ - -int gpio_to_irq(unsigned gpio) -{ - return -EINVAL; -} - -int irq_to_gpio(unsigned irq) -{ - return -EINVAL; -} - static DEFINE_SPINLOCK(rbtx4938_spi_gpio_lock); static void rbtx4938_spi_gpio_set(struct gpio_chip *chip, unsigned int offset, -- cgit v1.2.3 From fc22c3571c86cc36f4eb29336ce40c04a666ee98 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 16 Jul 2008 19:25:40 +0300 Subject: [MIPS] mips/sgi-ip22/ip28-berr.c: fix the build Commit 3e6ea3b0d7a93550a93a265e732413d3a5aaf0d2 (linux-mips.org) / 52f4f6bbcff5510f662a002ec1219660ea25af62 (kernel.org) ([MIPS] Use kernel-supplied ARRAY_SIZE() macro.) causes the following compile error: <-- snip --> ... CC arch/mips/sgi-ip22/ip28-berr.o /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c: In function 'ip28_be_interrupt': /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c:415: error: subscripted value is neither array nor pointer /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c:415: error: subscripted value is neither array nor pointer /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c:415: warning: type defaults to 'int' in declaration of 'type name' /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c:424: error: subscripted value is neither array nor pointer /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c:424: error: subscripted value is neither array nor pointer /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/mips/sgi-ip22/ip28-berr.c:424: warning: type defaults to 'int' in declaration of 'type name' make[2]: *** [arch/mips/sgi-ip22/ip28-berr.o] Error 1 <-- snip --> Using ARRAY_SIZE in these places in arch/mips/sgi-ip22/ip28-berr.c was bogus, and therefore gets reverted by this patch. 
Signed-off-by: Adrian Bunk Signed-off-by: Ralf Baechle --- arch/mips/sgi-ip22/ip28-berr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index fee7a2e0e53..30e12e2ec4b 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -412,7 +412,7 @@ static int ip28_be_interrupt(const struct pt_regs *regs) * Now we have an asynchronous bus error, speculatively or DMA caused. * Need to search all DMA descriptors for the error address. */ - for (i = 0; i < ARRAY_SIZE(hpc3); ++i) { + for (i = 0; i < sizeof(hpc3)/sizeof(struct hpc3_stat); ++i) { struct hpc3_stat *hp = (struct hpc3_stat *)&hpc3 + i; if ((cpu_err_stat & CPU_ERRMASK) && (cpu_err_addr == hp->ndptr || cpu_err_addr == hp->cbp)) @@ -421,7 +421,7 @@ static int ip28_be_interrupt(const struct pt_regs *regs) (gio_err_addr == hp->ndptr || gio_err_addr == hp->cbp)) break; } - if (i < ARRAY_SIZE(hpc3)) { + if (i < sizeof(hpc3)/sizeof(struct hpc3_stat)) { struct hpc3_stat *hp = (struct hpc3_stat *)&hpc3 + i; printk(KERN_ERR "at DMA addresses: HPC3 @ %08lx:" " ctl %08x, ndp %08x, cbp %08x\n", -- cgit v1.2.3 From b5d5accc7a2eb41f43ef346f3b258ba2f6342a1c Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Fri, 18 Jul 2008 23:03:15 +0900 Subject: [MIPS] Cobalt: Fix I/O port resource range LCD and buttons don't use I/O port space. 
Signed-off-by: Yoichi Yuasa Signed-off-by: Ralf Baechle --- arch/mips/cobalt/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/cobalt/setup.c b/arch/mips/cobalt/setup.c index dd23beb8604..b5164422724 100644 --- a/arch/mips/cobalt/setup.c +++ b/arch/mips/cobalt/setup.c @@ -81,8 +81,8 @@ void __init plat_mem_setup(void) set_io_port_base(CKSEG1ADDR(GT_DEF_PCI0_IO_BASE)); - /* I/O port resource must include LCD/buttons */ - ioport_resource.end = 0x0fffffff; + /* I/O port resource */ + ioport_resource.end = 0x01ffffff; /* These resources have been reserved by VIA SuperI/O chip. */ for (i = 0; i < ARRAY_SIZE(cobalt_reserved_resources); i++) -- cgit v1.2.3 From 255033a9bb900a06c9a7798908ce12557d24fb66 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Sat, 19 Jul 2008 01:51:41 +0900 Subject: [MIPS] TXx9: Cleanups for 64-bit support * Unify (and fix) mem_tx4938.c and mem_tx4927.c * Simplify prom_init * Kill volatiles and unused definitions for tx4927.h and tx4938.h Signed-off-by: Atsushi Nemoto Signed-off-by: Ralf Baechle --- arch/mips/txx9/generic/Makefile | 2 +- arch/mips/txx9/generic/mem_tx4927.c | 94 +++++---------------------- arch/mips/txx9/generic/mem_tx4938.c | 124 ------------------------------------ arch/mips/txx9/rbtx4927/prom.c | 6 +- arch/mips/txx9/rbtx4938/prom.c | 6 +- arch/mips/txx9/rbtx4938/setup.c | 11 ++-- 6 files changed, 24 insertions(+), 219 deletions(-) delete mode 100644 arch/mips/txx9/generic/mem_tx4938.c (limited to 'arch') diff --git a/arch/mips/txx9/generic/Makefile b/arch/mips/txx9/generic/Makefile index 668fdaad644..ab274ede9a7 100644 --- a/arch/mips/txx9/generic/Makefile +++ b/arch/mips/txx9/generic/Makefile @@ -5,7 +5,7 @@ obj-y += setup.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_SOC_TX4927) += mem_tx4927.o irq_tx4927.o -obj-$(CONFIG_SOC_TX4938) += mem_tx4938.o irq_tx4938.o +obj-$(CONFIG_SOC_TX4938) += mem_tx4927.o irq_tx4938.o obj-$(CONFIG_TOSHIBA_FPCIB0) += smsc_fdc37m81x.o 
obj-$(CONFIG_KGDB) += dbgio.o diff --git a/arch/mips/txx9/generic/mem_tx4927.c b/arch/mips/txx9/generic/mem_tx4927.c index 12dfc377bf2..ef6ea6e9787 100644 --- a/arch/mips/txx9/generic/mem_tx4927.c +++ b/arch/mips/txx9/generic/mem_tx4927.c @@ -1,5 +1,5 @@ /* - * linux/arch/mips/tx4927/common/tx4927_prom.c + * linux/arch/mips/txx9/generic/mem_tx4927.c * * common tx4927 memory interface * @@ -32,8 +32,9 @@ #include #include #include +#include -static unsigned int __init tx4927_process_sdccr(unsigned long addr) +static unsigned int __init tx4927_process_sdccr(u64 __iomem *addr) { u64 val; unsigned int sdccr_ce; @@ -45,97 +46,32 @@ static unsigned int __init tx4927_process_sdccr(unsigned long addr) unsigned int rs = 0; unsigned int cs = 0; unsigned int mw = 0; - unsigned int msize = 0; - val = __raw_readq((void __iomem *)addr); + val = __raw_readq(addr); /* MVMCP -- need #defs for these bits masks */ sdccr_ce = ((val & (1 << 10)) >> 10); sdccr_bs = ((val & (1 << 8)) >> 8); sdccr_rs = ((val & (3 << 5)) >> 5); - sdccr_cs = ((val & (3 << 2)) >> 2); + sdccr_cs = ((val & (7 << 2)) >> 2); sdccr_mw = ((val & (1 << 0)) >> 0); if (sdccr_ce) { - switch (sdccr_bs) { - case 0:{ - bs = 2; - break; - } - case 1:{ - bs = 4; - break; - } - } - switch (sdccr_rs) { - case 0:{ - rs = 2048; - break; - } - case 1:{ - rs = 4096; - break; - } - case 2:{ - rs = 8192; - break; - } - case 3:{ - rs = 0; - break; - } - } - switch (sdccr_cs) { - case 0:{ - cs = 256; - break; - } - case 1:{ - cs = 512; - break; - } - case 2:{ - cs = 1024; - break; - } - case 3:{ - cs = 2048; - break; - } - } - switch (sdccr_mw) { - case 0:{ - mw = 8; - break; - } /* 8 bytes = 64 bits */ - case 1:{ - mw = 4; - break; - } /* 4 bytes = 32 bits */ - } + bs = 2 << sdccr_bs; + rs = 2048 << sdccr_rs; + cs = 256 << sdccr_cs; + mw = 8 >> sdccr_mw; } - /* bytes per chip MB per chip num chips */ - msize = (((rs * cs * mw) / (1024 * 1024)) * bs); - - return (msize); + return rs * cs * mw * bs; } - unsigned int __init 
tx4927_get_mem_size(void) { - unsigned int c0; - unsigned int c1; - unsigned int c2; - unsigned int c3; - unsigned int total; - - /* MVMCP -- need #defs for these registers */ - c0 = tx4927_process_sdccr(0xff1f8000); - c1 = tx4927_process_sdccr(0xff1f8008); - c2 = tx4927_process_sdccr(0xff1f8010); - c3 = tx4927_process_sdccr(0xff1f8018); - total = c0 + c1 + c2 + c3; + unsigned int total = 0; + int i; - return (total); + for (i = 0; i < ARRAY_SIZE(tx4927_sdramcptr->cr); i++) + total += tx4927_process_sdccr(&tx4927_sdramcptr->cr[i]); + return total; } diff --git a/arch/mips/txx9/generic/mem_tx4938.c b/arch/mips/txx9/generic/mem_tx4938.c deleted file mode 100644 index 20baeaeba4c..00000000000 --- a/arch/mips/txx9/generic/mem_tx4938.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * linux/arch/mips/tx4938/common/prom.c - * - * common tx4938 memory interface - * Copyright (C) 2000-2001 Toshiba Corporation - * - * 2003-2005 (c) MontaVista Software, Inc. This file is licensed under the - * terms of the GNU General Public License version 2. This program is - * licensed "as is" without any warranty of any kind, whether express - * or implied. 
- * - * Support for TX4938 in 2.6 - Manish Lachwani (mlachwani@mvista.com) - */ - -#include -#include -#include - -static unsigned int __init -tx4938_process_sdccr(u64 * addr) -{ - u64 val; - unsigned int sdccr_ce; - unsigned int sdccr_rs; - unsigned int sdccr_cs; - unsigned int sdccr_mw; - unsigned int rs = 0; - unsigned int cs = 0; - unsigned int mw = 0; - unsigned int bc = 4; - unsigned int msize = 0; - - val = ____raw_readq((void __iomem *)addr); - - /* MVMCP -- need #defs for these bits masks */ - sdccr_ce = ((val & (1 << 10)) >> 10); - sdccr_rs = ((val & (3 << 5)) >> 5); - sdccr_cs = ((val & (7 << 2)) >> 2); - sdccr_mw = ((val & (1 << 0)) >> 0); - - if (sdccr_ce) { - switch (sdccr_rs) { - case 0:{ - rs = 2048; - break; - } - case 1:{ - rs = 4096; - break; - } - case 2:{ - rs = 8192; - break; - } - default:{ - rs = 0; - break; - } - } - switch (sdccr_cs) { - case 0:{ - cs = 256; - break; - } - case 1:{ - cs = 512; - break; - } - case 2:{ - cs = 1024; - break; - } - case 3:{ - cs = 2048; - break; - } - case 4:{ - cs = 4096; - break; - } - default:{ - cs = 0; - break; - } - } - switch (sdccr_mw) { - case 0:{ - mw = 8; - break; - } /* 8 bytes = 64 bits */ - case 1:{ - mw = 4; - break; - } /* 4 bytes = 32 bits */ - } - } - - /* bytes per chip MB per chip bank count */ - msize = (((rs * cs * mw) / (1024 * 1024)) * (bc)); - - /* MVMCP -- bc hard coded to 4 from table 9.3.1 */ - /* boad supports bc=2 but no way to detect */ - - return (msize); -} - -unsigned int __init -tx4938_get_mem_size(void) -{ - unsigned int c0; - unsigned int c1; - unsigned int c2; - unsigned int c3; - unsigned int total; - - /* MVMCP -- need #defs for these registers */ - c0 = tx4938_process_sdccr((u64 *) 0xff1f8000); - c1 = tx4938_process_sdccr((u64 *) 0xff1f8008); - c2 = tx4938_process_sdccr((u64 *) 0xff1f8010); - c3 = tx4938_process_sdccr((u64 *) 0xff1f8018); - total = c0 + c1 + c2 + c3; - - return (total); -} diff --git a/arch/mips/txx9/rbtx4927/prom.c b/arch/mips/txx9/rbtx4927/prom.c 
index 942e627d2dc..5c0de54ebdd 100644 --- a/arch/mips/txx9/rbtx4927/prom.c +++ b/arch/mips/txx9/rbtx4927/prom.c @@ -36,10 +36,6 @@ void __init rbtx4927_prom_init(void) { - extern int tx4927_get_mem_size(void); - int msize; - prom_init_cmdline(); - msize = tx4927_get_mem_size(); - add_memory_region(0, msize << 20, BOOT_MEM_RAM); + add_memory_region(0, tx4927_get_mem_size(), BOOT_MEM_RAM); } diff --git a/arch/mips/txx9/rbtx4938/prom.c b/arch/mips/txx9/rbtx4938/prom.c index fbb37458ddb..ee189519ce5 100644 --- a/arch/mips/txx9/rbtx4938/prom.c +++ b/arch/mips/txx9/rbtx4938/prom.c @@ -18,12 +18,8 @@ void __init rbtx4938_prom_init(void) { - extern int tx4938_get_mem_size(void); - int msize; #ifndef CONFIG_TX4938_NAND_BOOT prom_init_cmdline(); #endif - - msize = tx4938_get_mem_size(); - add_memory_region(0, msize << 20, BOOT_MEM_RAM); + add_memory_region(0, tx4938_get_mem_size(), BOOT_MEM_RAM); } diff --git a/arch/mips/txx9/rbtx4938/setup.c b/arch/mips/txx9/rbtx4938/setup.c index c2da92396b7..c1e076c7b2d 100644 --- a/arch/mips/txx9/rbtx4938/setup.c +++ b/arch/mips/txx9/rbtx4938/setup.c @@ -310,7 +310,7 @@ void __init tx4938_board_setup(void) printk(KERN_INFO "%s SDRAMC --", txx9_pcode_str); for (i = 0; i < 4; i++) { - unsigned long long cr = tx4938_sdramcptr->cr[i]; + u64 cr = TX4938_SDRAMC_CR(i); unsigned long ram_base, ram_size; if (!((unsigned long)cr & 0x00000400)) continue; /* disabled */ @@ -318,20 +318,21 @@ void __init tx4938_board_setup(void) ram_size = ((unsigned long)(cr >> 33) + 1) << 21; if (ram_base >= 0x20000000) continue; /* high memory (ignore) */ - printk(" CR%d:%016Lx", i, cr); + printk(KERN_CONT " CR%d:%016llx", i, cr); tx4938_sdram_resource[i].name = "SDRAM"; tx4938_sdram_resource[i].start = ram_base; tx4938_sdram_resource[i].end = ram_base + ram_size - 1; tx4938_sdram_resource[i].flags = IORESOURCE_MEM; request_resource(&iomem_resource, &tx4938_sdram_resource[i]); } - printk(" TR:%09Lx\n", tx4938_sdramcptr->tr); + printk(KERN_CONT " TR:%09llx\n", 
____raw_readq(&tx4938_sdramcptr->tr)); /* SRAM */ - if (tx4938_sramcptr->cr & 1) { + if (____raw_readq(&tx4938_sramcptr->cr) & 1) { unsigned int size = 0x800; unsigned long base = - (tx4938_sramcptr->cr >> (39-11)) & ~(size - 1); + (____raw_readq(&tx4938_sramcptr->cr) >> (39-11)) + & ~(size - 1); tx4938_sram_resource.name = "SRAM"; tx4938_sram_resource.start = base; tx4938_sram_resource.end = base + size - 1; -- cgit v1.2.3 From 94a4c32939dede9328c6e4face335eb8441fc18d Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Sat, 19 Jul 2008 01:51:47 +0900 Subject: [MIPS] TXx9: Add 64-bit support SYS_SUPPORTS_64BIT_KERNEL is enabled for RBTX4927/RBTX4938, but actually it was broken for long time (or from the beginning). Now it should work. Signed-off-by: Atsushi Nemoto Signed-off-by: Ralf Baechle --- arch/mips/txx9/generic/Makefile | 4 +- arch/mips/txx9/generic/irq_tx4927.c | 2 +- arch/mips/txx9/generic/irq_tx4938.c | 2 +- arch/mips/txx9/generic/setup.c | 16 ++- arch/mips/txx9/generic/setup_tx4927.c | 194 +++++++++++++++++++++++++ arch/mips/txx9/generic/setup_tx4938.c | 259 ++++++++++++++++++++++++++++++++++ arch/mips/txx9/jmr3927/setup.c | 8 -- arch/mips/txx9/rbtx4927/irq.c | 12 +- arch/mips/txx9/rbtx4927/setup.c | 89 ++++-------- arch/mips/txx9/rbtx4938/setup.c | 218 ++-------------------------- 10 files changed, 510 insertions(+), 294 deletions(-) create mode 100644 arch/mips/txx9/generic/setup_tx4927.c create mode 100644 arch/mips/txx9/generic/setup_tx4938.c (limited to 'arch') diff --git a/arch/mips/txx9/generic/Makefile b/arch/mips/txx9/generic/Makefile index ab274ede9a7..9c120771e65 100644 --- a/arch/mips/txx9/generic/Makefile +++ b/arch/mips/txx9/generic/Makefile @@ -4,8 +4,8 @@ obj-y += setup.o obj-$(CONFIG_PCI) += pci.o -obj-$(CONFIG_SOC_TX4927) += mem_tx4927.o irq_tx4927.o -obj-$(CONFIG_SOC_TX4938) += mem_tx4927.o irq_tx4938.o +obj-$(CONFIG_SOC_TX4927) += mem_tx4927.o setup_tx4927.o irq_tx4927.o +obj-$(CONFIG_SOC_TX4938) += mem_tx4927.o setup_tx4938.o 
irq_tx4938.o obj-$(CONFIG_TOSHIBA_FPCIB0) += smsc_fdc37m81x.o obj-$(CONFIG_KGDB) += dbgio.o diff --git a/arch/mips/txx9/generic/irq_tx4927.c b/arch/mips/txx9/generic/irq_tx4927.c index 6377bd8a905..cbea1fdde82 100644 --- a/arch/mips/txx9/generic/irq_tx4927.c +++ b/arch/mips/txx9/generic/irq_tx4927.c @@ -31,7 +31,7 @@ void __init tx4927_irq_init(void) { mips_cpu_irq_init(); - txx9_irq_init(TX4927_IRC_REG); + txx9_irq_init(TX4927_IRC_REG & 0xfffffffffULL); set_irq_chained_handler(MIPS_CPU_IRQ_BASE + TX4927_IRC_INT, handle_simple_irq); } diff --git a/arch/mips/txx9/generic/irq_tx4938.c b/arch/mips/txx9/generic/irq_tx4938.c index 5fc86c9c9d2..6eac684bf19 100644 --- a/arch/mips/txx9/generic/irq_tx4938.c +++ b/arch/mips/txx9/generic/irq_tx4938.c @@ -19,7 +19,7 @@ void __init tx4938_irq_init(void) { mips_cpu_irq_init(); - txx9_irq_init(TX4938_IRC_REG); + txx9_irq_init(TX4938_IRC_REG & 0xfffffffffULL); set_irq_chained_handler(MIPS_CPU_IRQ_BASE + TX4938_IRC_INT, handle_simple_irq); } diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 8caef07701b..3715a8f5ea4 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -30,6 +30,7 @@ struct resource txx9_ce_res[8]; static char txx9_ce_res_name[8][4]; /* "CEn" */ /* pcode, internal register */ +unsigned int txx9_pcode; char txx9_pcode_str[8]; static struct resource txx9_reg_res = { .name = txx9_pcode_str, @@ -59,15 +60,16 @@ unsigned int txx9_master_clock; unsigned int txx9_cpu_clock; unsigned int txx9_gbus_clock; +int txx9_ccfg_toeon __initdata = 1; /* Minimum CLK support */ struct clk *clk_get(struct device *dev, const char *id) { if (!strcmp(id, "spi-baseclk")) - return (struct clk *)(txx9_gbus_clock / 2 / 4); + return (struct clk *)((unsigned long)txx9_gbus_clock / 2 / 4); if (!strcmp(id, "imbus_clk")) - return (struct clk *)(txx9_gbus_clock / 2); + return (struct clk *)((unsigned long)txx9_gbus_clock / 2); return ERR_PTR(-ENOENT); } EXPORT_SYMBOL(clk_get); @@ -123,6 
+125,12 @@ void __init prom_init_cmdline(void) int argc = (int)fw_arg0; char **argv = (char **)fw_arg1; int i; /* Always ignore the "-c" at argv[0] */ +#ifdef CONFIG_64BIT + char *fixed_argv[32]; + for (i = 0; i < argc; i++) + fixed_argv[i] = (char *)(long)(*((__s32 *)argv + i)); + argv = fixed_argv; +#endif /* ignore all built-in args if any f/w args given */ if (argc > 1) @@ -180,6 +188,10 @@ char * __init prom_getcmdline(void) /* wrappers */ void __init plat_mem_setup(void) { + ioport_resource.start = 0; + ioport_resource.end = ~0UL; /* no limit */ + iomem_resource.start = 0; + iomem_resource.end = ~0UL; /* no limit */ txx9_board_vec->mem_setup(); } diff --git a/arch/mips/txx9/generic/setup_tx4927.c b/arch/mips/txx9/generic/setup_tx4927.c new file mode 100644 index 00000000000..89d6e28add9 --- /dev/null +++ b/arch/mips/txx9/generic/setup_tx4927.c @@ -0,0 +1,194 @@ +/* + * TX4927 setup routines + * Based on linux/arch/mips/txx9/rbtx4938/setup.c, + * and RBTX49xx patch from CELF patch archive. + * + * 2003-2005 (c) MontaVista Software, Inc. + * (C) Copyright TOSHIBA CORPORATION 2000-2001, 2004-2007 + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void __init tx4927_wdr_init(void) +{ + /* clear WatchDogReset (W1C) */ + tx4927_ccfg_set(TX4927_CCFG_WDRST); + /* do reset on watchdog */ + tx4927_ccfg_set(TX4927_CCFG_WR); +} + +static struct resource tx4927_sdram_resource[4]; + +void __init tx4927_setup(void) +{ + int i; + __u32 divmode; + int cpuclk = 0; + u64 ccfg; + + txx9_reg_res_init(TX4927_REV_PCODE(), TX4927_REG_BASE, + TX4927_REG_SIZE); + + /* SDRAMC,EBUSC are configured by PROM */ + for (i = 0; i < 8; i++) { + if (!(TX4927_EBUSC_CR(i) & 0x8)) + continue; /* disabled */ + txx9_ce_res[i].start = (unsigned long)TX4927_EBUSC_BA(i); + txx9_ce_res[i].end = + txx9_ce_res[i].start + TX4927_EBUSC_SIZE(i) - 1; + request_resource(&iomem_resource, &txx9_ce_res[i]); + } + + /* clocks */ + ccfg = ____raw_readq(&tx4927_ccfgptr->ccfg); + if (txx9_master_clock) { + /* calculate gbus_clock and cpu_clock from master_clock */ + divmode = (__u32)ccfg & TX4927_CCFG_DIVMODE_MASK; + switch (divmode) { + case TX4927_CCFG_DIVMODE_8: + case TX4927_CCFG_DIVMODE_10: + case TX4927_CCFG_DIVMODE_12: + case TX4927_CCFG_DIVMODE_16: + txx9_gbus_clock = txx9_master_clock * 4; break; + default: + txx9_gbus_clock = txx9_master_clock; + } + switch (divmode) { + case TX4927_CCFG_DIVMODE_2: + case TX4927_CCFG_DIVMODE_8: + cpuclk = txx9_gbus_clock * 2; break; + case TX4927_CCFG_DIVMODE_2_5: + case TX4927_CCFG_DIVMODE_10: + cpuclk = txx9_gbus_clock * 5 / 2; break; + case TX4927_CCFG_DIVMODE_3: + case TX4927_CCFG_DIVMODE_12: + cpuclk = txx9_gbus_clock * 3; break; + case TX4927_CCFG_DIVMODE_4: + case TX4927_CCFG_DIVMODE_16: + cpuclk = txx9_gbus_clock * 4; break; + } + txx9_cpu_clock = cpuclk; + } else { + if (txx9_cpu_clock == 0) + txx9_cpu_clock = 200000000; /* 200MHz */ + /* calculate gbus_clock and master_clock from cpu_clock */ + cpuclk = txx9_cpu_clock; + divmode = (__u32)ccfg & TX4927_CCFG_DIVMODE_MASK; + switch (divmode) { + case 
TX4927_CCFG_DIVMODE_2: + case TX4927_CCFG_DIVMODE_8: + txx9_gbus_clock = cpuclk / 2; break; + case TX4927_CCFG_DIVMODE_2_5: + case TX4927_CCFG_DIVMODE_10: + txx9_gbus_clock = cpuclk * 2 / 5; break; + case TX4927_CCFG_DIVMODE_3: + case TX4927_CCFG_DIVMODE_12: + txx9_gbus_clock = cpuclk / 3; break; + case TX4927_CCFG_DIVMODE_4: + case TX4927_CCFG_DIVMODE_16: + txx9_gbus_clock = cpuclk / 4; break; + } + switch (divmode) { + case TX4927_CCFG_DIVMODE_8: + case TX4927_CCFG_DIVMODE_10: + case TX4927_CCFG_DIVMODE_12: + case TX4927_CCFG_DIVMODE_16: + txx9_master_clock = txx9_gbus_clock / 4; break; + default: + txx9_master_clock = txx9_gbus_clock; + } + } + /* change default value to udelay/mdelay take reasonable time */ + loops_per_jiffy = txx9_cpu_clock / HZ / 2; + + /* CCFG */ + tx4927_wdr_init(); + /* clear BusErrorOnWrite flag (W1C) */ + tx4927_ccfg_set(TX4927_CCFG_BEOW); + /* enable Timeout BusError */ + if (txx9_ccfg_toeon) + tx4927_ccfg_set(TX4927_CCFG_TOE); + + /* DMA selection */ + txx9_clear64(&tx4927_ccfgptr->pcfg, TX4927_PCFG_DMASEL_ALL); + + /* Use external clock for external arbiter */ + if (!(____raw_readq(&tx4927_ccfgptr->ccfg) & TX4927_CCFG_PCIARB)) + txx9_clear64(&tx4927_ccfgptr->pcfg, TX4927_PCFG_PCICLKEN_ALL); + + printk(KERN_INFO "%s -- %dMHz(M%dMHz) CRIR:%08x CCFG:%llx PCFG:%llx\n", + txx9_pcode_str, + (cpuclk + 500000) / 1000000, + (txx9_master_clock + 500000) / 1000000, + (__u32)____raw_readq(&tx4927_ccfgptr->crir), + (unsigned long long)____raw_readq(&tx4927_ccfgptr->ccfg), + (unsigned long long)____raw_readq(&tx4927_ccfgptr->pcfg)); + + printk(KERN_INFO "%s SDRAMC --", txx9_pcode_str); + for (i = 0; i < 4; i++) { + __u64 cr = TX4927_SDRAMC_CR(i); + unsigned long base, size; + if (!((__u32)cr & 0x00000400)) + continue; /* disabled */ + base = (unsigned long)(cr >> 49) << 21; + size = (((unsigned long)(cr >> 33) & 0x7fff) + 1) << 21; + printk(" CR%d:%016llx", i, (unsigned long long)cr); + tx4927_sdram_resource[i].name = "SDRAM"; + 
tx4927_sdram_resource[i].start = base; + tx4927_sdram_resource[i].end = base + size - 1; + tx4927_sdram_resource[i].flags = IORESOURCE_MEM; + request_resource(&iomem_resource, &tx4927_sdram_resource[i]); + } + printk(" TR:%09llx\n", + (unsigned long long)____raw_readq(&tx4927_sdramcptr->tr)); + + /* TMR */ + /* disable all timers */ + for (i = 0; i < TX4927_NR_TMR; i++) + txx9_tmr_init(TX4927_TMR_REG(i) & 0xfffffffffULL); + + /* PIO */ + txx9_gpio_init(TX4927_PIO_REG & 0xfffffffffULL, 0, TX4927_NUM_PIO); + __raw_writel(0, &tx4927_pioptr->maskcpu); + __raw_writel(0, &tx4927_pioptr->maskext); +} + +void __init tx4927_time_init(unsigned int tmrnr) +{ + if (____raw_readq(&tx4927_ccfgptr->ccfg) & TX4927_CCFG_TINTDIS) + txx9_clockevent_init(TX4927_TMR_REG(tmrnr) & 0xfffffffffULL, + TXX9_IRQ_BASE + TX4927_IR_TMR(tmrnr), + TXX9_IMCLK); +} + +void __init tx4927_setup_serial(void) +{ +#ifdef CONFIG_SERIAL_TXX9 + int i; + struct uart_port req; + + for (i = 0; i < 2; i++) { + memset(&req, 0, sizeof(req)); + req.line = i; + req.iotype = UPIO_MEM; + req.membase = (unsigned char __iomem *)TX4927_SIO_REG(i); + req.mapbase = TX4927_SIO_REG(i) & 0xfffffffffULL; + req.irq = TXX9_IRQ_BASE + TX4927_IR_SIO(i); + req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; + req.uartclk = TXX9_IMCLK; + early_serial_txx9_setup(&req); + } +#endif /* CONFIG_SERIAL_TXX9 */ +} diff --git a/arch/mips/txx9/generic/setup_tx4938.c b/arch/mips/txx9/generic/setup_tx4938.c new file mode 100644 index 00000000000..317378d8579 --- /dev/null +++ b/arch/mips/txx9/generic/setup_tx4938.c @@ -0,0 +1,259 @@ +/* + * TX4938/4937 setup routines + * Based on linux/arch/mips/txx9/rbtx4938/setup.c, + * and RBTX49xx patch from CELF patch archive. + * + * 2003-2005 (c) MontaVista Software, Inc. + * (C) Copyright TOSHIBA CORPORATION 2000-2001, 2004-2007 + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void __init tx4938_wdr_init(void) +{ + /* clear WatchDogReset (W1C) */ + tx4938_ccfg_set(TX4938_CCFG_WDRST); + /* do reset on watchdog */ + tx4938_ccfg_set(TX4938_CCFG_WR); +} + +static struct resource tx4938_sdram_resource[4]; +static struct resource tx4938_sram_resource; + +#define TX4938_SRAM_SIZE 0x800 + +void __init tx4938_setup(void) +{ + int i; + __u32 divmode; + int cpuclk = 0; + u64 ccfg; + + txx9_reg_res_init(TX4938_REV_PCODE(), TX4938_REG_BASE, + TX4938_REG_SIZE); + + /* SDRAMC,EBUSC are configured by PROM */ + for (i = 0; i < 8; i++) { + if (!(TX4938_EBUSC_CR(i) & 0x8)) + continue; /* disabled */ + txx9_ce_res[i].start = (unsigned long)TX4938_EBUSC_BA(i); + txx9_ce_res[i].end = + txx9_ce_res[i].start + TX4938_EBUSC_SIZE(i) - 1; + request_resource(&iomem_resource, &txx9_ce_res[i]); + } + + /* clocks */ + ccfg = ____raw_readq(&tx4938_ccfgptr->ccfg); + if (txx9_master_clock) { + /* calculate gbus_clock and cpu_clock from master_clock */ + divmode = (__u32)ccfg & TX4938_CCFG_DIVMODE_MASK; + switch (divmode) { + case TX4938_CCFG_DIVMODE_8: + case TX4938_CCFG_DIVMODE_10: + case TX4938_CCFG_DIVMODE_12: + case TX4938_CCFG_DIVMODE_16: + case TX4938_CCFG_DIVMODE_18: + txx9_gbus_clock = txx9_master_clock * 4; break; + default: + txx9_gbus_clock = txx9_master_clock; + } + switch (divmode) { + case TX4938_CCFG_DIVMODE_2: + case TX4938_CCFG_DIVMODE_8: + cpuclk = txx9_gbus_clock * 2; break; + case TX4938_CCFG_DIVMODE_2_5: + case TX4938_CCFG_DIVMODE_10: + cpuclk = txx9_gbus_clock * 5 / 2; break; + case TX4938_CCFG_DIVMODE_3: + case TX4938_CCFG_DIVMODE_12: + cpuclk = txx9_gbus_clock * 3; break; + case TX4938_CCFG_DIVMODE_4: + case TX4938_CCFG_DIVMODE_16: + cpuclk = txx9_gbus_clock * 4; break; + case TX4938_CCFG_DIVMODE_4_5: + case TX4938_CCFG_DIVMODE_18: + cpuclk = txx9_gbus_clock * 9 / 2; break; + } + txx9_cpu_clock = cpuclk; + } else { + if (txx9_cpu_clock == 0) 
+ txx9_cpu_clock = 300000000; /* 300MHz */ + /* calculate gbus_clock and master_clock from cpu_clock */ + cpuclk = txx9_cpu_clock; + divmode = (__u32)ccfg & TX4938_CCFG_DIVMODE_MASK; + switch (divmode) { + case TX4938_CCFG_DIVMODE_2: + case TX4938_CCFG_DIVMODE_8: + txx9_gbus_clock = cpuclk / 2; break; + case TX4938_CCFG_DIVMODE_2_5: + case TX4938_CCFG_DIVMODE_10: + txx9_gbus_clock = cpuclk * 2 / 5; break; + case TX4938_CCFG_DIVMODE_3: + case TX4938_CCFG_DIVMODE_12: + txx9_gbus_clock = cpuclk / 3; break; + case TX4938_CCFG_DIVMODE_4: + case TX4938_CCFG_DIVMODE_16: + txx9_gbus_clock = cpuclk / 4; break; + case TX4938_CCFG_DIVMODE_4_5: + case TX4938_CCFG_DIVMODE_18: + txx9_gbus_clock = cpuclk * 2 / 9; break; + } + switch (divmode) { + case TX4938_CCFG_DIVMODE_8: + case TX4938_CCFG_DIVMODE_10: + case TX4938_CCFG_DIVMODE_12: + case TX4938_CCFG_DIVMODE_16: + case TX4938_CCFG_DIVMODE_18: + txx9_master_clock = txx9_gbus_clock / 4; break; + default: + txx9_master_clock = txx9_gbus_clock; + } + } + /* change default value to udelay/mdelay take reasonable time */ + loops_per_jiffy = txx9_cpu_clock / HZ / 2; + + /* CCFG */ + tx4938_wdr_init(); + /* clear BusErrorOnWrite flag (W1C) */ + tx4938_ccfg_set(TX4938_CCFG_BEOW); + /* enable Timeout BusError */ + if (txx9_ccfg_toeon) + tx4938_ccfg_set(TX4938_CCFG_TOE); + + /* DMA selection */ + txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_DMASEL_ALL); + + /* Use external clock for external arbiter */ + if (!(____raw_readq(&tx4938_ccfgptr->ccfg) & TX4938_CCFG_PCIARB)) + txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_PCICLKEN_ALL); + + printk(KERN_INFO "%s -- %dMHz(M%dMHz) CRIR:%08x CCFG:%llx PCFG:%llx\n", + txx9_pcode_str, + (cpuclk + 500000) / 1000000, + (txx9_master_clock + 500000) / 1000000, + (__u32)____raw_readq(&tx4938_ccfgptr->crir), + (unsigned long long)____raw_readq(&tx4938_ccfgptr->ccfg), + (unsigned long long)____raw_readq(&tx4938_ccfgptr->pcfg)); + + printk(KERN_INFO "%s SDRAMC --", txx9_pcode_str); + for (i = 0; i < 4; 
i++) { + __u64 cr = TX4938_SDRAMC_CR(i); + unsigned long base, size; + if (!((__u32)cr & 0x00000400)) + continue; /* disabled */ + base = (unsigned long)(cr >> 49) << 21; + size = (((unsigned long)(cr >> 33) & 0x7fff) + 1) << 21; + printk(" CR%d:%016llx", i, (unsigned long long)cr); + tx4938_sdram_resource[i].name = "SDRAM"; + tx4938_sdram_resource[i].start = base; + tx4938_sdram_resource[i].end = base + size - 1; + tx4938_sdram_resource[i].flags = IORESOURCE_MEM; + request_resource(&iomem_resource, &tx4938_sdram_resource[i]); + } + printk(" TR:%09llx\n", + (unsigned long long)____raw_readq(&tx4938_sdramcptr->tr)); + + /* SRAM */ + if (txx9_pcode == 0x4938 && ____raw_readq(&tx4938_sramcptr->cr) & 1) { + unsigned int size = TX4938_SRAM_SIZE; + tx4938_sram_resource.name = "SRAM"; + tx4938_sram_resource.start = + (____raw_readq(&tx4938_sramcptr->cr) >> (39-11)) + & ~(size - 1); + tx4938_sram_resource.end = + tx4938_sram_resource.start + TX4938_SRAM_SIZE - 1; + tx4938_sram_resource.flags = IORESOURCE_MEM; + request_resource(&iomem_resource, &tx4938_sram_resource); + } + + /* TMR */ + /* disable all timers */ + for (i = 0; i < TX4938_NR_TMR; i++) + txx9_tmr_init(TX4938_TMR_REG(i) & 0xfffffffffULL); + + /* DMA */ + for (i = 0; i < 2; i++) + ____raw_writeq(TX4938_DMA_MCR_MSTEN, + (void __iomem *)(TX4938_DMA_REG(i) + 0x50)); + + /* PIO */ + txx9_gpio_init(TX4938_PIO_REG & 0xfffffffffULL, 0, TX4938_NUM_PIO); + __raw_writel(0, &tx4938_pioptr->maskcpu); + __raw_writel(0, &tx4938_pioptr->maskext); + + if (txx9_pcode == 0x4938) { + __u64 pcfg = ____raw_readq(&tx4938_ccfgptr->pcfg); + /* set PCIC1 reset */ + txx9_set64(&tx4938_ccfgptr->clkctr, TX4938_CLKCTR_PCIC1RST); + if (pcfg & (TX4938_PCFG_ETH0_SEL | TX4938_PCFG_ETH1_SEL)) { + mdelay(1); /* at least 128 cpu clock */ + /* clear PCIC1 reset */ + txx9_clear64(&tx4938_ccfgptr->clkctr, + TX4938_CLKCTR_PCIC1RST); + } else { + printk(KERN_INFO "%s: stop PCIC1\n", txx9_pcode_str); + /* stop PCIC1 */ + 
txx9_set64(&tx4938_ccfgptr->clkctr, + TX4938_CLKCTR_PCIC1CKD); + } + if (!(pcfg & TX4938_PCFG_ETH0_SEL)) { + printk(KERN_INFO "%s: stop ETH0\n", txx9_pcode_str); + txx9_set64(&tx4938_ccfgptr->clkctr, + TX4938_CLKCTR_ETH0RST); + txx9_set64(&tx4938_ccfgptr->clkctr, + TX4938_CLKCTR_ETH0CKD); + } + if (!(pcfg & TX4938_PCFG_ETH1_SEL)) { + printk(KERN_INFO "%s: stop ETH1\n", txx9_pcode_str); + txx9_set64(&tx4938_ccfgptr->clkctr, + TX4938_CLKCTR_ETH1RST); + txx9_set64(&tx4938_ccfgptr->clkctr, + TX4938_CLKCTR_ETH1CKD); + } + } +} + +void __init tx4938_time_init(unsigned int tmrnr) +{ + if (____raw_readq(&tx4938_ccfgptr->ccfg) & TX4938_CCFG_TINTDIS) + txx9_clockevent_init(TX4938_TMR_REG(tmrnr) & 0xfffffffffULL, + TXX9_IRQ_BASE + TX4938_IR_TMR(tmrnr), + TXX9_IMCLK); +} + +void __init tx4938_setup_serial(void) +{ +#ifdef CONFIG_SERIAL_TXX9 + int i; + struct uart_port req; + unsigned int ch_mask = 0; + + if (__raw_readq(&tx4938_ccfgptr->pcfg) & TX4938_PCFG_ETH0_SEL) + ch_mask |= 1 << 1; /* disable SIO1 by PCFG setting */ + for (i = 0; i < 2; i++) { + if ((1 << i) & ch_mask) + continue; + memset(&req, 0, sizeof(req)); + req.line = i; + req.iotype = UPIO_MEM; + req.membase = (unsigned char __iomem *)TX4938_SIO_REG(i); + req.mapbase = TX4938_SIO_REG(i) & 0xfffffffffULL; + req.irq = TXX9_IRQ_BASE + TX4938_IR_SIO(i); + req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; + req.uartclk = TXX9_IMCLK; + early_serial_txx9_setup(&req); + } +#endif /* CONFIG_SERIAL_TXX9 */ +} diff --git a/arch/mips/txx9/jmr3927/setup.c b/arch/mips/txx9/jmr3927/setup.c index 5e35ef73c5a..03647ebe413 100644 --- a/arch/mips/txx9/jmr3927/setup.c +++ b/arch/mips/txx9/jmr3927/setup.c @@ -105,14 +105,6 @@ static void __init jmr3927_mem_setup(void) _machine_halt = jmr3927_machine_halt; pm_power_off = jmr3927_machine_power_off; - /* - * IO/MEM resources. 
- */ - ioport_resource.start = 0; - ioport_resource.end = 0xffffffff; - iomem_resource.start = 0; - iomem_resource.end = 0xffffffff; - /* Reboot on panic */ panic_timeout = 180; diff --git a/arch/mips/txx9/rbtx4927/irq.c b/arch/mips/txx9/rbtx4927/irq.c index 70f13211bc2..cd748a93032 100644 --- a/arch/mips/txx9/rbtx4927/irq.c +++ b/arch/mips/txx9/rbtx4927/irq.c @@ -126,14 +126,12 @@ static struct irq_chip toshiba_rbtx4927_irq_ioc_type = { .mask_ack = toshiba_rbtx4927_irq_ioc_disable, .unmask = toshiba_rbtx4927_irq_ioc_enable, }; -#define TOSHIBA_RBTX4927_IOC_INTR_ENAB (void __iomem *)0xbc002000UL -#define TOSHIBA_RBTX4927_IOC_INTR_STAT (void __iomem *)0xbc002006UL static int toshiba_rbtx4927_irq_nested(int sw_irq) { u8 level3; - level3 = readb(TOSHIBA_RBTX4927_IOC_INTR_STAT) & 0x1f; + level3 = readb(rbtx4927_imstat_addr) & 0x1f; if (level3) sw_irq = RBTX4927_IRQ_IOC + fls(level3) - 1; return (sw_irq); @@ -154,18 +152,18 @@ static void toshiba_rbtx4927_irq_ioc_enable(unsigned int irq) { unsigned char v; - v = readb(TOSHIBA_RBTX4927_IOC_INTR_ENAB); + v = readb(rbtx4927_imask_addr); v |= (1 << (irq - RBTX4927_IRQ_IOC)); - writeb(v, TOSHIBA_RBTX4927_IOC_INTR_ENAB); + writeb(v, rbtx4927_imask_addr); } static void toshiba_rbtx4927_irq_ioc_disable(unsigned int irq) { unsigned char v; - v = readb(TOSHIBA_RBTX4927_IOC_INTR_ENAB); + v = readb(rbtx4927_imask_addr); v &= ~(1 << (irq - RBTX4927_IRQ_IOC)); - writeb(v, TOSHIBA_RBTX4927_IOC_INTR_ENAB); + writeb(v, rbtx4927_imask_addr); mmiowb(); } diff --git a/arch/mips/txx9/rbtx4927/setup.c b/arch/mips/txx9/rbtx4927/setup.c index 1657fd935da..3da20ea3e55 100644 --- a/arch/mips/txx9/rbtx4927/setup.c +++ b/arch/mips/txx9/rbtx4927/setup.c @@ -53,17 +53,10 @@ #include #include #include -#include -#include #include #include #include #include /* for TX4937 */ -#ifdef CONFIG_SERIAL_TXX9 -#include -#endif - -static int tx4927_ccfg_toeon = 1; #ifdef CONFIG_PCI static void __init tx4927_pci_setup(void) @@ -184,14 +177,14 @@ static void 
toshiba_rbtx4927_restart(char *command) printk(KERN_NOTICE "System Rebooting...\n"); /* enable the s/w reset register */ - writeb(RBTX4927_SW_RESET_ENABLE_SET, RBTX4927_SW_RESET_ENABLE); + writeb(1, rbtx4927_softresetlock_addr); /* wait for enable to be seen */ - while ((readb(RBTX4927_SW_RESET_ENABLE) & - RBTX4927_SW_RESET_ENABLE_SET) == 0x00); + while (!(readb(rbtx4927_softresetlock_addr) & 1)) + ; /* do a s/w reset */ - writeb(RBTX4927_SW_RESET_DO_SET, RBTX4927_SW_RESET_DO); + writeb(1, rbtx4927_softreset_addr); /* do something passive while waiting for reset */ local_irq_disable(); @@ -213,9 +206,11 @@ static void toshiba_rbtx4927_power_off(void) /* no return */ } +static void __init rbtx4927_clock_init(void); +static void __init rbtx4937_clock_init(void); + static void __init rbtx4927_mem_setup(void) { - int i; u32 cp0_config; char *argptr; @@ -227,16 +222,18 @@ static void __init rbtx4927_mem_setup(void) cp0_config = cp0_config & ~(TX49_CONF_IC | TX49_CONF_DC); write_c0_config(cp0_config); - ioport_resource.end = 0xffffffff; - iomem_resource.end = 0xffffffff; + if (TX4927_REV_PCODE() == 0x4927) { + rbtx4927_clock_init(); + tx4927_setup(); + } else { + rbtx4937_clock_init(); + tx4938_setup(); + } _machine_restart = toshiba_rbtx4927_restart; _machine_halt = toshiba_rbtx4927_halt; pm_power_off = toshiba_rbtx4927_power_off; - for (i = 0; i < TX4927_NR_TMR; i++) - txx9_tmr_init(TX4927_TMR_REG(0) & 0xfffffffffULL); - #ifdef CONFIG_PCI txx9_alloc_pci_controller(&txx9_primary_pcic, RBTX4927_PCIMEM, RBTX4927_PCIMEM_SIZE, @@ -245,36 +242,13 @@ static void __init rbtx4927_mem_setup(void) set_io_port_base(KSEG1 + RBTX4927_ISA_IO_OFFSET); #endif - /* CCFG */ - /* do reset on watchdog */ - tx4927_ccfg_set(TX4927_CCFG_WR); - /* enable Timeout BusError */ - if (tx4927_ccfg_toeon) - tx4927_ccfg_set(TX4927_CCFG_TOE); - -#ifdef CONFIG_SERIAL_TXX9 - { - extern int early_serial_txx9_setup(struct uart_port *port); - struct uart_port req; - for(i = 0; i < 2; i++) { - memset(&req, 
0, sizeof(req)); - req.line = i; - req.iotype = UPIO_MEM; - req.membase = (char *)(0xff1ff300 + i * 0x100); - req.mapbase = 0xff1ff300 + i * 0x100; - req.irq = TXX9_IRQ_BASE + TX4927_IR_SIO(i); - req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; - req.uartclk = 50000000; - early_serial_txx9_setup(&req); - } - } + tx4927_setup_serial(); #ifdef CONFIG_SERIAL_TXX9_CONSOLE argptr = prom_getcmdline(); if (strstr(argptr, "console=") == NULL) { strcat(argptr, " console=ttyS0,38400"); } #endif -#endif #ifdef CONFIG_ROOT_NFS argptr = prom_getcmdline(); @@ -291,19 +265,7 @@ static void __init rbtx4927_mem_setup(void) #endif } -static void __init rbtx49x7_common_time_init(void) -{ - /* change default value to udelay/mdelay take reasonable time */ - loops_per_jiffy = txx9_cpu_clock / HZ / 2; - - mips_hpt_frequency = txx9_cpu_clock / 2; - if (____raw_readq(&tx4927_ccfgptr->ccfg) & TX4927_CCFG_TINTDIS) - txx9_clockevent_init(TX4927_TMR_REG(0) & 0xfffffffffULL, - TXX9_IRQ_BASE + 17, - 50000000); -} - -static void __init rbtx4927_time_init(void) +static void __init rbtx4927_clock_init(void) { /* * ASSUMPTION: PCIDIVMODE is configured for PCI 33MHz or 66MHz. @@ -325,11 +287,9 @@ static void __init rbtx4927_time_init(void) default: txx9_cpu_clock = 200000000; /* 200MHz */ } - - rbtx49x7_common_time_init(); } -static void __init rbtx4937_time_init(void) +static void __init rbtx4937_clock_init(void) { /* * ASSUMPTION: PCIDIVMODE is configured for PCI 33MHz or 66MHz. 
@@ -357,15 +317,18 @@ static void __init rbtx4937_time_init(void) default: txx9_cpu_clock = 333333333; /* 333MHz */ } +} - rbtx49x7_common_time_init(); +static void __init rbtx4927_time_init(void) +{ + tx4927_time_init(0); } static int __init toshiba_rbtx4927_rtc_init(void) { - static struct resource __initdata res = { - .start = 0x1c010000, - .end = 0x1c010000 + 0x800 - 1, + struct resource res = { + .start = RBTX4927_BRAMRTC_BASE - IO_BASE, + .end = RBTX4927_BRAMRTC_BASE - IO_BASE + 0x800 - 1, .flags = IORESOURCE_MEM, }; struct platform_device *dev = @@ -375,7 +338,7 @@ static int __init toshiba_rbtx4927_rtc_init(void) static int __init rbtx4927_ne_init(void) { - static struct resource __initdata res[] = { + struct resource res[] = { { .start = RBTX4927_RTL_8019_BASE, .end = RBTX4927_RTL_8019_BASE + 0x20 - 1, @@ -434,7 +397,7 @@ struct txx9_board_vec rbtx4937_vec __initdata = { .prom_init = rbtx4927_prom_init, .mem_setup = rbtx4927_mem_setup, .irq_setup = rbtx4927_irq_setup, - .time_init = rbtx4937_time_init, + .time_init = rbtx4927_time_init, .device_init = rbtx4927_device_init, .arch_init = rbtx4937_arch_init, #ifdef CONFIG_PCI diff --git a/arch/mips/txx9/rbtx4938/setup.c b/arch/mips/txx9/rbtx4938/setup.c index c1e076c7b2d..6c2b99bb8af 100644 --- a/arch/mips/txx9/rbtx4938/setup.c +++ b/arch/mips/txx9/rbtx4938/setup.c @@ -20,21 +20,14 @@ #include #include -#include -#include #include #include #include #include -#ifdef CONFIG_SERIAL_TXX9 -#include -#endif #include #include #include -static int tx4938_ccfg_toeon = 1; - static void rbtx4938_machine_halt(void) { printk(KERN_NOTICE "System Halted\n"); @@ -182,189 +175,10 @@ static void __init rbtx4938_spi_setup(void) } static struct resource rbtx4938_fpga_resource; -static struct resource tx4938_sdram_resource[4]; -static struct resource tx4938_sram_resource; - -void __init tx4938_board_setup(void) -{ - int i; - unsigned long divmode; - int cpuclk = 0; - unsigned long pcode = TX4938_REV_PCODE(); - - 
ioport_resource.start = 0; - ioport_resource.end = 0xffffffff; - iomem_resource.start = 0; - iomem_resource.end = 0xffffffff; /* expand to 4GB */ - - txx9_reg_res_init(pcode, TX4938_REG_BASE, - TX4938_REG_SIZE); - /* SDRAMC,EBUSC are configured by PROM */ - for (i = 0; i < 8; i++) { - if (!(TX4938_EBUSC_CR(i) & 0x8)) - continue; /* disabled */ - txx9_ce_res[i].start = (unsigned long)TX4938_EBUSC_BA(i); - txx9_ce_res[i].end = - txx9_ce_res[i].start + TX4938_EBUSC_SIZE(i) - 1; - request_resource(&iomem_resource, &txx9_ce_res[i]); - } - - /* clocks */ - if (txx9_master_clock) { - u64 ccfg = ____raw_readq(&tx4938_ccfgptr->ccfg); - /* calculate gbus_clock and cpu_clock_freq from master_clock */ - divmode = (__u32)ccfg & TX4938_CCFG_DIVMODE_MASK; - switch (divmode) { - case TX4938_CCFG_DIVMODE_8: - case TX4938_CCFG_DIVMODE_10: - case TX4938_CCFG_DIVMODE_12: - case TX4938_CCFG_DIVMODE_16: - case TX4938_CCFG_DIVMODE_18: - txx9_gbus_clock = txx9_master_clock * 4; break; - default: - txx9_gbus_clock = txx9_master_clock; - } - switch (divmode) { - case TX4938_CCFG_DIVMODE_2: - case TX4938_CCFG_DIVMODE_8: - cpuclk = txx9_gbus_clock * 2; break; - case TX4938_CCFG_DIVMODE_2_5: - case TX4938_CCFG_DIVMODE_10: - cpuclk = txx9_gbus_clock * 5 / 2; break; - case TX4938_CCFG_DIVMODE_3: - case TX4938_CCFG_DIVMODE_12: - cpuclk = txx9_gbus_clock * 3; break; - case TX4938_CCFG_DIVMODE_4: - case TX4938_CCFG_DIVMODE_16: - cpuclk = txx9_gbus_clock * 4; break; - case TX4938_CCFG_DIVMODE_4_5: - case TX4938_CCFG_DIVMODE_18: - cpuclk = txx9_gbus_clock * 9 / 2; break; - } - txx9_cpu_clock = cpuclk; - } else { - u64 ccfg = ____raw_readq(&tx4938_ccfgptr->ccfg); - if (txx9_cpu_clock == 0) { - txx9_cpu_clock = 300000000; /* 300MHz */ - } - /* calculate gbus_clock and master_clock from cpu_clock_freq */ - cpuclk = txx9_cpu_clock; - divmode = (__u32)ccfg & TX4938_CCFG_DIVMODE_MASK; - switch (divmode) { - case TX4938_CCFG_DIVMODE_2: - case TX4938_CCFG_DIVMODE_8: - txx9_gbus_clock = cpuclk / 2; break; - 
case TX4938_CCFG_DIVMODE_2_5: - case TX4938_CCFG_DIVMODE_10: - txx9_gbus_clock = cpuclk * 2 / 5; break; - case TX4938_CCFG_DIVMODE_3: - case TX4938_CCFG_DIVMODE_12: - txx9_gbus_clock = cpuclk / 3; break; - case TX4938_CCFG_DIVMODE_4: - case TX4938_CCFG_DIVMODE_16: - txx9_gbus_clock = cpuclk / 4; break; - case TX4938_CCFG_DIVMODE_4_5: - case TX4938_CCFG_DIVMODE_18: - txx9_gbus_clock = cpuclk * 2 / 9; break; - } - switch (divmode) { - case TX4938_CCFG_DIVMODE_8: - case TX4938_CCFG_DIVMODE_10: - case TX4938_CCFG_DIVMODE_12: - case TX4938_CCFG_DIVMODE_16: - case TX4938_CCFG_DIVMODE_18: - txx9_master_clock = txx9_gbus_clock / 4; break; - default: - txx9_master_clock = txx9_gbus_clock; - } - } - /* change default value to udelay/mdelay take reasonable time */ - loops_per_jiffy = txx9_cpu_clock / HZ / 2; - - /* CCFG */ - /* clear WatchDogReset,BusErrorOnWrite flag (W1C) */ - tx4938_ccfg_set(TX4938_CCFG_WDRST | TX4938_CCFG_BEOW); - /* do reset on watchdog */ - tx4938_ccfg_set(TX4938_CCFG_WR); - /* clear PCIC1 reset */ - txx9_clear64(&tx4938_ccfgptr->clkctr, TX4938_CLKCTR_PCIC1RST); - - /* enable Timeout BusError */ - if (tx4938_ccfg_toeon) - tx4938_ccfg_set(TX4938_CCFG_TOE); - - /* DMA selection */ - txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_DMASEL_ALL); - - /* Use external clock for external arbiter */ - if (!(____raw_readq(&tx4938_ccfgptr->ccfg) & TX4938_CCFG_PCIARB)) - txx9_clear64(&tx4938_ccfgptr->pcfg, TX4938_PCFG_PCICLKEN_ALL); - - printk(KERN_INFO "%s -- %dMHz(M%dMHz) CRIR:%08x CCFG:%llx PCFG:%llx\n", - txx9_pcode_str, - (cpuclk + 500000) / 1000000, - (txx9_master_clock + 500000) / 1000000, - (__u32)____raw_readq(&tx4938_ccfgptr->crir), - (unsigned long long)____raw_readq(&tx4938_ccfgptr->ccfg), - (unsigned long long)____raw_readq(&tx4938_ccfgptr->pcfg)); - - printk(KERN_INFO "%s SDRAMC --", txx9_pcode_str); - for (i = 0; i < 4; i++) { - u64 cr = TX4938_SDRAMC_CR(i); - unsigned long ram_base, ram_size; - if (!((unsigned long)cr & 0x00000400)) - continue; /* 
disabled */ - ram_base = (unsigned long)(cr >> 49) << 21; - ram_size = ((unsigned long)(cr >> 33) + 1) << 21; - if (ram_base >= 0x20000000) - continue; /* high memory (ignore) */ - printk(KERN_CONT " CR%d:%016llx", i, cr); - tx4938_sdram_resource[i].name = "SDRAM"; - tx4938_sdram_resource[i].start = ram_base; - tx4938_sdram_resource[i].end = ram_base + ram_size - 1; - tx4938_sdram_resource[i].flags = IORESOURCE_MEM; - request_resource(&iomem_resource, &tx4938_sdram_resource[i]); - } - printk(KERN_CONT " TR:%09llx\n", ____raw_readq(&tx4938_sdramcptr->tr)); - - /* SRAM */ - if (____raw_readq(&tx4938_sramcptr->cr) & 1) { - unsigned int size = 0x800; - unsigned long base = - (____raw_readq(&tx4938_sramcptr->cr) >> (39-11)) - & ~(size - 1); - tx4938_sram_resource.name = "SRAM"; - tx4938_sram_resource.start = base; - tx4938_sram_resource.end = base + size - 1; - tx4938_sram_resource.flags = IORESOURCE_MEM; - request_resource(&iomem_resource, &tx4938_sram_resource); - } - - /* TMR */ - for (i = 0; i < TX4938_NR_TMR; i++) - txx9_tmr_init(TX4938_TMR_REG(i) & 0xfffffffffULL); - - /* enable DMA */ - for (i = 0; i < 2; i++) - ____raw_writeq(TX4938_DMA_MCR_MSTEN, - (void __iomem *)(TX4938_DMA_REG(i) + 0x50)); - - /* PIO */ - __raw_writel(0, &tx4938_pioptr->maskcpu); - __raw_writel(0, &tx4938_pioptr->maskext); - -#ifdef CONFIG_PCI - txx9_alloc_pci_controller(&txx9_primary_pcic, 0, 0, 0, 0); -#endif -} static void __init rbtx4938_time_init(void) { - mips_hpt_frequency = txx9_cpu_clock / 2; - if (____raw_readq(&tx4938_ccfgptr->ccfg) & TX4938_CCFG_TINTDIS) - txx9_clockevent_init(TX4938_TMR_REG(0) & 0xfffffffffULL, - TXX9_IRQ_BASE + TX4938_IR_TMR(0), - txx9_gbus_clock / 2); + tx4938_time_init(0); } static void __init rbtx4938_mem_setup(void) @@ -372,39 +186,24 @@ static void __init rbtx4938_mem_setup(void) unsigned long long pcfg; char *argptr; - iomem_resource.end = 0xffffffff; /* 4GB */ - if (txx9_master_clock == 0) txx9_master_clock = 25000000; /* 25MHz */ - tx4938_board_setup(); 
-#ifndef CONFIG_PCI + + tx4938_setup(); + +#ifdef CONFIG_PCI + txx9_alloc_pci_controller(&txx9_primary_pcic, 0, 0, 0, 0); +#else set_io_port_base(RBTX4938_ETHER_BASE); #endif -#ifdef CONFIG_SERIAL_TXX9 - { - extern int early_serial_txx9_setup(struct uart_port *port); - int i; - struct uart_port req; - for(i = 0; i < 2; i++) { - memset(&req, 0, sizeof(req)); - req.line = i; - req.iotype = UPIO_MEM; - req.membase = (char *)(0xff1ff300 + i * 0x100); - req.mapbase = 0xff1ff300 + i * 0x100; - req.irq = RBTX4938_IRQ_IRC_SIO(i); - req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; - req.uartclk = 50000000; - early_serial_txx9_setup(&req); - } - } + tx4938_setup_serial(); #ifdef CONFIG_SERIAL_TXX9_CONSOLE argptr = prom_getcmdline(); if (strstr(argptr, "console=") == NULL) { strcat(argptr, " console=ttyS0,38400"); } #endif -#endif #ifdef CONFIG_TOSHIBA_RBTX4938_MPLEX_PIO58_61 printk("PIOSEL: disabling both ata and nand selection\n"); @@ -568,7 +367,6 @@ static int __init rbtx4938_spi_init(void) static void __init rbtx4938_arch_init(void) { - txx9_gpio_init(TX4938_PIO_REG & 0xfffffffffULL, 0, 16); gpiochip_add(&rbtx4938_spi_gpio_chip); rbtx4938_pci_setup(); rbtx4938_spi_init(); -- cgit v1.2.3 From e0eb730757665d7e8ec0e79d9042a9311f3edb7e Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Sat, 19 Jul 2008 01:51:52 +0900 Subject: [MIPS] TXx9: Fix some sparse warnings Signed-off-by: Atsushi Nemoto Signed-off-by: Ralf Baechle --- arch/mips/txx9/generic/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 3715a8f5ea4..8c60c78b9a9 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -19,7 +19,9 @@ #include #include #include +#include #include +#include #include #ifdef CONFIG_CPU_TX49XX #include -- cgit v1.2.3 From 4914ad4a9f2d484a68422700ba8493db73c7c411 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sun, 20 Jul 2008 11:34:39 +0100 Subject: [MIPS] 32-bit 
compat: Delete unused sys_truncate64 and sys_ftruncate64. Signed-off-by: Ralf Baechle --- arch/mips/kernel/linux32.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 65af3cc90ab..c266211ed65 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -129,23 +129,6 @@ out: return error; } - -asmlinkage int sys_truncate64(const char __user *path, unsigned int high, - unsigned int low) -{ - if ((int)high < 0) - return -EINVAL; - return sys_truncate(path, ((long) high << 32) | low); -} - -asmlinkage int sys_ftruncate64(unsigned int fd, unsigned int high, - unsigned int low) -{ - if ((int)high < 0) - return -EINVAL; - return sys_ftruncate(fd, ((long) high << 32) | low); -} - /* * sys_execve() executes a new program. */ -- cgit v1.2.3 From 8213bbf9c1c0009872a3278aa7a83ec8f3508195 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sun, 20 Jul 2008 13:16:46 +0100 Subject: [MIPS] Rename MIPS sys_pipe syscall entry point to something MIPS-specific. 
Signed-off-by: Ralf Baechle --- arch/mips/kernel/scall32-o32.S | 2 +- arch/mips/kernel/scall64-64.S | 2 +- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 2 +- arch/mips/kernel/syscall.c | 9 ++++++++- 5 files changed, 12 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index c058c0b61a2..fc4fd4d705e 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -354,7 +354,7 @@ einval: li v0, -EINVAL sys sys_mkdir 2 sys sys_rmdir 1 /* 4040 */ sys sys_dup 1 - sys sys_pipe 0 + sys sysm_pipe 0 sys sys_times 1 sys sys_ni_syscall 0 sys sys_brk 1 /* 4045 */ diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index dc597b600c6..2b73fd1e452 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -219,7 +219,7 @@ sys_call_table: PTR sys_readv PTR sys_writev PTR sys_access /* 5020 */ - PTR sys_pipe + PTR sysm_pipe PTR sys_select PTR sys_sched_yield PTR sys_mremap diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 12940eca789..2654e75d2fe 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -141,7 +141,7 @@ EXPORT(sysn32_call_table) PTR compat_sys_readv PTR compat_sys_writev PTR sys_access /* 6020 */ - PTR sys_pipe + PTR sysm_pipe PTR compat_sys_select PTR sys_sched_yield PTR sys_mremap diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 9a275efb4f0..76167bea5a7 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -247,7 +247,7 @@ sys_call_table: PTR sys_mkdir PTR sys_rmdir /* 4040 */ PTR sys_dup - PTR sys_pipe + PTR sysm_pipe PTR compat_sys_times PTR sys_ni_syscall PTR sys_brk /* 4045 */ diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index af1bdc89748..3523c8d12ed 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -40,7 +40,14 @@ #include 
#include -asmlinkage int sys_pipe(nabi_no_regargs volatile struct pt_regs regs) +/* + * For historic reasons the pipe(2) syscall on MIPS has an unusual calling + * convention. It returns results in registers $v0 / $v1 which means there + * is no need for it to verify the validity of a userspace pointer + * argument. Historically that used to be expensive in Linux. These days + * the performance advantage is negligible. + */ +asmlinkage int sysm_pipe(nabi_no_regargs volatile struct pt_regs regs) { int fd[2]; int error, res; -- cgit v1.2.3 From 15648f154a8faea97cbe931e189cf0a57fd066f4 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 16 Jul 2008 21:52:25 +0100 Subject: simserial: Fix up for ldisc changes Noted by Tony Luck although I've done the patches differently and also removed some other bogus oddments. Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- arch/ia64/hp/sim/simserial.c | 46 +++----------------------------------------- 1 file changed, 3 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 23cafc80d2a..24b1ad5334c 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -193,18 +193,6 @@ static irqreturn_t rs_interrupt_single(int irq, void *dev_id) * ------------------------------------------------------------------- */ -#if 0 -/* - * not really used in our situation so keep them commented out for now - */ -static DECLARE_TASK_QUEUE(tq_serial); /* used to be at the top of the file */ -static void do_serial_bh(void) -{ - run_task_queue(&tq_serial); - printk(KERN_ERR "do_serial_bh: called\n"); -} -#endif - static void do_softint(struct work_struct *private_) { printk(KERN_ERR "simserial: do_softint called\n"); @@ -351,11 +339,7 @@ static void rs_flush_buffer(struct tty_struct *tty) info->xmit.head = info->xmit.tail = 0; local_irq_restore(flags); - wake_up_interruptible(&tty->write_wait); - - if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && 
tty->ldisc.write_wakeup) - (tty->ldisc.write_wakeup)(tty); + tty_wakeup(tty); } /* @@ -404,12 +388,6 @@ static void rs_unthrottle(struct tty_struct * tty) printk(KERN_INFO "simrs_unthrottle called\n"); } -/* - * rs_break() --- routine which turns the break handling on or off - */ -static void rs_break(struct tty_struct *tty, int break_state) -{ -} static int rs_ioctl(struct tty_struct *tty, struct file * file, unsigned int cmd, unsigned long arg) @@ -422,14 +400,6 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file, } switch (cmd) { - case TIOCMGET: - printk(KERN_INFO "rs_ioctl: TIOCMGET called\n"); - return -EINVAL; - case TIOCMBIS: - case TIOCMBIC: - case TIOCMSET: - printk(KERN_INFO "rs_ioctl: TIOCMBIS/BIC/SET called\n"); - return -EINVAL; case TIOCGSERIAL: printk(KERN_INFO "simrs_ioctl TIOCGSERIAL called\n"); return 0; @@ -488,14 +458,6 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file, static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { - unsigned int cflag = tty->termios->c_cflag; - - if ( (cflag == old_termios->c_cflag) - && ( RELEVANT_IFLAG(tty->termios->c_iflag) - == RELEVANT_IFLAG(old_termios->c_iflag))) - return; - - /* Handle turning off CRTSCTS */ if ((old_termios->c_cflag & CRTSCTS) && !(tty->termios->c_cflag & CRTSCTS)) { @@ -623,9 +585,8 @@ static void rs_close(struct tty_struct *tty, struct file * filp) * the line discipline to only process XON/XOFF characters. 
*/ shutdown(info); - if (tty->ops->flush_buffer) - tty->ops->flush_buffer(tty); - if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty); + rs_flush_buffer(tty); + tty_ldisc_flush(tty); info->event = 0; info->tty = NULL; if (info->blocked_open) { @@ -955,7 +916,6 @@ static const struct tty_operations hp_ops = { .stop = rs_stop, .start = rs_start, .hangup = rs_hangup, - .break_ctl = rs_break, .wait_until_sent = rs_wait_until_sent, .read_proc = rs_read_proc, }; -- cgit v1.2.3 From 34492b5834ede63d896c93ccba9a4657a8435dc2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 16 Jul 2008 21:54:01 +0100 Subject: MN10300: Fix MN10300's serial port driver to get at its tty_struct Fix MN10300's serial port driver to get at its tty_struct as this moved from struct uart_info into struct tty_port in patch: Signed-off-by: David Howells Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- arch/mn10300/kernel/mn10300-serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c index b9c268c6b2f..8b054e7a8ae 100644 --- a/arch/mn10300/kernel/mn10300-serial.c +++ b/arch/mn10300/kernel/mn10300-serial.c @@ -392,7 +392,7 @@ static int mask_test_and_clear(volatile u8 *ptr, u8 mask) static void mn10300_serial_receive_interrupt(struct mn10300_serial_port *port) { struct uart_icount *icount = &port->uart.icount; - struct tty_struct *tty = port->uart.info->tty; + struct tty_struct *tty = port->uart.info->port.tty; unsigned ix; int count; u8 st, ch, push, status, overrun; -- cgit v1.2.3 From f30828a6745281edda735f642b5f814e1123ecd3 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 17 Jul 2008 21:16:08 +0200 Subject: m68k: remove CVS keywords This patch removes CVS keywords that weren't updated for a long time from comments. 
Signed-off-by: Adrian Bunk Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/sun3/idprom.c | 2 +- arch/m68k/sun3/prom/Makefile | 1 - arch/m68k/sun3/prom/console.c | 2 +- arch/m68k/sun3/prom/init.c | 2 +- arch/m68k/sun3/prom/misc.c | 2 +- arch/m68k/sun3/prom/printf.c | 2 +- 6 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/m68k/sun3/idprom.c b/arch/m68k/sun3/idprom.c index dca6ab6a4ed..6c5336d62fa 100644 --- a/arch/m68k/sun3/idprom.c +++ b/arch/m68k/sun3/idprom.c @@ -1,4 +1,4 @@ -/* $Id: idprom.c,v 1.22 1996/11/13 05:09:25 davem Exp $ +/* * idprom.c: Routines to load the idprom into kernel addresses and * interpret the data contained within. * diff --git a/arch/m68k/sun3/prom/Makefile b/arch/m68k/sun3/prom/Makefile index 6e48ae2a717..da7eac06bca 100644 --- a/arch/m68k/sun3/prom/Makefile +++ b/arch/m68k/sun3/prom/Makefile @@ -1,4 +1,3 @@ -# $Id: Makefile,v 1.5 1995/11/25 00:59:48 davem Exp $ # Makefile for the Sun Boot PROM interface library under # Linux. # diff --git a/arch/m68k/sun3/prom/console.c b/arch/m68k/sun3/prom/console.c index 52c1427863d..5812560b70f 100644 --- a/arch/m68k/sun3/prom/console.c +++ b/arch/m68k/sun3/prom/console.c @@ -1,4 +1,4 @@ -/* $Id: console.c,v 1.10 1996/12/18 06:46:54 tridge Exp $ +/* * console.c: Routines that deal with sending and receiving IO * to/from the current console device using the PROM. * diff --git a/arch/m68k/sun3/prom/init.c b/arch/m68k/sun3/prom/init.c index 202adfcc316..5f85681af5a 100644 --- a/arch/m68k/sun3/prom/init.c +++ b/arch/m68k/sun3/prom/init.c @@ -1,4 +1,4 @@ -/* $Id: init.c,v 1.9 1996/12/18 06:46:55 tridge Exp $ +/* * init.c: Initialize internal variables used by the PROM * library functions. 
* diff --git a/arch/m68k/sun3/prom/misc.c b/arch/m68k/sun3/prom/misc.c index b88716f2c68..3d60e1337f7 100644 --- a/arch/m68k/sun3/prom/misc.c +++ b/arch/m68k/sun3/prom/misc.c @@ -1,4 +1,4 @@ -/* $Id: misc.c,v 1.15 1997/05/14 20:45:00 davem Exp $ +/* * misc.c: Miscellaneous prom functions that don't belong * anywhere else. * diff --git a/arch/m68k/sun3/prom/printf.c b/arch/m68k/sun3/prom/printf.c index e7bfde377b5..c8cf98d97f2 100644 --- a/arch/m68k/sun3/prom/printf.c +++ b/arch/m68k/sun3/prom/printf.c @@ -1,4 +1,4 @@ -/* $Id: printf.c,v 1.5 1996/04/04 16:31:07 tridge Exp $ +/* * printf.c: Internal prom library printf facility. * * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) -- cgit v1.2.3 From 97d26e73d729c8d967bc5eb9086321956c444dd4 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 17 Jul 2008 21:16:11 +0200 Subject: m68k: vmlinux-std/sun3.lds.S cleanup - use PAGE_SIZE macro This patch includes page.h header into linker script that allow us to use PAGE_SIZE macro instead of numeric constant Signed-off-by: Cyrill Gorcunov Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/kernel/vmlinux-std.lds | 3 ++- arch/m68k/kernel/vmlinux-sun3.lds | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 7537cc5e615..99b0784c055 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -1,6 +1,7 @@ /* ld script to make m68k Linux kernel */ #include +#include OUTPUT_FORMAT("elf32-m68k", "elf32-m68k", "elf32-m68k") OUTPUT_ARCH(m68k) @@ -41,7 +42,7 @@ SECTIONS _edata = .; /* End of data section */ /* will be freed after init */ - . = ALIGN(4096); /* Init code and data */ + . 
= ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; .init.text : { _sinittext = .; diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index cdc313e7c29..8a4919e4d36 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -1,6 +1,7 @@ /* ld script to make m68k Linux kernel */ #include +#include OUTPUT_FORMAT("elf32-m68k", "elf32-m68k", "elf32-m68k") OUTPUT_ARCH(m68k) @@ -34,7 +35,7 @@ SECTIONS _edata = .; /* will be freed after init */ - . = ALIGN(8192); /* Init code and data */ + . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; .init.text : { _sinittext = .; @@ -61,12 +62,12 @@ __init_begin = .; } SECURITY_INIT #ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(8192); + . = ALIGN(PAGE_SIZE); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; #endif - . = ALIGN(8192); + . = ALIGN(PAGE_SIZE); __init_end = .; .data.init.task : { *(.data.init_task) } -- cgit v1.2.3 From edfd92f67eec1bdd905dd7841416eaf945a5b92f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 17 Jul 2008 21:16:12 +0200 Subject: m68k: Allow no CPU/platform type for allnoconfig Allow no CPU/platform type for allnoconfig - Provide a dummy value for FPSTATESIZE if no CPU type was selected - Provide a dummy value for NR_IRQS if no platform type was selected - Warn the user if no CPU or platform type was selected Note: you still cannot build an allnoconfig kernel, as CONFIG_SWAP=n doesn't build and we cannot easily fix that (http://groups.google.com/group/linux.kernel/browse_thread/thread/d430c78b07e1827b) Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/kernel/setup.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/m68k/kernel/setup.c b/arch/m68k/kernel/setup.c index a9fb83a8c18..b1f39e4cedb 100644 --- a/arch/m68k/kernel/setup.c +++ b/arch/m68k/kernel/setup.c @@ -26,6 +26,7 @@ #include #include +#include #include #include 
#include @@ -40,6 +41,11 @@ #include #endif +#if !FPSTATESIZE || !NR_IRQS +#warning No CPU/platform type selected, your kernel will not work! +#warning Are you building an allnoconfig kernel? +#endif + unsigned long m68k_machtype; EXPORT_SYMBOL(m68k_machtype); unsigned long m68k_cputype; -- cgit v1.2.3 From a0c14d28df8fcf939a8efd9332ace164e9f931fb Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 17 Jul 2008 21:16:13 +0200 Subject: Stringify support commas > This is a no-no for those archs that still use -traditional. > > I dunno if this is a problem for you at the moment and the > > right fix is anyway to nuke -traditional. > > > > Sam Signed-off-by: Mathieu Desnoyers Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/fpsp040/Makefile | 1 - arch/m68k/ifpsp060/Makefile | 1 - arch/m68k/kernel/Makefile | 2 -- arch/m68k/lib/Makefile | 2 -- arch/m68k/math-emu/Makefile | 2 -- 5 files changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/m68k/fpsp040/Makefile b/arch/m68k/fpsp040/Makefile index 0214d2f6f8b..9506d883ace 100644 --- a/arch/m68k/fpsp040/Makefile +++ b/arch/m68k/fpsp040/Makefile @@ -10,7 +10,6 @@ obj-y := bindec.o binstr.o decbin.o do_func.o gen_except.o get_op.o \ x_bsun.o x_fline.o x_operr.o x_ovfl.o x_snan.o x_store.o \ x_unfl.o x_unimp.o x_unsupp.o bugfix.o skeleton.o -EXTRA_AFLAGS := -traditional EXTRA_LDFLAGS := -x $(OS_OBJS): fpsp.h diff --git a/arch/m68k/ifpsp060/Makefile b/arch/m68k/ifpsp060/Makefile index 2fe8472cb5e..43b43504945 100644 --- a/arch/m68k/ifpsp060/Makefile +++ b/arch/m68k/ifpsp060/Makefile @@ -6,5 +6,4 @@ obj-y := fskeleton.o iskeleton.o os.o -EXTRA_AFLAGS := -traditional EXTRA_LDFLAGS := -x diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile index 7a62a718143..3a7f6222550 100644 --- a/arch/m68k/kernel/Makefile +++ b/arch/m68k/kernel/Makefile @@ -16,5 +16,3 @@ devres-y = ../../../kernel/irq/devres.o obj-$(CONFIG_PCI) += bios32.o obj-y$(CONFIG_MMU_SUN3) += dma.o # no, it's not a 
typo - -EXTRA_AFLAGS := -traditional diff --git a/arch/m68k/lib/Makefile b/arch/m68k/lib/Makefile index a18af095cd7..af9abf8d9d9 100644 --- a/arch/m68k/lib/Makefile +++ b/arch/m68k/lib/Makefile @@ -2,7 +2,5 @@ # Makefile for m68k-specific library files.. # -EXTRA_AFLAGS := -traditional - lib-y := ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ checksum.o string.o uaccess.o diff --git a/arch/m68k/math-emu/Makefile b/arch/m68k/math-emu/Makefile index 53994040181..a0935bf9836 100644 --- a/arch/m68k/math-emu/Makefile +++ b/arch/m68k/math-emu/Makefile @@ -2,8 +2,6 @@ # Makefile for the linux kernel. # -EXTRA_AFLAGS := -traditional - #EXTRA_AFLAGS += -DFPU_EMU_DEBUG #EXTRA_CFLAGS += -DFPU_EMU_DEBUG -- cgit v1.2.3 From b739912efc02f80cc4dc5eaef07e5bc7eafee1b0 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 17 Jul 2008 21:16:14 +0200 Subject: m68k: make multi_defconfig the default defconfig It seems to match the intention behind multi_defconfig to make it the default defconfig. Signed-off-by: Adrian Bunk Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index b15173f28a2..8133dbc4496 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -13,7 +13,7 @@ # Copyright (C) 1994 by Hamish Macdonald # -KBUILD_DEFCONFIG := amiga_defconfig +KBUILD_DEFCONFIG := multi_defconfig # override top level makefile AS += -m68020 -- cgit v1.2.3 From d33b4432e634246eef00ef4d425939c253f70dd6 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 17 Jul 2008 21:16:15 +0200 Subject: m68k: remove AP1000 code Unless I miss something that's code for a sparc machine even the sparc code no longer supports that got copied to m68k when these files were copied. 
Signed-off-by: Adrian Bunk Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/sun3/prom/console.c | 4 ---- arch/m68k/sun3/prom/init.c | 9 --------- arch/m68k/sun3/prom/printf.c | 5 ----- 3 files changed, 18 deletions(-) (limited to 'arch') diff --git a/arch/m68k/sun3/prom/console.c b/arch/m68k/sun3/prom/console.c index 5812560b70f..2bcb6e4bfe5 100644 --- a/arch/m68k/sun3/prom/console.c +++ b/arch/m68k/sun3/prom/console.c @@ -104,8 +104,6 @@ prom_query_input_device() return PROMDEV_ITTYB; } return PROMDEV_I_UNK; - case PROM_AP1000: - return PROMDEV_I_UNK; }; } #endif @@ -166,8 +164,6 @@ prom_query_output_device() }; } break; - case PROM_AP1000: - return PROMDEV_I_UNK; }; return PROMDEV_O_UNK; } diff --git a/arch/m68k/sun3/prom/init.c b/arch/m68k/sun3/prom/init.c index 5f85681af5a..d8e6349336b 100644 --- a/arch/m68k/sun3/prom/init.c +++ b/arch/m68k/sun3/prom/init.c @@ -31,11 +31,6 @@ extern void prom_ranges_init(void); void __init prom_init(struct linux_romvec *rp) { -#ifdef CONFIG_AP1000 - extern struct linux_romvec *ap_prom_init(void); - rp = ap_prom_init(); -#endif - romvec = rp; #ifndef CONFIG_SUN3 switch(romvec->pv_romvers) { @@ -53,10 +48,6 @@ void __init prom_init(struct linux_romvec *rp) prom_printf("PROMLIB: Sun IEEE Prom not supported yet\n"); prom_halt(); break; - case 42: /* why not :-) */ - prom_vers = PROM_AP1000; - break; - default: prom_printf("PROMLIB: Bad PROM version %d\n", romvec->pv_romvers); diff --git a/arch/m68k/sun3/prom/printf.c b/arch/m68k/sun3/prom/printf.c index c8cf98d97f2..df85018f487 100644 --- a/arch/m68k/sun3/prom/printf.c +++ b/arch/m68k/sun3/prom/printf.c @@ -37,10 +37,6 @@ prom_printf(char *fmt, ...) bptr = ppbuf; -#ifdef CONFIG_AP1000 - ap_write(1,bptr,strlen(bptr)); -#else - #ifdef CONFIG_KGDB if (kgdb_initialized) { printk("kgdb_initialized = %d\n", kgdb_initialized); @@ -53,7 +49,6 @@ prom_printf(char *fmt, ...) 
prom_putchar(ch); } -#endif #endif va_end(args); return; -- cgit v1.2.3 From 8468afc039f03837066132be14cdd9e5fa726f0b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 17 Jul 2008 21:16:21 +0200 Subject: export amiga_vblank This patch fixes the following build error: <-- snip --> .. Building modules, stage 2. MODPOST 1203 modules ERROR: "amiga_vblank" [drivers/video/amifb.ko] undefined! .. make[2]: *** [__modpost] Error 1 <-- snip --> Reported-by: Adrian Bunk Signed-off-by: Adrian Bunk Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/amiga/config.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 50f5daab46b..dd0734e54bb 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -51,6 +51,8 @@ unsigned long amiga_chipset; EXPORT_SYMBOL(amiga_chipset); unsigned char amiga_vblank; +EXPORT_SYMBOL(amiga_vblank); + unsigned char amiga_psfreq; struct amiga_hw_present amiga_hw_present; -- cgit v1.2.3 From 0795dbcc4c4c93a929463957993c04cf5fec346c Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 17 Jul 2008 21:16:23 +0200 Subject: m68k/amiga/: possible cleanups This patch contains the following possible cleanups: - amiints.c: add a proper prototype for amiga_init_IRQ() in include/asm-m68k/amigaints.h - make the following needlessly global code static: - config.c: amiga_model - config.c: amiga_psfreq - config.c: amiga_serial_console_write() - #if 0 the following unused functions: - config.c: amiga_serial_puts() - config.c: amiga_serial_console_wait_key() - config.c: amiga_serial_gets() - remove the following unused variable: - config.c: amiga_masterclock Signed-off-by: Adrian Bunk Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/amiga/config.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index 
dd0734e54bb..df679d96b1c 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -36,14 +36,11 @@ #include #include -unsigned long amiga_model; -EXPORT_SYMBOL(amiga_model); +static unsigned long amiga_model; unsigned long amiga_eclock; EXPORT_SYMBOL(amiga_eclock); -unsigned long amiga_masterclock; - unsigned long amiga_colorclock; EXPORT_SYMBOL(amiga_colorclock); @@ -53,7 +50,7 @@ EXPORT_SYMBOL(amiga_chipset); unsigned char amiga_vblank; EXPORT_SYMBOL(amiga_vblank); -unsigned char amiga_psfreq; +static unsigned char amiga_psfreq; struct amiga_hw_present amiga_hw_present; EXPORT_SYMBOL(amiga_hw_present); @@ -94,8 +91,6 @@ static char *amiga_models[] __initdata = { static char amiga_model_name[13] = "Amiga "; static void amiga_sched_init(irq_handler_t handler); -/* amiga specific irq functions */ -extern void amiga_init_IRQ(void); static void amiga_get_model(char *model); static int amiga_get_hardware_list(char *buffer); /* amiga specific timer functions */ @@ -109,8 +104,6 @@ static void amiga_reset(void); extern void amiga_init_sound(void); static void amiga_mem_console_write(struct console *co, const char *b, unsigned int count); -void amiga_serial_console_write(struct console *co, const char *s, - unsigned int count); #ifdef CONFIG_HEARTBEAT static void amiga_heartbeat(int on); #endif @@ -420,8 +413,7 @@ void __init config_amiga(void) mach_heartbeat = amiga_heartbeat; #endif - /* Fill in the clock values (based on the 700 kHz E-Clock) */ - amiga_masterclock = 40*amiga_eclock; /* 28 MHz */ + /* Fill in the clock value (based on the 700 kHz E-Clock) */ amiga_colorclock = 5*amiga_eclock; /* 3.5 MHz */ /* clear all DMA bits */ @@ -819,8 +811,8 @@ static void amiga_serial_putc(char c) ; } -void amiga_serial_console_write(struct console *co, const char *s, - unsigned int count) +static void amiga_serial_console_write(struct console *co, const char *s, + unsigned int count) { while (count--) { if (*s == '\n') @@ -829,7 +821,7 @@ void 
amiga_serial_console_write(struct console *co, const char *s, } } -#ifdef CONFIG_SERIAL_CONSOLE +#if 0 void amiga_serial_puts(const char *s) { amiga_serial_console_write(NULL, s, strlen(s)); -- cgit v1.2.3 From 5575d0a3c9676b2886adad67dd4b2ac126a49f1f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 17 Jul 2008 21:16:24 +0200 Subject: m68k/atari/debug.c: possible cleanups This patch contains the following possible cleanups: - make the following needlessly global functions (always) static: - atari_mfp_console_write() - atari_scc_console_write() - atari_midi_console_write() - atari_init_mfp_port() - atari_init_scc_port() - atari_init_midi_port() - #if 0 the following unused functions: - atari_mfp_console_wait_key() - atari_scc_console_wait_key() - atari_midi_console_wait_key() - remove the following unused variables: - atari_MFP_init_done - atari_SCC_init_done Signed-off-by: Adrian Bunk Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/atari/debug.c | 37 +++++++------------------------------ 1 file changed, 7 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/m68k/atari/debug.c b/arch/m68k/atari/debug.c index 043ddbc61c7..702b15ccfab 100644 --- a/arch/m68k/atari/debug.c +++ b/arch/m68k/atari/debug.c @@ -20,14 +20,6 @@ #include #include -/* Flag that Modem1 port is already initialized and used */ -int atari_MFP_init_done; -EXPORT_SYMBOL(atari_MFP_init_done); - -/* Flag that Modem1 port is already initialized and used */ -int atari_SCC_init_done; -EXPORT_SYMBOL(atari_SCC_init_done); - /* Can be set somewhere, if a SCC master reset has already be done and should * not be repeated; used by kgdb */ int atari_SCC_reset_done; @@ -47,8 +39,8 @@ static inline void ata_mfp_out(char c) mfp.usart_dta = c; } -void atari_mfp_console_write(struct console *co, const char *str, - unsigned int count) +static void atari_mfp_console_write(struct console *co, const char *str, + unsigned int count) { while (count--) { if (*str == '\n') @@ 
-66,8 +58,8 @@ static inline void ata_scc_out(char c) scc.cha_b_data = c; } -void atari_scc_console_write(struct console *co, const char *str, - unsigned int count) +static void atari_scc_console_write(struct console *co, const char *str, + unsigned int count) { while (count--) { if (*str == '\n') @@ -83,8 +75,8 @@ static inline void ata_midi_out(char c) acia.mid_data = c; } -void atari_midi_console_write(struct console *co, const char *str, - unsigned int count) +static void atari_midi_console_write(struct console *co, const char *str, + unsigned int count) { while (count--) { if (*str == '\n') @@ -136,7 +128,7 @@ static void atari_par_console_write(struct console *co, const char *str, } } -#ifdef CONFIG_SERIAL_CONSOLE +#if 0 int atari_mfp_console_wait_key(struct console *co) { while (!(mfp.rcv_stat & 0x80)) /* wait for rx buf filled */ @@ -166,11 +158,7 @@ int atari_midi_console_wait_key(struct console *co) * SCC serial ports. They're used by the debugging interface, kgdb, and the * serial console code. 
*/ -#ifndef CONFIG_SERIAL_CONSOLE static void __init atari_init_mfp_port(int cflag) -#else -void atari_init_mfp_port(int cflag) -#endif { /* * timer values for 1200...115200 bps; > 38400 select 110, 134, or 150 @@ -193,8 +181,6 @@ void atari_init_mfp_port(int cflag) mfp.tim_dt_d = baud_table[baud]; mfp.tim_ct_cd |= 0x01; /* start timer D, 1:4 */ mfp.trn_stat |= 0x01; /* enable TX */ - - atari_MFP_init_done = 1; } #define SCC_WRITE(reg, val) \ @@ -214,11 +200,7 @@ void atari_init_mfp_port(int cflag) MFPDELAY(); \ } while (0) -#ifndef CONFIG_SERIAL_CONSOLE static void __init atari_init_scc_port(int cflag) -#else -void atari_init_scc_port(int cflag) -#endif { extern int atari_SCC_reset_done; static int clksrc_table[9] = @@ -277,14 +259,9 @@ void atari_init_scc_port(int cflag) SCC_WRITE(5, reg5 | 8); atari_SCC_reset_done = 1; - atari_SCC_init_done = 1; } -#ifndef CONFIG_SERIAL_CONSOLE static void __init atari_init_midi_port(int cflag) -#else -void atari_init_midi_port(int cflag) -#endif { int baud = cflag & CBAUD; int csize = ((cflag & CSIZE) == CS8) ? 
0x10 : 0x00; -- cgit v1.2.3 From 8dfbdf4abad6e5a7bbd097bf7e2c0ec41e0c54b4 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 17 Jul 2008 21:16:25 +0200 Subject: m68k/mac/: possible cleanups This patch contains the following possible cleanups: - make the following needlessly global code (always) static: - baboon.c: struct baboon - baboon.c: baboon_irq() - config.c: mac_orig_videoaddr - config.c: mac_identify() - config.c: mac_report_hardware() - config.c: mac_debug_console_write() - config.c: mac_sccb_console_write() - config.c: mac_scca_console_write() - config.c: mac_init_scc_port() - oss.c: oss_irq() - oss.c: oss_nubus_irq() - psc.c: psc_debug_dump() - psc.c: psc_dma_die_die_die() - via.c: rbv_clear - remove the unused bootparse.c - #if 0 the following unused functions: - config.c: mac_debugging_short() - config.c: mac_debugging_long() - remove the following unused code: - config.c: mac_bisize - config.c: mac_env - config.c: mac_SCC_init_done - config.c: mac_SCC_reset_done - config.c: mac_init_scca_port() - config.c: mac_init_sccb_port() Signed-off-by: Adrian Bunk Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/mac/Makefile | 2 +- arch/m68k/mac/baboon.c | 26 +++++----- arch/m68k/mac/bootparse.c | 122 ---------------------------------------------- arch/m68k/mac/config.c | 11 ++--- arch/m68k/mac/debug.c | 41 ++++------------ arch/m68k/mac/oss.c | 8 +-- arch/m68k/mac/psc.c | 4 +- arch/m68k/mac/via.c | 2 +- 8 files changed, 35 insertions(+), 181 deletions(-) delete mode 100644 arch/m68k/mac/bootparse.c (limited to 'arch') diff --git a/arch/m68k/mac/Makefile b/arch/m68k/mac/Makefile index 1d265ba365a..daebd80bdef 100644 --- a/arch/m68k/mac/Makefile +++ b/arch/m68k/mac/Makefile @@ -2,5 +2,5 @@ # Makefile for Linux arch/m68k/mac source directory # -obj-y := config.o bootparse.o macints.o iop.o via.o oss.o psc.o \ +obj-y := config.o macints.o iop.o via.o oss.o psc.o \ baboon.o macboing.o debug.o misc.o diff --git a/arch/m68k/mac/baboon.c 
b/arch/m68k/mac/baboon.c index 673a1085984..dae9c982aa8 100644 --- a/arch/m68k/mac/baboon.c +++ b/arch/m68k/mac/baboon.c @@ -23,9 +23,7 @@ /* #define DEBUG_IRQS */ int baboon_present; -volatile struct baboon *baboon; - -irqreturn_t baboon_irq(int, void *); +static volatile struct baboon *baboon; #if 0 extern int macide_ack_intr(struct ata_channel *); @@ -49,21 +47,11 @@ void __init baboon_init(void) printk("Baboon detected at %p\n", baboon); } -/* - * Register the Baboon interrupt dispatcher on nubus slot $C. - */ - -void __init baboon_register_interrupts(void) -{ - request_irq(IRQ_NUBUS_C, baboon_irq, IRQ_FLG_LOCK|IRQ_FLG_FAST, - "baboon", (void *) baboon); -} - /* * Baboon interrupt handler. This works a lot like a VIA. */ -irqreturn_t baboon_irq(int irq, void *dev_id) +static irqreturn_t baboon_irq(int irq, void *dev_id) { int irq_bit, irq_num; unsigned char events; @@ -95,6 +83,16 @@ irqreturn_t baboon_irq(int irq, void *dev_id) return IRQ_HANDLED; } +/* + * Register the Baboon interrupt dispatcher on nubus slot $C. + */ + +void __init baboon_register_interrupts(void) +{ + request_irq(IRQ_NUBUS_C, baboon_irq, IRQ_FLG_LOCK|IRQ_FLG_FAST, + "baboon", (void *) baboon); +} + void baboon_irq_enable(int irq) { #ifdef DEBUG_IRQUSE printk("baboon_irq_enable(%d)\n", irq); diff --git a/arch/m68k/mac/bootparse.c b/arch/m68k/mac/bootparse.c deleted file mode 100644 index 36d22360982..00000000000 --- a/arch/m68k/mac/bootparse.c +++ /dev/null @@ -1,122 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -/* - * Booter vars - */ - -int boothowto; -int _boothowto; - -/* - * Called early to parse the environment (passed to us from the booter) - * into a bootinfo struct. 
Will die as soon as we have our own booter - */ - -#define atol(x) simple_strtoul(x,NULL,0) - -void parse_booter(char *env) -{ - char *name; - char *value; -#if 0 - while(0 && *env) -#else - while(*env) -#endif - { - name=env; - value=name; - while(*value!='='&&*value) - value++; - if(*value=='=') - *value++=0; - env=value; - while(*env) - env++; - env++; -#if 0 - if(strcmp(name,"VIDEO_ADDR")==0) - mac_mch.videoaddr=atol(value); - if(strcmp(name,"ROW_BYTES")==0) - mac_mch.videorow=atol(value); - if(strcmp(name,"SCREEN_DEPTH")==0) - mac_mch.videodepth=atol(value); - if(strcmp(name,"DIMENSIONS")==0) - mac_mch.dimensions=atol(value); -#endif - if(strcmp(name,"BOOTTIME")==0) - mac_bi_data.boottime=atol(value); - if(strcmp(name,"GMTBIAS")==0) - mac_bi_data.gmtbias=atol(value); - if(strcmp(name,"BOOTERVER")==0) - mac_bi_data.bootver=atol(value); - if(strcmp(name,"MACOS_VIDEO")==0) - mac_bi_data.videological=atol(value); - if(strcmp(name,"MACOS_SCC")==0) - mac_bi_data.sccbase=atol(value); - if(strcmp(name,"MACHINEID")==0) - mac_bi_data.id=atol(value); - if(strcmp(name,"MEMSIZE")==0) - mac_bi_data.memsize=atol(value); - if(strcmp(name,"SERIAL_MODEM_FLAGS")==0) - mac_bi_data.serialmf=atol(value); - if(strcmp(name,"SERIAL_MODEM_HSKICLK")==0) - mac_bi_data.serialhsk=atol(value); - if(strcmp(name,"SERIAL_MODEM_GPICLK")==0) - mac_bi_data.serialgpi=atol(value); - if(strcmp(name,"SERIAL_PRINT_FLAGS")==0) - mac_bi_data.printmf=atol(value); - if(strcmp(name,"SERIAL_PRINT_HSKICLK")==0) - mac_bi_data.printhsk=atol(value); - if(strcmp(name,"SERIAL_PRINT_GPICLK")==0) - mac_bi_data.printgpi=atol(value); - if(strcmp(name,"PROCESSOR")==0) - mac_bi_data.cpuid=atol(value); - if(strcmp(name,"ROMBASE")==0) - mac_bi_data.rombase=atol(value); - if(strcmp(name,"TIMEDBRA")==0) - mac_bi_data.timedbra=atol(value); - if(strcmp(name,"ADBDELAY")==0) - mac_bi_data.adbdelay=atol(value); - } -#if 0 /* XXX: TODO with m68k_mach_* */ - /* Fill in the base stuff */ - boot_info.machtype=MACH_MAC; - /* Read 
this from the macinfo we got ! */ -/* boot_info.cputype=CPU_68020|FPUB_68881;*/ -/* boot_info.memory[0].addr=0;*/ -/* boot_info.memory[0].size=((mac_bi_data.id>>7)&31)<<20;*/ - boot_info.num_memory=1; /* On a MacII */ - boot_info.ramdisk_size=0; /* For now */ - *boot_info.command_line=0; -#endif - } - - -void print_booter(char *env) -{ - char *name; - char *value; - while(*env) - { - name=env; - value=name; - while(*value!='='&&*value) - value++; - if(*value=='=') - *value++=0; - env=value; - while(*env) - env++; - env++; - printk("%s=%s\n", name,value); - } - } - - diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index ad3e3bacae3..c45e18449f3 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -46,7 +46,6 @@ /* Mac bootinfo struct */ struct mac_booter_data mac_bi_data; -int mac_bisize = sizeof mac_bi_data; /* New m68k bootinfo stuff and videobase */ @@ -55,10 +54,8 @@ extern struct mem_info m68k_memory[NUM_MEMINFO]; extern struct mem_info m68k_ramdisk; -void *mac_env; /* Loaded by the boot asm */ - /* The phys. video addr. 
- might be bogus on some machines */ -unsigned long mac_orig_videoaddr; +static unsigned long mac_orig_videoaddr; /* Mac specific timer functions */ extern unsigned long mac_gettimeoffset(void); @@ -79,6 +76,8 @@ extern void mac_mksound(unsigned int, unsigned int); extern void nubus_sweep_video(void); static void mac_get_model(char *str); +static void mac_identify(void); +static void mac_report_hardware(void); static void __init mac_sched_init(irq_handler_t vector) { @@ -765,7 +764,7 @@ static struct mac_model mac_data_table[] = { } }; -void __init mac_identify(void) +static void __init mac_identify(void) { struct mac_model *m; @@ -821,7 +820,7 @@ void __init mac_identify(void) baboon_init(); } -void __init mac_report_hardware(void) +static void __init mac_report_hardware(void) { printk(KERN_INFO "Apple Macintosh %s\n", macintosh_config->name); } diff --git a/arch/m68k/mac/debug.c b/arch/m68k/mac/debug.c index e8a57138b4a..2165740786a 100644 --- a/arch/m68k/mac/debug.c +++ b/arch/m68k/mac/debug.c @@ -51,6 +51,8 @@ extern void mac_serial_print(const char *); static int peng, line; #endif +#if 0 + void mac_debugging_short(int pos, short num) { #ifdef DEBUG_SCREEN @@ -125,6 +127,8 @@ void mac_debugging_long(int pos, long addr) #endif } +#endif /* 0 */ + #ifdef DEBUG_SERIAL /* * TODO: serial debug code @@ -142,12 +146,6 @@ struct mac_SCC { # define scc (*((volatile struct mac_SCC*)mac_bi_data.sccbase)) -/* Flag that serial port is already initialized and used */ -int mac_SCC_init_done; -/* Can be set somewhere, if a SCC master reset has already be done and should - * not be repeated; used by kgdb */ -int mac_SCC_reset_done; - static int scc_port = -1; static struct console mac_console_driver = { @@ -171,8 +169,8 @@ static struct console mac_console_driver = { * this driver if Mac. 
*/ -void mac_debug_console_write(struct console *co, const char *str, - unsigned int count) +static void mac_debug_console_write(struct console *co, const char *str, + unsigned int count) { mac_serial_print(str); } @@ -209,8 +207,8 @@ static inline void mac_scca_out(char c) scc.cha_a_data = c; } -void mac_sccb_console_write(struct console *co, const char *str, - unsigned int count) +static void mac_sccb_console_write(struct console *co, const char *str, + unsigned int count) { while (count--) { if (*str == '\n') @@ -219,8 +217,8 @@ void mac_sccb_console_write(struct console *co, const char *str, } } -void mac_scca_console_write(struct console *co, const char *str, - unsigned int count) +static void mac_scca_console_write(struct console *co, const char *str, + unsigned int count) { while (count--) { if (*str == '\n') @@ -265,14 +263,8 @@ void mac_scca_console_write(struct console *co, const char *str, barrier(); \ } while(0) -#ifndef CONFIG_SERIAL_CONSOLE static void __init mac_init_scc_port(int cflag, int port) -#else -void mac_init_scc_port(int cflag, int port) -#endif { - extern int mac_SCC_reset_done; - /* * baud rates: 1200, 1800, 2400, 4800, 9600, 19.2k, 38.4k, 57.6k, 115.2k */ @@ -340,22 +332,9 @@ void mac_init_scc_port(int cflag, int port) SCCA_WRITE(3, reg3 | 1); SCCA_WRITE(5, reg5 | 8); } - - mac_SCC_reset_done = 1; - mac_SCC_init_done = 1; } #endif /* DEBUG_SERIAL */ -void mac_init_scca_port(int cflag) -{ - mac_init_scc_port(cflag, 0); -} - -void mac_init_sccb_port(int cflag) -{ - mac_init_scc_port(cflag, 1); -} - static int __init mac_debug_setup(char *arg) { if (!MACH_IS_MAC) diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c index 3c943d2ec57..43d83e054b8 100644 --- a/arch/m68k/mac/oss.c +++ b/arch/m68k/mac/oss.c @@ -30,8 +30,8 @@ int oss_present; volatile struct mac_oss *oss; -irqreturn_t oss_irq(int, void *); -irqreturn_t oss_nubus_irq(int, void *); +static irqreturn_t oss_irq(int, void *); +static irqreturn_t oss_nubus_irq(int, void *); extern 
irqreturn_t via1_irq(int, void *); extern irqreturn_t mac_scc_dispatch(int, void *); @@ -92,7 +92,7 @@ void __init oss_nubus_init(void) * and SCSI; everything else is routed to its own autovector IRQ. */ -irqreturn_t oss_irq(int irq, void *dev_id) +static irqreturn_t oss_irq(int irq, void *dev_id) { int events; @@ -126,7 +126,7 @@ irqreturn_t oss_irq(int irq, void *dev_id) * Unlike the VIA/RBV this is on its own autovector interrupt level. */ -irqreturn_t oss_nubus_irq(int irq, void *dev_id) +static irqreturn_t oss_nubus_irq(int irq, void *dev_id) { int events, irq_bit, i; diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c index d66f723b17c..f84a4dd64f9 100644 --- a/arch/m68k/mac/psc.c +++ b/arch/m68k/mac/psc.c @@ -36,7 +36,7 @@ irqreturn_t psc_irq(int, void *); * Debugging dump, used in various places to see what's going on. */ -void psc_debug_dump(void) +static void psc_debug_dump(void) { int i; @@ -55,7 +55,7 @@ void psc_debug_dump(void) * expanded to cover what I think are the other 7 channels. 
*/ -void psc_dma_die_die_die(void) +static void psc_dma_die_die_die(void) { int i; diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index fa485df4160..f3b27d04a31 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -45,7 +45,7 @@ volatile long *via_memory_bogon=(long *)&via_memory_bogon; int rbv_present; int via_alt_mapping; EXPORT_SYMBOL(via_alt_mapping); -__u8 rbv_clear; +static __u8 rbv_clear; /* * Globals for accessing the VIA chip registers without having to -- cgit v1.2.3 From 22deb527ce5d13e07652f81a53032aa0214ea8c3 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 17 Jul 2008 21:16:26 +0200 Subject: m68k/q40/config.c: make functions static This patch makes the following needlessly global functions static: - q40_reset() - q40_halt() - q40_disable_irqs() - q40_gettimeoffset() - q40_hwclk() - q40_get_ss() - q40_set_clock_mmss() Signed-off-by: Adrian Bunk Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/q40/config.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index 476e18eca75..be9de2f3dc4 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -41,14 +41,12 @@ static void q40_get_model(char *model); static int q40_get_hardware_list(char *buffer); extern void q40_sched_init(irq_handler_t handler); -extern unsigned long q40_gettimeoffset(void); -extern int q40_hwclk(int, struct rtc_time *); -extern unsigned int q40_get_ss(void); -extern int q40_set_clock_mmss(unsigned long); +static unsigned long q40_gettimeoffset(void); +static int q40_hwclk(int, struct rtc_time *); +static unsigned int q40_get_ss(void); +static int q40_set_clock_mmss(unsigned long); static int q40_get_rtc_pll(struct rtc_pll_info *pll); static int q40_set_rtc_pll(struct rtc_pll_info *pll); -extern void q40_reset(void); -void q40_halt(void); extern void q40_waitbut(void); void q40_set_vectors(void); @@ -127,7 +125,7 @@ 
static void q40_heartbeat(int on) } #endif -void q40_reset(void) +static void q40_reset(void) { halted = 1; printk("\n\n*******************************************\n" @@ -137,7 +135,8 @@ void q40_reset(void) while (1) ; } -void q40_halt(void) + +static void q40_halt(void) { halted = 1; printk("\n\n*******************\n" @@ -165,7 +164,8 @@ static unsigned int serports[] = { 0x3f8,0x2f8,0x3e8,0x2e8,0 }; -void q40_disable_irqs(void) + +static void q40_disable_irqs(void) { unsigned i, j; @@ -227,7 +227,7 @@ static inline unsigned char bin2bcd(unsigned char b) } -unsigned long q40_gettimeoffset(void) +static unsigned long q40_gettimeoffset(void) { return 5000 * (ql_ticks != 0); } @@ -248,7 +248,7 @@ unsigned long q40_gettimeoffset(void) * }; */ -int q40_hwclk(int op, struct rtc_time *t) +static int q40_hwclk(int op, struct rtc_time *t) { if (op) { /* Write.... */ @@ -285,7 +285,7 @@ int q40_hwclk(int op, struct rtc_time *t) return 0; } -unsigned int q40_get_ss(void) +static unsigned int q40_get_ss(void) { return bcd2bin(Q40_RTC_SECS); } @@ -295,7 +295,7 @@ unsigned int q40_get_ss(void) * clock is out by > 30 minutes. Logic lifted from atari code. 
*/ -int q40_set_clock_mmss(unsigned long nowtime) +static int q40_set_clock_mmss(unsigned long nowtime) { int retval = 0; short real_seconds = nowtime % 60, real_minutes = (nowtime / 60) % 60; -- cgit v1.2.3 From 07b8125949de66b6552966de8d4280c3a8620359 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 17 Jul 2008 21:16:27 +0200 Subject: m68k/sun3/: possible cleanups This patch contains the following possible cleanups: - make the following needlessly global code static: - config.c: sun3_bootmem_alloc() - config.c: sun3_sched_init() - dvma.c: dvma_page() - idprom.c: struct Sun_Machines[] - mmu_emu.c: struct ctx_alloc[] - sun3dvma.c: iommu_use[] - sun3ints.c: led_pattern[] - remove the unused sbus.c Signed-off-by: Adrian Bunk Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/sun3/Makefile | 2 +- arch/m68k/sun3/config.c | 7 ++++--- arch/m68k/sun3/dvma.c | 2 +- arch/m68k/sun3/idprom.c | 2 +- arch/m68k/sun3/mmu_emu.c | 2 +- arch/m68k/sun3/sbus.c | 27 --------------------------- arch/m68k/sun3/sun3dvma.c | 2 +- arch/m68k/sun3/sun3ints.c | 2 +- 8 files changed, 10 insertions(+), 36 deletions(-) delete mode 100644 arch/m68k/sun3/sbus.c (limited to 'arch') diff --git a/arch/m68k/sun3/Makefile b/arch/m68k/sun3/Makefile index be1a8470d63..38ba0e0ceda 100644 --- a/arch/m68k/sun3/Makefile +++ b/arch/m68k/sun3/Makefile @@ -2,6 +2,6 @@ # Makefile for Linux arch/m68k/sun3 source directory # -obj-y := sun3ints.o sun3dvma.o sbus.o idprom.o +obj-y := sun3ints.o sun3dvma.o idprom.o obj-$(CONFIG_SUN3) += config.o mmu_emu.o leds.o dvma.o intersil.o diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c index c0fbd278fbb..732087d0735 100644 --- a/arch/m68k/sun3/config.c +++ b/arch/m68k/sun3/config.c @@ -36,7 +36,7 @@ extern char _text, _end; char sun3_reserved_pmeg[SUN3_PMEGS_NUM]; extern unsigned long sun3_gettimeoffset(void); -extern void sun3_sched_init(irq_handler_t handler); +static void sun3_sched_init(irq_handler_t handler); extern void 
sun3_get_model (char* model); extern void idprom_init (void); extern int sun3_hwclk(int set, struct rtc_time *t); @@ -114,7 +114,8 @@ static void sun3_halt (void) /* sun3 bootmem allocation */ -void __init sun3_bootmem_alloc(unsigned long memory_start, unsigned long memory_end) +static void __init sun3_bootmem_alloc(unsigned long memory_start, + unsigned long memory_end) { unsigned long start_page; @@ -164,7 +165,7 @@ void __init config_sun3(void) sun3_bootmem_alloc(memory_start, memory_end); } -void __init sun3_sched_init(irq_handler_t timer_routine) +static void __init sun3_sched_init(irq_handler_t timer_routine) { sun3_disable_interrupts(); intersil_clock->cmd_reg=(INTERSIL_RUN|INTERSIL_INT_DISABLE|INTERSIL_24H_MODE); diff --git a/arch/m68k/sun3/dvma.c b/arch/m68k/sun3/dvma.c index d2b3093f240..d522eaab455 100644 --- a/arch/m68k/sun3/dvma.c +++ b/arch/m68k/sun3/dvma.c @@ -19,7 +19,7 @@ static unsigned long ptelist[120]; -inline unsigned long dvma_page(unsigned long kaddr, unsigned long vaddr) +static unsigned long dvma_page(unsigned long kaddr, unsigned long vaddr) { unsigned long pte; unsigned long j; diff --git a/arch/m68k/sun3/idprom.c b/arch/m68k/sun3/idprom.c index 6c5336d62fa..c86ac37d198 100644 --- a/arch/m68k/sun3/idprom.c +++ b/arch/m68k/sun3/idprom.c @@ -25,7 +25,7 @@ static struct idprom idprom_buffer; * of the Sparc CPU and have a meaningful IDPROM machtype value that we * know about. See asm-sparc/machines.h for empirical constants. 
*/ -struct Sun_Machine_Models Sun_Machines[NUM_SUN_MACHINES] = { +static struct Sun_Machine_Models Sun_Machines[NUM_SUN_MACHINES] = { /* First, Sun3's */ { .name = "Sun 3/160 Series", .id_machtype = (SM_SUN3 | SM_3_160) }, { .name = "Sun 3/50", .id_machtype = (SM_SUN3 | SM_3_50) }, diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c index fb0f6a20cc3..60f9d4500d7 100644 --- a/arch/m68k/sun3/mmu_emu.c +++ b/arch/m68k/sun3/mmu_emu.c @@ -55,7 +55,7 @@ unsigned char pmeg_ctx[PMEGS_NUM]; /* pointers to the mm structs for each task in each context. 0xffffffff is a marker for kernel context */ -struct mm_struct *ctx_alloc[CONTEXTS_NUM] = { +static struct mm_struct *ctx_alloc[CONTEXTS_NUM] = { [0] = (struct mm_struct *)0xffffffff }; diff --git a/arch/m68k/sun3/sbus.c b/arch/m68k/sun3/sbus.c deleted file mode 100644 index babdbfa3cda..00000000000 --- a/arch/m68k/sun3/sbus.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * SBus helper functions - * - * Sun3 don't have a sbus, but many of the used devices are also - * used on Sparc machines with sbus. To avoid having a lot of - * duplicate code, we provide necessary glue stuff to make using - * of the sbus driver code possible. 
- * - * (C) 1999 Thomas Bogendoerfer (tsbogend@alpha.franken.de) - */ - -#include -#include -#include - -int __init sbus_init(void) -{ - return 0; -} - -void *sparc_alloc_io (u32 address, void *virtual, int len, char *name, - u32 bus_type, int rdonly) -{ - return (void *)address; -} - -subsys_initcall(sbus_init); diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c index 8709677fa02..f9277e8b415 100644 --- a/arch/m68k/sun3/sun3dvma.c +++ b/arch/m68k/sun3/sun3dvma.c @@ -29,7 +29,7 @@ static inline void dvma_unmap_iommu(unsigned long a, int b) extern void sun3_dvma_init(void); #endif -unsigned long iommu_use[IOMMU_TOTAL_ENTRIES]; +static unsigned long iommu_use[IOMMU_TOTAL_ENTRIES]; #define dvma_index(baddr) ((baddr - DVMA_START) >> DVMA_PAGE_SHIFT) diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c index cf93481adb1..7364cd67455 100644 --- a/arch/m68k/sun3/sun3ints.c +++ b/arch/m68k/sun3/sun3ints.c @@ -30,7 +30,7 @@ void sun3_enable_interrupts(void) sun3_enable_irq(0); } -int led_pattern[8] = { +static int led_pattern[8] = { ~(0x80), ~(0x01), ~(0x40), ~(0x02), ~(0x20), ~(0x04), -- cgit v1.2.3 From 635c0a217425f6f37422b85bcc88a7af9efc457c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 17 Jul 2008 21:16:29 +0200 Subject: m68k/apollo: Add missing call to apollo_parse_bootinfo() Add the missing call to apollo_parse_bootinfo(), which had been lost from a big Apollo support patch by Peter De Schrijver in 1999. Thanks to Adrian Bunk for noticing! 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/m68k/kernel/setup.c b/arch/m68k/kernel/setup.c index b1f39e4cedb..ea1e44da19b 100644 --- a/arch/m68k/kernel/setup.c +++ b/arch/m68k/kernel/setup.c @@ -122,6 +122,7 @@ extern int bvme6000_parse_bootinfo(const struct bi_record *); extern int mvme16x_parse_bootinfo(const struct bi_record *); extern int mvme147_parse_bootinfo(const struct bi_record *); extern int hp300_parse_bootinfo(const struct bi_record *); +extern int apollo_parse_bootinfo(const struct bi_record *); extern void config_amiga(void); extern void config_atari(void); @@ -189,6 +190,8 @@ static void __init m68k_parse_bootinfo(const struct bi_record *record) unknown = mvme147_parse_bootinfo(record); else if (MACH_IS_HP300) unknown = hp300_parse_bootinfo(record); + else if (MACH_IS_APOLLO) + unknown = apollo_parse_bootinfo(record); else unknown = 1; } -- cgit v1.2.3 From 62bc654e794feb5242c31a59dcc36bab64f7d917 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 17 Jul 2008 21:16:30 +0200 Subject: m68k/Mac: remove the unused ADB_KEYBOARD option When the driver was removed back in 2002 the option was forgotten. Reported-by: Robert P. J. Day Signed-off-by: Adrian Bunk Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/Kconfig | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch') diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 55ea52fe6ac..8012ff7d751 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -578,18 +578,6 @@ config MAC_HID depends on INPUT_ADBHID default y -config ADB_KEYBOARD - bool "Support for ADB keyboard (old driver)" - depends on MAC && !INPUT_ADBHID - help - This option allows you to use an ADB keyboard attached to your - machine. Note that this disables any other (ie. 
PS/2) keyboard - support, even if your machine is physically capable of using both at - the same time. - - If you use an ADB keyboard (4 pin connector), say Y here. - If you use a PS/2 keyboard (6 pin connector), say N here. - config HPDCA tristate "HP DCA serial support" depends on DIO && SERIAL_8250 -- cgit v1.2.3 From 7ccaee5cadd7a771773bbb878e139697511ebdde Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 17 Jul 2008 21:16:31 +0200 Subject: m68k/Atari: remove the dead ATARI_SCC{,_DMA} options It seems the driver was removed back in kernel 2.3 but the options were forgotten. Reported-by: Robert P. J. Day Signed-off-by: Adrian Bunk Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/Kconfig | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 8012ff7d751..8c5e1de68fc 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -490,28 +490,6 @@ config ATARI_MFPSER Note for Falcon users: You also have an MFP port, it's just not wired to the outside... But you could use the port under Linux. -config ATARI_SCC - tristate "Atari SCC serial support" - depends on ATARI - ---help--- - If you have serial ports based on a Zilog SCC chip (Modem2, Serial2, - LAN) and like to use them under Linux, say Y. All built-in SCC's are - supported (TT, MegaSTE, Falcon), and also the ST-ESCC. If you have - two connectors for channel A (Serial2 and LAN), they are visible as - two separate devices. - - To compile this driver as a module, choose M here. - -config ATARI_SCC_DMA - bool "Atari SCC serial DMA support" - depends on ATARI_SCC - help - This enables DMA support for receiving data on channel A of the SCC. - If you have a TT you may say Y here and read - drivers/char/atari_SCC.README. All other users should say N here, - because only the TT has SCC-DMA, even if your machine keeps claiming - so at boot time. 
- config ATARI_MIDI tristate "Atari MIDI serial support" depends on ATARI @@ -628,7 +606,7 @@ config DN_SERIAL config SERIAL_CONSOLE bool "Support for serial port console" - depends on (AMIGA || ATARI || MAC || SUN3 || SUN3X || VME || APOLLO) && (ATARI_MFPSER=y || ATARI_SCC=y || ATARI_MIDI=y || MAC_SCC=y || AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y || SERIAL=y || MVME147_SCC || SERIAL167 || MVME162_SCC || BVME6000_SCC || DN_SERIAL) + depends on (AMIGA || ATARI || MAC || SUN3 || SUN3X || VME || APOLLO) && (ATARI_MFPSER=y || ATARI_MIDI=y || MAC_SCC=y || AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y || SERIAL=y || MVME147_SCC || SERIAL167 || MVME162_SCC || BVME6000_SCC || DN_SERIAL) ---help--- If you say Y here, it will be possible to use a serial port as the system console (the system console is the device which receives all -- cgit v1.2.3 From 7b6b948fc0d60a704c15b1cd72345a98e759dd62 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 17 Jul 2008 21:16:33 +0200 Subject: arch/m68k/mm/motorola.c: Eliminate NULL test and memset after alloc_bootmem As noted by Akinobu Mita in patch b1fceac2b9e04d278316b2faddf276015fc06e3b, alloc_bootmem and related functions never return NULL and always return a zeroed region of memory. Thus a NULL test or memset after calls to these functions is unnecessary. This was fixed using the following semantic patch. (http://www.emn.fr/x-info/coccinelle/) // @@ expression E; statement S; @@ E = \(alloc_bootmem\|alloc_bootmem_low\|alloc_bootmem_pages\|alloc_bootmem_low_pages\)(...) .. when != E ( - BUG_ON (E == NULL); | - if (E == NULL) S ) @@ expression E,E1; @@ E = \(alloc_bootmem\|alloc_bootmem_low\|alloc_bootmem_pages\|alloc_bootmem_low_pages\)(...) .. 
when != E - memset(E,0,E1); // Signed-off-by: Julia Lawall Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/mm/motorola.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 30d34f28502..226795bdf35 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -285,7 +285,6 @@ void __init paging_init(void) * to a couple of allocated pages */ empty_zero_page = alloc_bootmem_pages(PAGE_SIZE); - memset(empty_zero_page, 0, PAGE_SIZE); /* * Set up SFC/DFC registers -- cgit v1.2.3 From 93026e217b46b70f9719caf69e716fa3bbe1d20c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 17 Jul 2008 21:16:34 +0200 Subject: arch/m68k/mm/sun3mmu.c: Eliminate NULL test and memset after alloc_bootmem As noted by Akinobu Mita in patch b1fceac2b9e04d278316b2faddf276015fc06e3b, alloc_bootmem and related functions never return NULL and always return a zeroed region of memory. Thus a NULL test or memset after calls to these functions is unnecessary. This was fixed using the following semantic patch. (http://www.emn.fr/x-info/coccinelle/) // @@ expression E; statement S; @@ E = \(alloc_bootmem\|alloc_bootmem_low\|alloc_bootmem_pages\|alloc_bootmem_low_pages\)(...) .. when != E ( - BUG_ON (E == NULL); | - if (E == NULL) S ) @@ expression E,E1; @@ E = \(alloc_bootmem\|alloc_bootmem_low\|alloc_bootmem_pages\|alloc_bootmem_low_pages\)(...) .. 
when != E - memset(E,0,E1); // Signed-off-by: Julia Lawall Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Torvalds --- arch/m68k/mm/sun3mmu.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c index 6a6513aa1ce..edceefc1887 100644 --- a/arch/m68k/mm/sun3mmu.c +++ b/arch/m68k/mm/sun3mmu.c @@ -53,7 +53,6 @@ void __init paging_init(void) wp_works_ok = 0; #endif empty_zero_page = alloc_bootmem_pages(PAGE_SIZE); - memset(empty_zero_page, 0, PAGE_SIZE); address = PAGE_OFFSET; pg_dir = swapper_pg_dir; -- cgit v1.2.3 From 9175fc06aee79c349790672178d3fd7507d75c86 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 21 Jul 2008 01:38:14 -0700 Subject: x86: use setup_clear_cpu_cap() when disabling the lapic ... so don't need to call clear_cpu_cap again in early_identify_cpu, and could use cleared_cpu_caps like other places. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 5 +---- arch/x86/kernel/apic_64.c | 2 +- arch/x86/kernel/cpu/common_64.c | 4 ---- 3 files changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a437d027f20..e9a00e5074b 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1214,9 +1214,6 @@ int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { - if (disable_apic) - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - if (!smp_found_config && !cpu_has_apic) return -1; @@ -1700,7 +1697,7 @@ early_param("lapic", parse_lapic); static int __init parse_nolapic(char *arg) { disable_apic = 1; - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + setup_clear_cpu_cap(X86_FEATURE_APIC); return 0; } early_param("nolapic", parse_nolapic); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e3d32e27c1..16e586cacbd 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1337,7 +1337,7 @@ early_param("apic", 
apic_set_verbosity); static __init int setup_disableapic(char *str) { disable_apic = 1; - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + setup_clear_cpu_cap(X86_FEATURE_APIC); return 0; } early_param("disableapic", setup_disableapic); diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 7b8cc72feb4..0485cf64452 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -324,10 +324,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) cpu_devs[c->x86_vendor]->c_early_init(c); validate_pat_support(c); - - /* early_param could clear that, but recall get it set again */ - if (disable_apic) - clear_cpu_cap(c, X86_FEATURE_APIC); } /* -- cgit v1.2.3 From 7edf8891ad7aef5f4e97991fed6fb0e605e96ea0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 21 Jul 2008 01:39:03 -0700 Subject: x86: remove extra calling to get ext cpuid level Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 0485cf64452..daee611f014 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -305,7 +305,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[2] = cpuid_edx(0x80860001); } - c->extended_cpuid_level = cpuid_eax(0x80000000); if (c->extended_cpuid_level >= 0x80000007) c->x86_power = cpuid_edx(0x80000007); -- cgit v1.2.3 From cfc1b9a6a683c835a20d5b565ade55baf639f72f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 21 Jul 2008 21:35:38 +0200 Subject: x86: convert Dprintk to pr_debug There are a couple of places where (P)Dprintk is used which is an old compile time enabled printk wrapper. Convert it to the generic pr_debug(). 
Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 6 ++-- arch/x86/kernel/cpu/perfctr-watchdog.c | 4 +-- arch/x86/kernel/setup_percpu.c | 6 ++-- arch/x86/kernel/smpboot.c | 52 +++++++++++++++++----------------- arch/x86/mm/numa_64.c | 4 --- arch/x86/pci/early.c | 16 +++++------ 6 files changed, 41 insertions(+), 47 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f489d7a9be9..fa88a1d7129 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1021,7 +1021,7 @@ void __init mp_config_acpi_legacy_irqs(void) mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; #endif set_bit(MP_ISA_BUS, mp_bus_not_pci); - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); + pr_debug("Bus #%d is ISA\n", MP_ISA_BUS); #ifdef CONFIG_X86_ES7000 /* @@ -1127,8 +1127,8 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) return gsi; } if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { - Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", - mp_ioapic_routing[ioapic].apic_id, ioapic_pin); + pr_debug(KERN_DEBUG "Pin %d-%d already programmed\n", + mp_ioapic_routing[ioapic].apic_id, ioapic_pin); #ifdef CONFIG_X86_32 return (gsi < IRQ_COMPRESSION_START ? 
gsi : gsi_to_irq[gsi]); #else diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 6d4bdc02388..de7439f82b9 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -250,7 +250,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr, do_div(count, nmi_hz); if(descr) - Dprintk("setting %s to -0x%08Lx\n", descr, count); + pr_debug("setting %s to -0x%08Lx\n", descr, count); wrmsrl(perfctr_msr, 0 - count); } @@ -261,7 +261,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr, do_div(count, nmi_hz); if(descr) - Dprintk("setting %s to -0x%08Lx\n", descr, count); + pr_debug("setting %s to -0x%08Lx\n", descr, count); wrmsr(perfctr_msr, (u32)(-count), 0); } diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index cac68430d31..f7745f94c00 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -227,8 +227,8 @@ static void __init setup_node_to_cpumask_map(void) /* allocate the map */ map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); - Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", - map, nr_node_ids); + pr_debug(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", + map, nr_node_ids); /* node_to_cpumask() will now work */ node_to_cpumask_map = map; @@ -248,7 +248,7 @@ void __cpuinit numa_set_node(int cpu, int node) per_cpu(x86_cpu_to_node_map, cpu) = node; else - Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); + pr_debug("Setting node for non-present cpu %d\n", cpu); } void __cpuinit numa_clear_node(int cpu) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 27640196eb7..4b53a647bc0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -216,7 +216,7 @@ static void __cpuinit smp_callin(void) panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, phys_id, cpuid); } - Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", 
cpuid, phys_id); + pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); /* * STARTUP IPIs are fragile beasts as they might sometimes @@ -251,7 +251,7 @@ static void __cpuinit smp_callin(void) * boards) */ - Dprintk("CALLIN, before setup_local_APIC().\n"); + pr_debug("CALLIN, before setup_local_APIC().\n"); smp_callin_clear_local_apic(); setup_local_APIC(); end_local_APIC_setup(); @@ -266,7 +266,7 @@ static void __cpuinit smp_callin(void) local_irq_enable(); calibrate_delay(); local_irq_disable(); - Dprintk("Stack at about %p\n", &cpuid); + pr_debug("Stack at about %p\n", &cpuid); /* * Save our processor parameters @@ -513,7 +513,7 @@ static void impress_friends(void) /* * Allow the user to impress friends. */ - Dprintk("Before bogomips.\n"); + pr_debug("Before bogomips.\n"); for_each_possible_cpu(cpu) if (cpu_isset(cpu, cpu_callout_map)) bogosum += cpu_data(cpu).loops_per_jiffy; @@ -523,7 +523,7 @@ static void impress_friends(void) bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); - Dprintk("Before bogocount - setting activated=1.\n"); + pr_debug("Before bogocount - setting activated=1.\n"); } static inline void __inquire_remote_apic(int apicid) @@ -585,7 +585,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) /* Kick the second */ apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); - Dprintk("Waiting for send to finish...\n"); + pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); /* @@ -596,7 +596,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) if (maxlvt > 3) /* Due to the Pentium erratum 3AP. 
*/ apic_write(APIC_ESR, 0); accept_status = (apic_read(APIC_ESR) & 0xEF); - Dprintk("NMI sent.\n"); + pr_debug("NMI sent.\n"); if (send_status) printk(KERN_ERR "APIC never delivered???\n"); @@ -631,7 +631,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) apic_read(APIC_ESR); } - Dprintk("Asserting INIT.\n"); + pr_debug("Asserting INIT.\n"); /* * Turn INIT on target chip @@ -644,12 +644,12 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); - Dprintk("Waiting for send to finish...\n"); + pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); mdelay(10); - Dprintk("Deasserting INIT.\n"); + pr_debug("Deasserting INIT.\n"); /* Target chip */ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); @@ -657,7 +657,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) /* Send IPI */ apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); - Dprintk("Waiting for send to finish...\n"); + pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); mb(); @@ -684,14 +684,14 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) /* * Run STARTUP IPI loop. */ - Dprintk("#startup loops: %d.\n", num_starts); + pr_debug("#startup loops: %d.\n", num_starts); for (j = 1; j <= num_starts; j++) { - Dprintk("Sending STARTUP #%d.\n", j); + pr_debug("Sending STARTUP #%d.\n", j); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. 
*/ apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - Dprintk("After apic_write.\n"); + pr_debug("After apic_write.\n"); /* * STARTUP IPI @@ -709,9 +709,9 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) */ udelay(300); - Dprintk("Startup point 1.\n"); + pr_debug("Startup point 1.\n"); - Dprintk("Waiting for send to finish...\n"); + pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); /* @@ -724,7 +724,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) if (send_status || accept_status) break; } - Dprintk("After Startup.\n"); + pr_debug("After Startup.\n"); if (send_status) printk(KERN_ERR "APIC never delivered???\n"); @@ -875,7 +875,7 @@ do_rest: if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { - Dprintk("Setting warm reset code and vector.\n"); + pr_debug("Setting warm reset code and vector.\n"); store_NMI_vector(&nmi_high, &nmi_low); @@ -896,9 +896,9 @@ do_rest: /* * allow APs to start initializing. */ - Dprintk("Before Callout %d.\n", cpu); + pr_debug("Before Callout %d.\n", cpu); cpu_set(cpu, cpu_callout_map); - Dprintk("After Callout %d.\n", cpu); + pr_debug("After Callout %d.\n", cpu); /* * Wait 5s total for a response @@ -911,10 +911,10 @@ do_rest: if (cpu_isset(cpu, cpu_callin_map)) { /* number CPUs logically, starting from 1 (BSP is 0) */ - Dprintk("OK.\n"); + pr_debug("OK.\n"); printk(KERN_INFO "CPU%d: ", cpu); print_cpu_info(&cpu_data(cpu)); - Dprintk("CPU has booted.\n"); + pr_debug("CPU has booted.\n"); } else { boot_error = 1; if (*((volatile unsigned char *)trampoline_base) @@ -959,7 +959,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) WARN_ON(irqs_disabled()); - Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); + pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || !physid_isset(apicid, phys_cpu_present_map)) { @@ -971,7 +971,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) * Already booted CPU? 
*/ if (cpu_isset(cpu, cpu_callin_map)) { - Dprintk("do_boot_cpu %d Already started\n", cpu); + pr_debug("do_boot_cpu %d Already started\n", cpu); return -ENOSYS; } @@ -998,7 +998,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) err = do_boot_cpu(apicid, cpu); #endif if (err) { - Dprintk("do_boot_cpu failed %d\n", err); + pr_debug("do_boot_cpu failed %d\n", err); return -EIO; } @@ -1202,7 +1202,7 @@ void __init native_smp_prepare_boot_cpu(void) void __init native_smp_cpus_done(unsigned int max_cpus) { - Dprintk("Boot done.\n"); + pr_debug("Boot done.\n"); impress_friends(); smp_checks(); diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index b432d578177..9782f42dd31 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -20,10 +20,6 @@ #include #include -#ifndef Dprintk -#define Dprintk(x...) -#endif - struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index 858dbe3399f..86631ccbc25 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -7,15 +7,13 @@ /* Direct PCI access. This is used for PCI accesses in early boot before the PCI subsystem works. */ -#define PDprintk(x...) 
- u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) { u32 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inl(0xcfc); if (v != 0xffffffff) - PDprintk("%x reading 4 from %x: %x\n", slot, offset, v); + pr_debug("%x reading 4 from %x: %x\n", slot, offset, v); return v; } @@ -24,7 +22,7 @@ u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) u8 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inb(0xcfc + (offset&3)); - PDprintk("%x reading 1 from %x: %x\n", slot, offset, v); + pr_debug("%x reading 1 from %x: %x\n", slot, offset, v); return v; } @@ -33,28 +31,28 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) u16 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inw(0xcfc + (offset&2)); - PDprintk("%x reading 2 from %x: %x\n", slot, offset, v); + pr_debug("%x reading 2 from %x: %x\n", slot, offset, v); return v; } void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val) { - PDprintk("%x writing to %x: %x\n", slot, offset, val); + pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outl(val, 0xcfc); } void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) { - PDprintk("%x writing to %x: %x\n", slot, offset, val); + pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outb(val, 0xcfc + (offset&3)); } void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) { - PDprintk("%x writing to %x: %x\n", slot, offset, val); + pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outw(val, 0xcfc + (offset&2)); } @@ -71,7 +69,7 @@ void early_dump_pci_device(u8 bus, u8 slot, u8 func) int j; u32 val; - printk("PCI: %02x:%02x:%02x", bus, slot, func); + printk(KERN_INFO "PCI: %02x:%02x:%02x", bus, slot, func); for (i 
= 0; i < 256; i += 4) { if (!(i & 0x0f)) -- cgit v1.2.3 From f2d0f1dea41fd6c7a347e71b505a155096643517 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 21 Jul 2008 13:04:08 -0700 Subject: x86: Fix help message for STRICT_DEVMEM config option The message talked about "left on" when it meant to say disabled. Signed-off-by: Linus Torvalds --- arch/x86/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 85a87d2ac0c..092f019e033 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -8,7 +8,7 @@ source "lib/Kconfig.debug" config STRICT_DEVMEM bool "Filter access to /dev/mem" help - If this option is left on, you allow userspace (root) access to all + If this option is disabled, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental access to this is obviously disastrous, but specific access can be used by people debugging the kernel. Note that with PAT support -- cgit v1.2.3